Wikimedia Security Team/SVG filter changes
If you're changing the svg filter, sometimes you want to make sure you didn't accidentally blacklist something that's used everywhere.
Here is a script you can use to check recent uploads. This requires a separate, unrelated MW install and a connection to a DB containing the image table (I used the one at Tool Labs for this). It's a tad hacky, but hey, it works.
<?php
/**
* Maintenance script to check if recent uploads still pass filters.
*
* This is meant to check if changes to filter will cause problems.
* The idea being that one would run this script on a modified version
* of mediawiki with the filter changes, on somewhere like tool labs,
* and it will download recent uploads from commons and verify they'd still
* work.
*/
require_once __DIR__ . "/Maintenance.php";
/**
 * Re-validates recently uploaded SVG files against the local SVG upload filter.
 *
 * File names are read in batches (newest first) from the `image` table of a
 * foreign database, each file is downloaded over HTTP, and its contents are
 * passed to UploadDummy::checkFile(). Files that fail the check are reported
 * on stdout, followed by a summary line.
 */
class CheckSVGs extends Maintenance {
	/** Batch size used when --batch-size is not given on the command line. */
	const DEFAULT_BATCH_SIZE = 500;

	/** @var string|null img_timestamp of the last row returned by getCandidates() */
	private $lastFileDate;

	/** @var string|null img_name of the last row returned by getCandidates() */
	private $lastFileName;

	/** @var UploadDummy Helper that runs the SVG checks */
	private $upload;

	/** @var object|null Connection to the foreign DB, as returned by Database::factory() */
	private $foreignDB;

	public function __construct() {
		parent::__construct();
		$this->addDescription( "Check old svgs to see if they are still valid" );
		$this->addOption( 'until', 'Date to stop checking', false, true );
		$this->addOption( 'basepath', 'Path for remote files', false, true );
		$this->addOption( 'fdbserver', 'Foreign DB server', true, true );
		$this->addOption( 'fdbuser', 'Foreign DB user', true, true );
		$this->addOption( 'fdbpassword', 'Foreign DB pass', true, true );
		$this->addOption( 'fdbname', 'Foreign db name', true, true );
		$this->addOption( 'fdbtype', 'Foreign db type (default mysql)', false, true );
		$this->addOption( 'max-size', 'Max size to check (in bytes)', false, true );
		$this->setBatchSize( self::DEFAULT_BATCH_SIZE );
	}

	/**
	 * Open a connection to the foreign database described by the fdb* options.
	 *
	 * This is meant to connect to a tool labs db.
	 *
	 * @return object Database connection; the script exits with status 1 if none is obtained
	 */
	private function getDBConnection() {
		$type = $this->getOption( 'fdbtype', 'mysql' );
		$params = [
			'host' => $this->getOption( 'fdbserver' ),
			'user' => $this->getOption( 'fdbuser' ),
			// FIXME, really shouldn't be passed on command line.
			'password' => $this->getOption( 'fdbpassword' ),
			'dbname' => $this->getOption( 'fdbname' ),
			'foreign' => true,
		];
		$db = Database::factory( $type, $params );
		if ( !$db ) {
			$this->error( "Could not get db" );
			exit( 1 );
		}
		return $db;
	}

	/**
	 * Walk through all candidate files batch by batch and report every file
	 * that no longer passes the filter.
	 */
	public function execute() {
		// Use the declared property; the original assigned an undeclared $this->db.
		$this->foreignDB = $this->getDBConnection();
		$this->upload = new UploadDummy;
		$tot = 0;
		$bad = 0;
		// getCandidates() returns an empty (falsy) array once nothing is left.
		while ( $candidates = $this->getCandidates() ) {
			foreach ( $candidates as $candidate ) {
				$file = $this->getFile( $candidate );
				$res = $this->checkFile( $file );
				// Drop the (possibly large) file contents before the next download.
				$file = '';
				if ( $res !== true ) {
					$bad++;
					echo "Error ($res): $candidate\n";
				}
				$tot++;
			}
			echo "\tDone batch ($tot) - at: " . end( $candidates ) . "\n";
		}
		echo "Complete: $tot total; $bad bad\n";
	}

	/**
	 * @param string $file Full contents of an SVG file
	 * @return true|string True if the file passes, otherwise an error code
	 */
	private function checkFile( $file ) {
		return $this->upload->checkFile( $file );
	}

	/**
	 * Download the contents of one file.
	 *
	 * @param string $filename Value of img_name
	 * @return string File contents
	 * @throws Exception If the download fails
	 */
	private function getFile( $filename ) {
		$url = $this->getUrlForFilename( $filename );
		$svg = Http::get( $url, [ 'userAgent' => 'SVG validation script. https://www.mediawiki.org/w/index.php?title=Wikimedia_Security_Team/SVG_filter_changes' ] );
		if ( $svg === false ) {
			throw new Exception( "Could not download file - $url" );
		}
		return $svg;
	}

	/**
	 * Build the download URL: base path, then the first one and first two hex
	 * digits of md5( $filename ) as directories, then the URL-encoded name.
	 *
	 * @param string $filename Value of img_name
	 * @return string
	 */
	private function getUrlForFilename( $filename ) {
		// e.g. https://upload.wikimedia.org/wikipedia/commons/
		$basepath = $this->getOption( 'basepath', 'https://upload.wikimedia.org/wikipedia/commons/' );
		$md5 = md5( $filename );
		$basepath .= substr( $md5, 0, 1 ) . '/' . substr( $md5, 0, 2 ) . '/';
		$basepath .= rawurlencode( $filename );
		return $basepath;
	}

	/**
	 * Fetch the next batch of SVG file names, newest first.
	 *
	 * Remembers the timestamp and name of the last row returned so that the
	 * following call continues right after it.
	 *
	 * @return string[] img_name values; empty when there is nothing left
	 */
	private function getCandidates() {
		$db = $this->foreignDB;
		$conds = [
			'img_size < ' . ( (int)$this->getOption( 'max-size', 1024 * 1024 * 10 ) ),
			'img_major_mime' => 'image',
			'img_minor_mime' => 'svg+xml',
			'img_media_type' => 'DRAWING'
		];
		if ( $this->lastFileDate !== null ) {
			// Keyset pagination matching the ORDER BY below (timestamp desc,
			// name asc). Comparing on the timestamp alone would skip the
			// remaining files that share the timestamp of the last row of the
			// previous batch.
			$lastDate = $db->addQuotes( $this->lastFileDate );
			$lastName = $db->addQuotes( $this->lastFileName );
			$conds[] = "( img_timestamp < $lastDate"
				. " OR ( img_timestamp = $lastDate AND img_name > $lastName ) )";
		}
		$until = $this->getOption( 'until' );
		if ( $until ) {
			$conds[] = 'img_timestamp > ' . $db->addQuotes( $until );
		}
		// Fall back to the default explicitly: without --batch-size on the
		// command line getOption() would yield null and the query would run
		// without a LIMIT.
		$limit = max( 1, (int)$this->getOption( 'batch-size', self::DEFAULT_BATCH_SIZE ) );
		$res = $db->select(
			'image',
			[ 'img_name', 'img_timestamp' ],
			$conds,
			__METHOD__,
			[
				'ORDER BY' => 'img_timestamp desc, img_name asc',
				'LIMIT' => $limit
			]
		);
		$actualResults = [];
		foreach ( $res as $row ) {
			$actualResults[] = $row->img_name;
			$this->lastFileName = $row->img_name;
			$this->lastFileDate = $row->img_timestamp;
		}
		return $actualResults;
	}
}
// Make sure UploadBase can be found.
require_once __DIR__ . "/../includes/AutoLoader.php";
/**
 * Minimal UploadBase subclass that exposes the SVG checks for raw file
 * contents, without going through a real upload.
 */
class UploadDummy extends UploadBase {
	/**
	 * Intentionally empty; this class is never initialised from a request.
	 *
	 * @param mixed &$request Unused
	 */
	public function initializeFromRequest( &$request ) {
	}

	/**
	 * Run the SVG checks over the contents of a file.
	 *
	 * @param string $file Full contents of svg file
	 *
	 * @return true|string True if ok, or string for error code
	 */
	public function checkFile( $file ) {
		// Reset the namespace error before each check.
		$this->mSVGNSError = false;

		$extraOptions = [ 'processing_instruction_handler' => 'UploadBase::checkSvgPICallback' ];
		$elementFilter = [ $this, 'checkSvgScriptCallback' ];
		// Third argument false: $file holds the contents, not a file name.
		$validator = new XmlTypeCheck( $file, $elementFilter, false, $extraOptions );

		if ( $validator->wellFormed !== true ) {
			return 'uploadinvalidxml';
		}
		if ( !$validator->filterMatch ) {
			return true;
		}
		// A filter matched: prefer the namespace error when one was recorded.
		return $this->mSVGNSError ? $this->mSVGNSError : $validator->filterMatchType;
	}
}
// Name of the maintenance class defined in this file that is to be run.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is not defined in this file; presumably
// it is provided by Maintenance.php and resolves to the runner script, which
// reads $maintClass - confirm.
$maintClass = "CheckSVGs";
require_once RUN_MAINTENANCE_IF_MAIN;