| Index: trunk/phase3/maintenance/storage/recompressTracked.php |
| — | — | @@ -4,9 +4,13 @@ |
| 5 | 5 | require( dirname( __FILE__ ) .'/../commandLine.inc' ); |
| 6 | 6 | |
| 7 | 7 | if ( count( $args ) < 1 ) { |
| 8 | | - echo "Usage: php recompressTracked.php <cluster> [... <cluster>...]\n"; |
| 9 | | - echo "Moves blobs indexed by trackBlobs.php to a specified list of destination |
| 10 | | -clusters, and recompresses them in the process. Restartable.\n"; |
| | 8 | + echo "Usage: php recompressTracked.php [options] <cluster> [... <cluster>...] |
| | 9 | +Moves blobs indexed by trackBlobs.php to a specified list of destination clusters, and recompresses them in the process. Restartable. |
| | 10 | + |
| | 11 | +Options: |
| | 12 | + --procs <procs> Set the number of child processes (default 8) |
| | 13 | + --copy-only Copy only, do not update the text table. Restart without this option to complete. |
| | 14 | +"; |
| 11 | 15 | exit( 1 ); |
| 12 | 16 | } |
| 13 | 17 | |
| — | — | @@ -18,17 +22,16 @@ |
| 19 | 23 | var $batchSize = 1000; |
| 20 | 24 | var $reportingInterval = 10; |
| 21 | 25 | var $numProcs = 8; |
| | 26 | + var $useDiff, $pageBlobClass, $orphanBlobClass; |
| 22 | 27 | var $slavePipes, $slaveProcs, $prevSlaveId; |
| 23 | | - var $blobClass = 'DiffHistoryBlob'; |
| 24 | 28 | var $copyOnly = false; |
| 25 | 29 | var $isChild = false; |
| 26 | 30 | var $slaveId = false; |
| 27 | 31 | var $store; |
| 28 | 32 | |
| 29 | | - static $optionsWithArgs = array( 'procs', 'class' ); |
| | 33 | + static $optionsWithArgs = array( 'procs', 'slave-id' ); |
| 30 | 34 | static $cmdLineOptionMap = array( |
| 31 | 35 | 'procs' => 'numProcs', |
| 32 | | - 'class' => 'blobClass', |
| 33 | 36 | 'copy-only' => 'copyOnly', |
| 34 | 37 | 'child' => 'isChild', |
| 35 | 38 | 'slave-id' => 'slaveId', |
| — | — | @@ -53,14 +56,18 @@ |
| 54 | 57 | $this->$name = $value; |
| 55 | 58 | } |
| 56 | 59 | $this->store = new ExternalStoreDB; |
| | 60 | + if ( !$this->isChild ) { |
| | 61 | + $GLOBALS['wgDebugLogPrefix'] = "RCT M: "; |
| | 62 | + } elseif ( $this->slaveId !== false ) { |
| | 63 | + $GLOBALS['wgDebugLogPrefix'] = "RCT {$this->slaveId}: "; |
| | 64 | + } |
| | 65 | + $this->useDiff = function_exists( 'xdiff_string_bdiff' ); |
| | 66 | + $this->pageBlobClass = $this->useDiff ? 'DiffHistoryBlob' : 'ConcatenatedGzipHistoryBlob'; |
| | 67 | + $this->orphanBlobClass = 'ConcatenatedGzipHistoryBlob'; |
| 57 | 68 | } |
| 58 | 69 | |
| 59 | 70 | function debug( $msg ) { |
| 60 | | - if ( $this->slaveId !== false ) { |
| 61 | | - $msg = "{$this->slaveId}: $msg"; |
| 62 | | - } |
| 63 | | - $msg .= "\n"; |
| 64 | | - wfDebug( $msg ); |
| | 71 | + wfDebug( "$msg\n" ); |
| 65 | 72 | } |
| 66 | 73 | |
| 67 | 74 | /** |
| — | — | @@ -146,7 +153,7 @@ |
| 147 | 154 | array( 'file', '/dev/stderr', 'w' ) |
| 148 | 155 | ); |
| 149 | 156 | wfSuppressWarnings(); |
| 150 | | - $proc = proc_open( $cmd, $spec, $pipes ); |
| | 157 | + $proc = proc_open( "$cmd --slave-id $i", $spec, $pipes ); |
| 151 | 158 | wfRestoreWarnings(); |
| 152 | 159 | if ( !$proc ) { |
| 153 | 160 | echo "Error opening slave process\n"; |
| — | — | @@ -299,6 +306,7 @@ |
| 300 | 307 | * Main entry point for worker processes |
| 301 | 308 | */ |
| 302 | 309 | function executeChild() { |
| | 310 | + $this->debug( 'starting' ); |
| 303 | 311 | $this->syncDBs(); |
| 304 | 312 | |
| 305 | 313 | while ( !feof( STDIN ) ) { |
| — | — | @@ -306,6 +314,7 @@ |
| 307 | 315 | if ( $line == '' ) { |
| 308 | 316 | continue; |
| 309 | 317 | } |
| | 318 | + $this->debug( $line ); |
| 310 | 319 | $args = explode( ' ', $line ); |
| 311 | 320 | $cmd = array_shift( $args ); |
| 312 | 321 | switch ( $cmd ) { |
| — | — | @@ -325,15 +334,21 @@ |
| 326 | 335 | * Move tracked text in a given page |
| 327 | 336 | */ |
| 328 | 337 | function doPage( $pageId ) { |
| | 338 | + $title = Title::newFromId( $pageId ); |
| | 339 | + if ( $title ) { |
| | 340 | + $titleText = $title->getPrefixedText(); |
| | 341 | + } else { |
| | 342 | + $titleText = '[deleted]'; |
| | 343 | + } |
| 329 | 344 | $dbr = wfGetDB( DB_SLAVE ); |
| 330 | 345 | |
| 331 | 346 | // Finish any incomplete transactions |
| 332 | 347 | if ( !$this->copyOnly ) { |
| 333 | | - $this->finishIncompleteMoves(); |
| | 348 | + $this->finishIncompleteMoves( array( 'bt_page' => $pageId ) ); |
| 334 | 349 | } |
| 335 | 350 | |
| 336 | 351 | $startId = 0; |
| 337 | | - $trx = new CgzCopyTransaction( $this ); |
| | 352 | + $trx = new CgzCopyTransaction( $this, $this->pageBlobClass ); |
| 338 | 353 | |
| 339 | 354 | while ( true ) { |
| 340 | 355 | $res = $dbr->select( |
| — | — | @@ -343,7 +358,7 @@ |
| 344 | 359 | 'bt_page' => $pageId, |
| 345 | 360 | 'bt_text_id > ' . $dbr->addQuotes( $startId ), |
| 346 | 361 | 'bt_moved' => 0, |
| 347 | | - 'bt_new_url' => '', |
| | 362 | + 'bt_new_url IS NULL', |
| 348 | 363 | 'bt_text_id=old_id', |
| 349 | 364 | ), |
| 350 | 365 | __METHOD__, |
| — | — | @@ -372,12 +387,15 @@ |
| 373 | 388 | |
| 374 | 389 | // Queue it |
| 375 | 390 | if ( !$trx->addItem( $text, $row->bt_text_id ) ) { |
| | 391 | + $this->debug( "$titleText: committing blob with " . $trx->getSize() . " items" ); |
| 376 | 392 | $trx->commit(); |
| 377 | | - $trx = new CgzCopyTransaction( $this ); |
| | 393 | + $trx = new CgzCopyTransaction( $this, $this->pageBlobClass ); |
| 378 | 394 | } |
| 379 | 395 | } |
| 380 | 396 | $startId = $row->bt_text_id; |
| 381 | 397 | } |
| | 398 | + |
| | 399 | + $this->debug( "$titleText: committing blob with " . $trx->getSize() . " items" ); |
| 382 | 400 | $trx->commit(); |
| 383 | 401 | } |
| 384 | 402 | |
| — | — | @@ -420,18 +438,18 @@ |
| 421 | 439 | * This function completes any moves that only have done bt_new_url. This |
| 422 | 440 | * can happen when the script is interrupted, or when --copy-only is used. |
| 423 | 441 | */ |
| 424 | | - function finishIncompleteMoves() { |
| | 442 | + function finishIncompleteMoves( $conds ) { |
| 425 | 443 | $dbr = wfGetDB( DB_SLAVE ); |
| 426 | 444 | |
| 427 | 445 | $startId = 0; |
| | 446 | + $conds = array_merge( $conds, array( |
| | 447 | + 'bt_moved' => 0, |
| | 448 | + 'bt_new_url IS NOT NULL' |
| | 449 | + )); |
| 428 | 450 | while ( true ) { |
| 429 | 451 | $res = $dbr->select( 'blob_tracking', |
| 430 | 452 | '*', |
| 431 | | - array( |
| 432 | | - 'bt_text_id > ' . $dbr->addQuotes( $startId ), |
| 433 | | - 'bt_moved' => 0, |
| 434 | | - "bt_new_url <> ''", |
| 435 | | - ), |
| | 453 | + array_merge( $conds, array( 'bt_text_id > ' . $dbr->addQuotes( $startId ) ) ), |
| 436 | 454 | __METHOD__, |
| 437 | 455 | array( |
| 438 | 456 | 'ORDER BY' => 'bt_text_id', |
| — | — | @@ -441,6 +459,7 @@ |
| 442 | 460 | if ( !$res->numRows() ) { |
| 443 | 461 | break; |
| 444 | 462 | } |
| | 463 | + $this->debug( 'Incomplete: ' . $res->numRows() . ' rows' ); |
| 445 | 464 | foreach ( $res as $row ) { |
| 446 | 465 | $this->moveTextRow( $row->bt_text_id, $row->bt_new_url ); |
| 447 | 466 | } |
| — | — | @@ -471,7 +490,10 @@ |
| 472 | 491 | * Move an orphan text_id to the new cluster |
| 473 | 492 | */ |
| 474 | 493 | function doOrphanList( $textIds ) { |
| 475 | | - $trx = new CgzCopyTransaction( $this ); |
| | 494 | + // Finish incomplete moves |
| | 495 | + $this->finishIncompleteMoves( array( 'bt_text_id' => $textIds ) ); |
| | 496 | + |
| | 497 | + $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass ); |
| 476 | 498 | foreach ( $textIds as $textId ) { |
| 477 | 499 | $row = wfGetDB( DB_SLAVE )->selectRow( 'text', array( 'old_text', 'old_flags' ), |
| 478 | 500 | array( 'old_id' => $textId ), __METHOD__ ); |
| — | — | @@ -482,10 +504,13 @@ |
| 483 | 505 | } |
| 484 | 506 | |
| 485 | 507 | if ( !$trx->addItem( $text, $textId ) ) { |
| | 508 | + $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" ); |
| 486 | 509 | $trx->commit(); |
| 487 | | - $trx = new CgzCopyTransaction( $this ); |
| | 510 | + $trx = new CgzCopyTransaction( $this, $this->orphanBlobClass ); |
| 488 | 511 | } |
| 489 | 512 | } |
| | 513 | + $this->debug( "[orphan]: committing blob with " . $trx->getSize() . " rows" ); |
| | 514 | + $trx->commit(); |
| 490 | 515 | } |
| 491 | 516 | } |
| 492 | 517 | |
| — | — | @@ -493,6 +518,7 @@ |
| 494 | 519 | * Class to represent a recompression operation for a single CGZ blob |
| 495 | 520 | */ |
| 496 | 521 | class CgzCopyTransaction { |
| | 522 | + var $parent; |
| 497 | 523 | var $blobClass; |
| 498 | 524 | var $cgz; |
| 499 | 525 | var $referrers; |
| — | — | @@ -500,10 +526,11 @@ |
| 501 | 527 | /** |
| 502 | 528 | * Create a transaction from a RecompressTracked object |
| 503 | 529 | */ |
| 504 | | - function __construct( $parent ) { |
| 505 | | - $this->blobClass = $parent->blobClass; |
| | 530 | + function __construct( $parent, $blobClass ) { |
| | 531 | + $this->blobClass = $blobClass; |
| 506 | 532 | $this->cgz = false; |
| 507 | 533 | $this->texts = array(); |
| | 534 | + $this->parent = $parent; |
| 508 | 535 | } |
| 509 | 536 | |
| 510 | 537 | /** |
| — | — | @@ -521,6 +548,10 @@ |
| 522 | 549 | return $this->cgz->isHappy(); |
| 523 | 550 | } |
| 524 | 551 | |
| | 552 | + function getSize() { |
| | 553 | + return count( $this->texts ); |
| | 554 | + } |
| | 555 | + |
| 525 | 556 | /** |
| 526 | 557 | * Recompress text after some aberrant modification |
| 527 | 558 | */ |
| — | — | @@ -554,16 +585,16 @@ |
| 555 | 586 | // We do a locking read to prevent closer-run race conditions. |
| 556 | 587 | $dbw = wfGetDB( DB_MASTER ); |
| 557 | 588 | $dbw->begin(); |
| | 589 | + $res = $dbw->select( 'blob_tracking', |
| | 590 | + array( 'bt_text_id', 'bt_moved' ), |
| | 591 | + array( 'bt_text_id' => array_keys( $this->referrers ) ), |
| | 592 | + __METHOD__, array( 'FOR UPDATE' ) ); |
| 558 | 593 | $dirty = false; |
| 559 | | - foreach ( $this->referrers as $textId => $hash ) { |
| 560 | | - $moved = $dbw->selectField( 'blob_tracking', 'bt_moved', |
| 561 | | - array( 'bt_text_id' => $textId ), |
| 562 | | - __METHOD__, |
| 563 | | - array( 'FOR UPDATE' ) |
| 564 | | - ); |
| 565 | | - if ( !$moved ) { |
| | 594 | + foreach ( $res as $row ) { |
| | 595 | + if ( $row->bt_moved ) { |
| 566 | 596 | # This row has already been moved, remove it |
| 567 | | - unset( $this->texts[$textId] ); |
| | 597 | + $this->parent->debug( "TRX: conflict detected in old_id={$row->bt_text_id}" ); |
| | 598 | + unset( $this->texts[$row->bt_text_id] ); |
| 568 | 599 | $dirty = true; |
| 569 | 600 | } |
| 570 | 601 | } |
| — | — | @@ -574,7 +605,7 @@ |
| 575 | 606 | // All have been moved already |
| 576 | 607 | if ( $originalCount > 1 ) { |
| 577 | 608 | // This is suspicious, make noise |
| 578 | | - echo "Warning: concurrent operation detected, are there two conflicting\n" . |
| | 609 | + echo "Warning: concurrent operation detected, are there two conflicting " . |
| 579 | 610 | "processes running, doing the same job?\n"; |
| 580 | 611 | } |
| 581 | 612 | return; |
| — | — | @@ -616,9 +647,5 @@ |
| 617 | 648 | } |
| 618 | 649 | } |
| 619 | 650 | } |
| 620 | | - |
| 621 | | - function signalHandler() { |
| 622 | | - $this->signalled = true; |
| 623 | | - } |
| 624 | 651 | } |
| 625 | 652 | |
| Index: trunk/phase3/maintenance/storage/testCompression.php |
| — | — | @@ -15,7 +15,13 @@ |
| 16 | 16 | } else { |
| 17 | 17 | $start = '19700101000000'; |
| 18 | 18 | } |
| 19 | | -$limit = isset( $options['limit'] ) ? $options['limit'] : 10; |
| | 19 | +if ( isset( $options['limit'] ) ) { |
| | 20 | + $limit = $options['limit']; |
| | 21 | + $untilHappy = false; |
| | 22 | +} else { |
| | 23 | + $limit = 1000; |
| | 24 | + $untilHappy = true; |
| | 25 | +} |
| 20 | 26 | $type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryBlob'; |
| 21 | 27 | |
| 22 | 28 | |
| — | — | @@ -43,16 +49,21 @@ |
| 44 | 50 | $uncompressedSize += strlen( $text ); |
| 45 | 51 | $hashes[$row->rev_id] = md5( $text ); |
| 46 | 52 | $keys[$row->rev_id] = $blob->addItem( $text ); |
| | 53 | + if ( $untilHappy && !$blob->isHappy() ) { |
| | 54 | + break; |
| | 55 | + } |
| 47 | 56 | } |
| 48 | 57 | |
| 49 | 58 | $serialized = serialize( $blob ); |
| 50 | 59 | $t += microtime( true ); |
| | 60 | +#print_r( $blob->mDiffMap ); |
| 51 | 61 | |
| 52 | | -printf( "Compression ratio for %d revisions: %5.2f, %s -> %s\n", |
| 53 | | - $res->numRows(), |
| | 62 | +printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n", |
| | 63 | + $type, |
| | 64 | + count( $hashes ), |
| 54 | 65 | $uncompressedSize / strlen( $serialized ), |
| 55 | 66 | $wgLang->formatSize( $uncompressedSize ), |
| 56 | | - $wgLang->formatSize( strlen( $serialized ) ) |
| | 67 | + strlen( $serialized ) |
| 57 | 68 | ); |
| 58 | 69 | printf( "Compression time: %5.2f ms\n", $t * 1000 ); |
| 59 | 70 | |
| Index: trunk/phase3/maintenance/storage/blob_tracking.sql |
| — | — | @@ -4,10 +4,14 @@ |
| 5 | 5 | CREATE TABLE /*$wgDBprefix*/blob_tracking ( |
| 6 | 6 | -- page.page_id |
| 7 | 7 | -- This may be zero for orphan or deleted text |
| | 8 | + -- Note that this is for compression grouping only -- it doesn't need to be |
| | 9 | + -- accurate at the time recompressTracked is run. Operations such as a |
| | 10 | + -- delete/undelete cycle may make it inaccurate. |
| 8 | 11 | bt_page integer not null, |
| 9 | 12 | |
| 10 | 13 | -- revision.rev_id |
| 11 | 14 | -- This may be zero for orphan or deleted text |
| | 15 | + -- Like bt_page, it does not need to be accurate when recompressTracked is run. |
| 12 | 16 | bt_rev_id integer not null, |
| 13 | 17 | |
| 14 | 18 | -- text.old_id |
| Index: trunk/phase3/includes/GlobalFunctions.php |
| — | — | @@ -195,6 +195,7 @@ |
| 196 | 196 | */ |
| 197 | 197 | function wfDebug( $text, $logonly = false ) { |
| 198 | 198 | global $wgOut, $wgDebugLogFile, $wgDebugComments, $wgProfileOnly, $wgDebugRawPage; |
| | 199 | + global $wgDebugLogPrefix; |
| 199 | 200 | static $recursion = 0; |
| 200 | 201 | |
| 201 | 202 | static $cache = array(); // Cache of unoutputted messages |
| — | — | @@ -227,6 +228,7 @@ |
| 228 | 229 | # Strip unprintables; they can switch terminal modes when binary data |
| 229 | 230 | # gets dumped, which is pretty annoying. |
| 230 | 231 | $text = preg_replace( '![\x00-\x08\x0b\x0c\x0e-\x1f]!', ' ', $text ); |
| | 232 | + $text = $wgDebugLogPrefix . $text; |
| 231 | 233 | wfErrorLog( $text, $wgDebugLogFile ); |
| 232 | 234 | } |
| 233 | 235 | } |
| Index: trunk/phase3/includes/DefaultSettings.php |
| — | — | @@ -838,7 +838,6 @@ |
| 839 | 839 | |
| 840 | 840 | /** |
| 841 | 841 | * Translation using MediaWiki: namespace. |
| 842 | | - * This will increase load times by 25-60% unless memcached is installed. |
| 843 | 842 | * Interface messages will be loaded from the database. |
| 844 | 843 | */ |
| 845 | 844 | $wgUseDatabaseMessages = true; |
| — | — | @@ -952,6 +951,16 @@ |
| 953 | 952 | $wgExtraSubtitle = ''; |
| 954 | 953 | $wgSiteSupportPage = ''; # A page where your users can receive donations |
| 955 | 954 | |
| | 955 | +/** |
| | 956 | + * Set this to a string to put the wiki into read-only mode. The text will be |
| | 957 | + * used as an explanation to users. |
| | 958 | + * |
| | 959 | + * This prevents most write operations via the web interface. Cache updates may |
| | 960 | + * still be possible. To prevent database writes completely, use the read_only |
| | 961 | + * option in MySQL. |
| | 962 | + */ |
| | 963 | +$wgReadOnly = null; |
| | 964 | + |
| 956 | 965 | /*** |
| 957 | 966 | * If this lock file exists, the wiki will be forced into read-only mode. |
| 958 | 967 | * Its contents will be shown to users as part of the read-only warning |
| — | — | @@ -960,15 +969,42 @@ |
| 961 | 970 | $wgReadOnlyFile = false; ///< defaults to "{$wgUploadDirectory}/lock_yBgMBwiR"; |
| 962 | 971 | |
| 963 | 972 | /** |
| | 973 | + * Filename for debug logging. |
| 964 | 974 | * The debug log file should not be publicly accessible if it is used, as it |
| 965 | | - * may contain private data. */ |
| | 975 | + * may contain private data. |
| | 976 | + */ |
| 966 | 977 | $wgDebugLogFile = ''; |
| 967 | 978 | |
| | 979 | +/** |
| | 980 | + * Prefix for debug log lines |
| | 981 | + */ |
| | 982 | +$wgDebugLogPrefix = ''; |
| | 983 | + |
| | 984 | +/** |
| | 985 | + * If true, instead of redirecting, show a page with a link to the redirect |
| | 986 | + * destination. This allows for the inspection of PHP error messages, and easy |
| | 987 | + * resubmission of form data. For developer use only. |
| | 988 | + */ |
| 968 | 989 | $wgDebugRedirects = false; |
| 969 | | -$wgDebugRawPage = false; # Avoid overlapping debug entries by leaving out CSS |
| 970 | 990 | |
| | 991 | +/** |
| | 992 | + * If true, log debugging data from action=raw. |
| | 993 | + * This is normally false to avoid overlapping debug entries due to gen=css and |
| | 994 | + * gen=js requests. |
| | 995 | + */ |
| | 996 | +$wgDebugRawPage = false; |
| | 997 | + |
| | 998 | +/** |
| | 999 | + * Send debug data to an HTML comment in the output. |
| | 1000 | + * |
| | 1001 | + * This may occasionally be useful when supporting a non-technical end-user. It's |
| | 1002 | + * more secure than exposing the debug log file to the web, since the output only |
| | 1003 | + * contains private data for the current user. But it's not ideal for development |
| | 1004 | + * use since data is lost on fatal errors and redirects. |
| | 1005 | + */ |
| 971 | 1006 | $wgDebugComments = false; |
| 972 | | -$wgReadOnly = null; |
| | 1007 | + |
| | 1008 | +/** Does nothing. Obsolete? */ |
| 973 | 1009 | $wgLogQueries = false; |
| 974 | 1010 | |
| 975 | 1011 | /** |
| — | — | @@ -1027,7 +1063,8 @@ |
| 1028 | 1064 | * same options. |
| 1029 | 1065 | * |
| 1030 | 1066 | * This can provide a significant speedup for medium to large pages, |
| 1031 | | - * so you probably want to keep it on. |
| | 1067 | + * so you probably want to keep it on. Extensions that conflict with the |
| | 1068 | + * parser cache should disable the cache on a per-page basis instead. |
| 1032 | 1069 | */ |
| 1033 | 1070 | $wgEnableParserCache = true; |
| 1034 | 1071 | |