User:Leucosticte/Analysis of file uploads

From mediawiki.org

Files[edit]

ApiUpload.php[edit]

// Create an UploadFromFile handler and initialize it with the 'filename'
// parameter and the wrapper for the uploaded form field named 'file'.
$this->mUpload = new UploadFromFile();
$this->mUpload->initialize(
	$this->mParams['filename'],
	$request->getUpload( 'file' )
);
...
// Perform the upload, passing along the 'comment' and 'text' parameters,
// the watch flag and the current user; the outcome is kept in $status.
$status = $this->mUpload->performUpload( $this->mParams['comment'],
	$this->mParams['text'], $watch, $this->getUser() );

UploadFromFile.php[edit]

/**
 * Attach an uploaded file to this handler and set up its path information.
 *
 * @param string $name Name handed on to initializePathInfo()
 * @param WebRequestUpload $webRequestUpload Wrapper for the uploaded file
 */
function initialize( $name, $webRequestUpload ) {
	$this->mUpload = $webRequestUpload;

	// Read the temporary path and the size from the stored upload wrapper
	$tempPath = $this->mUpload->getTempName();
	$fileSize = $this->mUpload->getSize();

	$this->initializePathInfo( $name, $tempPath, $fileSize );
}

WebRequest.php[edit]

/**
 * Build a wrapper object for one entry of the $_FILES array.
 *
 * @param string $key Key in $_FILES array (name of form field)
 * @return WebRequestUpload
 */
public function getUpload( $key ) {
	$upload = new WebRequestUpload( $this, $key );

	return $upload;
}
/**
 * Object to access the $_FILES array
 */
class WebRequestUpload {
	/** @var WebRequest The request this upload belongs to */
	protected $request;
	/** @var bool Whether $_FILES contained an entry for the requested key */
	protected $doesExist;
	/** @var array The $_FILES entry; only assigned when $doesExist is true */
	protected $fileInfo;

	/**
	 * Constructor. Should only be called by WebRequest
	 *
	 * @param WebRequest $request The associated request
	 * @param string $key Key in $_FILES array (name of form field)
	 */
	public function __construct( $request, $key ) {
		$this->request = $request;
		$this->doesExist = isset( $_FILES[$key] );
		if ( $this->doesExist ) {
			$this->fileInfo = $_FILES[$key];
		}
	}

	/**
	 * Return whether a file with this name was uploaded.
	 *
	 * @return bool
	 */
	public function exists() {
		return $this->doesExist;
	}

	/**
	 * Return the original filename of the uploaded file
	 *
	 * @return string|null Filename or null if non-existent
	 */
	public function getName() {
		if ( !$this->exists() ) {
			return null;
		}

		global $wgContLang;
		$name = $this->fileInfo['name'];

		# Safari sends filenames in HTML-encoded Unicode form D...
		# Horrid and evil! Let's try to make some kind of sense of it.
		$name = Sanitizer::decodeCharReferences( $name );
		$name = $wgContLang->normalize( $name );
		wfDebug( __METHOD__ . ": {$this->fileInfo['name']} normalized to '$name'\n" );
		return $name;
	}

	/**
	 * Return the file size of the uploaded file
	 *
	 * @return int File size or zero if non-existent
	 */
	public function getSize() {
		if ( !$this->exists() ) {
			return 0;
		}

		return $this->fileInfo['size'];
	}

	/**
	 * Return the path to the temporary file
	 *
	 * @return string|null Path or null if non-existent
	 */
	public function getTempName() {
		if ( !$this->exists() ) {
			return null;
		}

		return $this->fileInfo['tmp_name'];
	}

	/**
	 * Return the upload error. See link for explanation
	 * http://www.php.net/manual/en/features.file-upload.errors.php
	 *
	 * @return int One of the UPLOAD_ constants, 0 if non-existent
	 */
	public function getError() {
		if ( !$this->exists() ) {
			return 0; # UPLOAD_ERR_OK
		}

		return $this->fileInfo['error'];
	}

	/**
	 * Returns whether this upload failed because of overflow of a maximum set
	 * in php.ini
	 *
	 * This is the case when PHP reported UPLOAD_ERR_INI_SIZE for the file, or
	 * when the request's content length is larger than a positive
	 * post_max_size setting.
	 *
	 * @return bool
	 */
	public function isIniSizeOverflow() {
		if ( $this->getError() == UPLOAD_ERR_INI_SIZE ) {
			# PHP indicated that upload_max_filesize is exceeded
			return true;
		}

		$contentLength = $this->request->getHeader( 'CONTENT_LENGTH' );
		$maxPostSize = wfShorthandToInteger( ini_get( 'post_max_size' ) );
		# PHP treats post_max_size = 0 as "no limit", so a non-positive value
		# can never be exceeded; only compare against a real limit.
		if ( $maxPostSize > 0 && $contentLength > $maxPostSize ) {
			# post_max_size is exceeded
			return true;
		}

		return false;
	}
}

UploadBase.php[edit]

	/**
	 * Really perform the upload. Stores the file in the local repo, watches
	 * if necessary and runs the UploadComplete hook.
	 *
	 * The watch, the hook and postProcessUpload() are only run when the
	 * status returned by the local file's upload() is good.
	 *
	 * @param string $comment
	 * @param string $pageText
	 * @param bool $watch
	 * @param User $user
	 *
	 * @return Status Indicating whether the upload succeeded.
	 */
	public function performUpload( $comment, $pageText, $watch, $user ) {
		wfProfileIn( __METHOD__ );

		$status = $this->getLocalFile()->upload(
			$this->mTempPath,
			$comment,
			$pageText,
			File::DELETE_SOURCE,
			$this->mFileProps,
			false,
			$user
		);

		if ( $status->isGood() ) {
			if ( $watch ) {
				WatchAction::doWatch(
					$this->getLocalFile()->getTitle(),
					$user,
					WatchedItem::IGNORE_USER_RIGHTS
				);
			}
			// The hook receives the upload object by reference. PHP 7.1+ no
			// longer allows taking a reference to $this, so hand over a local
			// alias of the same object instead.
			$upload = $this;
			wfRunHooks( 'UploadComplete', array( &$upload ) );

			$this->postProcessUpload();
		}

		wfProfileOut( __METHOD__ );

		return $status;
	}

LocalFile.php[edit]

	/**
	 * Publish a file to its public location and record the upload in the DB.
	 *
	 * @param string $srcPath Source storage path, virtual URL, or filesystem path
	 * @param string $comment Upload description; trimmed and truncated to 255
	 *   before being recorded
	 * @param string $pageText Text to use for the new description page,
	 *   if a new description page is created
	 * @param int|bool $flags Flags for publish()
	 * @param array|bool $props File properties, if known. When not given they
	 *   are read from the repo (virtual URLs and storage paths) or from the
	 *   file system path
	 * @param string|bool $timestamp Timestamp for img_timestamp, or false to use the
	 *   current time
	 * @param User|null $user User object or null to use $wgUser
	 *
	 * @return FileRepoStatus On success, the value member contains the
	 *     archive name, or an empty string if it was a new file.
	 */
	function upload( $srcPath, $comment, $pageText, $flags = 0, $props = false,
		$timestamp = false, $user = null
	) {
		global $wgContLang;

		if ( $this->getRepo()->getReadOnlyReason() !== false ) {
			return $this->readOnlyFatalStatus();
		}

		// Look the file properties up ourselves when the caller supplied none
		if ( !$props ) {
			wfProfileIn( __METHOD__ . '-getProps' );
			$isRepoPath = $this->repo->isVirtualUrl( $srcPath )
				|| FileBackend::isStoragePath( $srcPath );
			$props = $isRepoPath
				? $this->repo->getFileProps( $srcPath )
				: FSFile::getPropsFromPath( $srcPath );
			wfProfileOut( __METHOD__ . '-getProps' );
		}

		// Stream headers come from the media handler for this MIME type, if any
		$handler = MediaHandler::getHandler( $props['mime'] );
		$options = array(
			'headers' => $handler ? $handler->getStreamHeaders( $props['metadata'] ) : array()
		);

		// Trim spaces on the user supplied text, then truncate nicely or the
		// DB will do it for us non-nicely (dangling multi-byte chars,
		// non-truncated version in cache).
		$comment = $wgContLang->truncate( trim( $comment ), 255 );

		$this->lock(); // begin
		$status = $this->publish( $srcPath, $flags, $options );

		// There will be a copy+(one of move,copy,store).
		// The first succeeding does not commit us to updating the DB
		// since it simply copied the current version to a timestamped file name.
		// It is only *preferable* to avoid leaving such files orphaned.
		// Once the second operation goes through, then the current version was
		// updated and we must therefore update the DB too.
		$published = $status->successCount >= 2;
		if ( $published
			&& !$this->recordUpload2( $status->value, $comment, $pageText, $props, $timestamp, $user )
		) {
			$status->fatal( 'filenotfound', $srcPath );
		}

		$this->unlock(); // done

		return $status;
	}
	/**
	 * Move or copy a file to its public location, i.e. the relative path
	 * given by getRel(). This delegates to publishTo(); the returned
	 * FileRepoStatus carries the archive name in its "value" member on success.
	 *
	 * The archive name should be passed through to recordUpload for database
	 * registration.
	 *
	 * @param string $srcPath Local filesystem path to the source image
	 * @param int $flags A bitwise combination of:
	 *     File::DELETE_SOURCE    Delete the source file, i.e. move rather than copy
	 * @param array $options Optional additional parameters
	 * @return FileRepoStatus On success, the value member contains the
	 *     archive name, or an empty string if it was a new file.
	 */
	function publish( $srcPath, $flags = 0, array $options = array() ) {
		$dstRel = $this->getRel();

		return $this->publishTo( $srcPath, $dstRel, $flags, $options );
	}

	/**
	 * Move or copy a file to a specified location. Returns a FileRepoStatus
	 * object with the archive name in the "value" member on success.
	 *
	 * The archive name is built from the current timestamp, a '!' and this
	 * file's name, and should be passed through to recordUpload for database
	 * registration.
	 *
	 * @param string $srcPath Local filesystem path to the source image
	 * @param string $dstRel Target relative path
	 * @param int $flags A bitwise combination of:
	 *     File::DELETE_SOURCE    Delete the source file, i.e. move rather than copy
	 * @param array $options Optional additional parameters
	 * @return FileRepoStatus On success, the value member contains the
	 *     archive name, or an empty string if it was a new file.
	 */
	function publishTo( $srcPath, $dstRel, $flags = 0, array $options = array() ) {
		if ( $this->getRepo()->getReadOnlyReason() !== false ) {
			return $this->readOnlyFatalStatus();
		}

		$this->lock(); // begin

		$archiveName = wfTimestamp( TS_MW ) . '!' . $this->getName();
		$archiveRel = 'archive/' . $this->getHashPath() . $archiveName;

		// Translate the File flag into the matching repo flag
		$repoFlags = ( $flags & File::DELETE_SOURCE ) ? LocalRepo::DELETE_SOURCE : 0;
		$status = $this->repo->publish( $srcPath, $dstRel, $archiveRel, $repoFlags, $options );

		// The repo reports 'new' for new files; callers get an empty string
		// for those and the archive name otherwise
		$status->value = ( $status->value == 'new' ) ? '' : $archiveName;

		$this->unlock(); // done

		return $status;
	}

FileRepo.php[edit]

	/**
	 * Copy or move a single file either from a storage path, virtual URL,
	 * or file system path, into this repository at the specified destination
	 * location. This wraps publishBatch() with a one-element batch.
	 *
	 * Returns a FileRepoStatus object. On success, the value contains "new" or
	 * "archived", to indicate whether the file was new with that name. The
	 * value is false when the batch produced no entry for the file, and the
	 * status is marked as not OK when no operation succeeded.
	 *
	 * Options to $options include:
	 *   - headers : name/value map of HTTP headers to use in response to GET/HEAD requests
	 *
	 * @param string $srcPath The source file system path, storage path, or URL
	 * @param string $dstRel The destination relative path
	 * @param string $archiveRel The relative path where the existing file is to
	 *   be archived, if there is one. Relative to the public zone root.
	 * @param int $flags Bitfield, may be FileRepo::DELETE_SOURCE to indicate
	 *   that the source file should be deleted if possible
	 * @param array $options Optional additional parameters
	 * @return FileRepoStatus
	 */
	public function publish(
		$srcPath, $dstRel, $archiveRel, $flags = 0, array $options = array()
	) {
		$this->assertWritableRepo(); // fail out if read-only

		$ntuple = array( $srcPath, $dstRel, $archiveRel, $options );
		$status = $this->publishBatch( array( $ntuple ), $flags );

		if ( $status->successCount == 0 ) {
			$status->ok = false;
		}

		// Collapse the per-file result list down to the single file's result
		$status->value = isset( $status->value[0] ) ? $status->value[0] : false;

		return $status;
	}

	/**
	 * Publish a batch of files
	 *
	 * For each tuple this queues a copy of any existing destination file to
	 * the archive path, followed by a move, copy or store of the source to the
	 * destination. All queued operations are then handed to the backend in a
	 * single doOperations() call.
	 *
	 * @param array $ntuples (source, dest, archive) triplets or
	 *   (source, dest, archive, options) 4-tuples as per publish().
	 * @param int $flags Bitfield, may be FileRepo::DELETE_SOURCE to indicate
	 *   that the source files should be deleted if possible
	 * @throws MWException If a destination or archive relative path fails
	 *   validateFilename()
	 * @return FileRepoStatus When the operations were attempted, the value
	 *   member maps each tuple index to 'archived' or 'new'
	 */
	public function publishBatch( array $ntuples, $flags = 0 ) {
		$this->assertWritableRepo(); // fail out if read-only

		$backend = $this->backend; // convenience
		// Try creating directories
		$status = $this->initZones( 'public' );
		if ( !$status->isOK() ) {
			return $status;
		}

		// Start over with a fresh good status whose value is an empty list
		$status = $this->newGood( array() );

		$operations = array();
		$sourceFSFilesToDelete = array(); // cleanup for disk source files
		// Validate each triplet and get the store operation...
		foreach ( $ntuples as $ntuple ) {
			list( $srcPath, $dstRel, $archiveRel ) = $ntuple;
			// The fourth tuple element (options) is optional
			$options = isset( $ntuple[3] ) ? $ntuple[3] : array();
			// Resolve source to a storage path if virtual
			$srcPath = $this->resolveToStoragePath( $srcPath );
			if ( !$this->validateFilename( $dstRel ) ) {
				throw new MWException( 'Validation error in $dstRel' );
			}
			if ( !$this->validateFilename( $archiveRel ) ) {
				throw new MWException( 'Validation error in $archiveRel' );
			}

			// Both relative paths are resolved against the public zone root
			$publicRoot = $this->getZonePath( 'public' );
			$dstPath = "$publicRoot/$dstRel";
			$archivePath = "$publicRoot/$archiveRel";

			$dstDir = dirname( $dstPath );
			$archiveDir = dirname( $archivePath );
			// Abort immediately on directory creation errors since they're likely to be repetitive
			if ( !$this->initDirectory( $dstDir )->isOK() ) {
				return $this->newFatal( 'directorycreateerror', $dstDir );
			}
			if ( !$this->initDirectory( $archiveDir )->isOK() ) {
				return $this->newFatal( 'directorycreateerror', $archiveDir );
			}

			// Set any desired headers to be used in GET/HEAD responses
			$headers = isset( $options['headers'] ) ? $options['headers'] : array();

			// Archive destination file if it exists.
			// This will check if the archive file also exists and fail if it does.
			// This is a sanity check to avoid data loss. On Windows and Linux,
			// copy() will overwrite, so the existence check is vulnerable to
			// race conditions unless a functioning LockManager is used.
			// LocalFile also uses SELECT FOR UPDATE for synchronization.
			$operations[] = array(
				'op' => 'copy',
				'src' => $dstPath,
				'dst' => $archivePath,
				'ignoreMissingSource' => true
			);

			// Copy (or move) the source file to the destination
			if ( FileBackend::isStoragePath( $srcPath ) ) {
				if ( $flags & self::DELETE_SOURCE ) {
					$operations[] = array(
						'op' => 'move',
						'src' => $srcPath,
						'dst' => $dstPath,
						'overwrite' => true, // replace current
						'headers' => $headers
					);
				} else {
					$operations[] = array(
						'op' => 'copy',
						'src' => $srcPath,
						'dst' => $dstPath,
						'overwrite' => true, // replace current
						'headers' => $headers
					);
				}
			} else { // FS source path
				$operations[] = array(
					'op' => 'store',
					'src' => $srcPath,
					'dst' => $dstPath,
					'overwrite' => true, // replace current
					'headers' => $headers
				);
				// A 'store' leaves the disk source in place, so remember it
				// for the unlink() pass below when deletion was requested
				if ( $flags & self::DELETE_SOURCE ) {
					$sourceFSFilesToDelete[] = $srcPath;
				}
			}
		}

		// Execute the operations for each triplet
		$status->merge( $backend->doOperations( $operations ) );
		// Find out which files were archived...
		foreach ( $ntuples as $i => $ntuple ) {
			list( , , $archiveRel ) = $ntuple;
			$archivePath = $this->getZonePath( 'public' ) . "/$archiveRel";
			if ( $this->fileExists( $archivePath ) ) {
				$status->value[$i] = 'archived';
			} else {
				$status->value[$i] = 'new';
			}
		}
		// Cleanup for disk source files...
		// This runs regardless of the outcome of the operations above, with
		// warnings from unlink() suppressed.
		foreach ( $sourceFSFilesToDelete as $file ) {
			wfSuppressWarnings();
			unlink( $file ); // FS cleanup
			wfRestoreWarnings();
		}

		return $status;
	}

FileBackend.php[edit]

	/**
	 * This is the main entry point into the backend for write operations.
	 * Callers supply an ordered list of operations to perform as a transaction.
	 * Files will be locked, the stat cache cleared, and then the operations attempted.
	 * If any serious errors occur, all attempted operations will be rolled back.
	 *
	 * $ops is an array of arrays. The outer array holds a list of operations.
	 * Each inner array is a set of key value pairs that specify an operation.
	 *
	 * Supported operations and their parameters. The supported actions are:
	 *  - create
	 *  - store
	 *  - copy
	 *  - move
	 *  - delete
	 *  - describe (since 1.21)
	 *  - null
	 *
	 * a) Create a new file in storage with the contents of a string
	 * @code
	 *     array(
	 *         'op'                  => 'create',
	 *         'dst'                 => <storage path>,
	 *         'content'             => <string of new file contents>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>,
	 *         'headers'             => <HTTP header name/value map> # since 1.21
	 *     );
	 * @endcode
	 *
	 * b) Copy a file system file into storage
	 * @code
	 *     array(
	 *         'op'                  => 'store',
	 *         'src'                 => <file system path>,
	 *         'dst'                 => <storage path>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>,
	 *         'headers'             => <HTTP header name/value map> # since 1.21
	 *     )
	 * @endcode
	 *
	 * c) Copy a file within storage
	 * @code
	 *     array(
	 *         'op'                  => 'copy',
	 *         'src'                 => <storage path>,
	 *         'dst'                 => <storage path>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>,
	 *         'ignoreMissingSource' => <boolean>, # since 1.21
	 *         'headers'             => <HTTP header name/value map> # since 1.21
	 *     )
	 * @endcode
	 *
	 * d) Move a file within storage
	 * @code
	 *     array(
	 *         'op'                  => 'move',
	 *         'src'                 => <storage path>,
	 *         'dst'                 => <storage path>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>,
	 *         'ignoreMissingSource' => <boolean>, # since 1.21
	 *         'headers'             => <HTTP header name/value map> # since 1.21
	 *     )
	 * @endcode
	 *
	 * e) Delete a file within storage
	 * @code
	 *     array(
	 *         'op'                  => 'delete',
	 *         'src'                 => <storage path>,
	 *         'ignoreMissingSource' => <boolean>
	 *     )
	 * @endcode
	 *
	 * f) Update metadata for a file within storage
	 * @code
	 *     array(
	 *         'op'                  => 'describe',
	 *         'src'                 => <storage path>,
	 *         'headers'             => <HTTP header name/value map>
	 *     )
	 * @endcode
	 *
	 * g) Do nothing (no-op)
	 * @code
	 *     array(
	 *         'op'                  => 'null',
	 *     )
	 * @endcode
	 *
	 * Boolean flags for operations (operation-specific):
	 *   - ignoreMissingSource : The operation will simply succeed and do
	 *                           nothing if the source file does not exist.
	 *   - overwrite           : Any destination file will be overwritten.
	 *   - overwriteSame       : If a file already exists at the destination with the
	 *                           same contents, then do nothing to the destination file
	 *                           instead of giving an error. This does not compare headers.
	 *                           This option is ignored if 'overwrite' is already provided.
	 *   - headers             : If supplied, the result of merging these headers with any
	 *                           existing source file headers (replacing conflicting ones)
	 *                           will be set as the destination file headers. Headers are
	 *                           deleted if their value is set to the empty string. When a
	 *                           file has headers they are included in responses to GET and
	 *                           HEAD requests to the backing store for that file.
	 *                           Header values should be no larger than 255 bytes, except for
	 *                           Content-Disposition. The system might ignore or truncate any
	 *                           headers that are too long to store (exact limits will vary).
	 *                           Backends that don't support metadata ignore this. (since 1.21)
	 *
	 * $opts is an associative array of boolean flags, including:
	 *   - force               : Operation precondition errors no longer trigger an abort.
	 *                           Any remaining operations are still attempted. Unexpected
	 *                           failures may still cause remaining operations to be aborted.
	 *   - nonLocking          : No locks are acquired for the operations.
	 *                           This can increase performance for non-critical writes.
	 *                           This has no effect unless the 'force' flag is set.
	 *   - nonJournaled        : Don't log this operation batch in the file journal.
	 *                           This limits the ability of recovery scripts.
	 *   - parallelize         : Try to do operations in parallel when possible.
	 *   - bypassReadOnly      : Allow writes in read-only mode. (since 1.20)
	 *   - preserveCache       : Don't clear the process cache before checking files.
	 *                           This should only be used if all entries in the process
	 *                           cache were added after the files were already locked. (since 1.20)
	 *
	 * @remarks Remarks on locking:
	 * File system paths given to operations should refer to files that are
	 * already locked or otherwise safe from modification from other processes.
	 * Normally these files will be new temp files, which should be adequate.
	 *
	 * @par Return value:
	 *
	 * This returns a Status, which contains all warnings and fatals that occurred
	 * during the operation. The 'failCount', 'successCount', and 'success' members
	 * will reflect each operation attempted.
	 *
	 * The status will be "OK" unless:
	 *   - a) unexpected operation errors occurred (network partitions, disk full...)
	 *   - b) significant operation errors occurred and 'force' was not set
	 *
	 * @param array $ops List of operations to execute in order
	 * @param array $opts Batch operation options
	 * @return Status
	 */
	final public function doOperations( array $ops, array $opts = array() ) {
		// Refuse to write on a read-only backend unless explicitly bypassed
		if ( empty( $opts['bypassReadOnly'] ) && $this->isReadOnly() ) {
			return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
		}
		if ( !count( $ops ) ) {
			return Status::newGood(); // nothing to do
		}
		// 'nonLocking' has no effect unless 'force' is set, so drop it
		if ( empty( $opts['force'] ) ) { // sanity
			unset( $opts['nonLocking'] );
		}
		// Map the old 'disposition' parameter onto the Content-Disposition header
		foreach ( $ops as &$op ) {
			if ( isset( $op['disposition'] ) ) { // b/c (MW 1.20)
				$op['headers']['Content-Disposition'] = $op['disposition'];
			}
		}
		// Break the reference, otherwise the last element of $ops stays
		// aliased to $op when the array is passed on below
		unset( $op );
		$scope = $this->getScopedPHPBehaviorForOps(); // try to ignore client aborts
		return $this->doOperationsInternal( $ops, $opts );
	}

FileBackendStore.php[edit]

	/**
	 * Abstract method that concrete subclasses must implement.
	 * NOTE(review): presumably the backend-specific part of createInternal(),
	 * which is not visible here — confirm against that method.
	 *
	 * @see FileBackendStore::createInternal()
	 * @param array $params
	 * @return Status
	 */
	abstract protected function doCreateInternal( array $params );

	/**
	 * Store a file into the backend from a file on disk.
	 * This will overwrite any file that exists at the destination.
	 * Do not call this function from places outside FileBackend and FileOp.
	 *
	 * Sources larger than maxFileSizeInternal() are rejected with a
	 * 'backend-fail-maxsize' fatal status. Otherwise the work is delegated to
	 * doStoreInternal() and the caches for the destination are cleared.
	 *
	 * $params include:
	 *   - src         : source path on disk
	 *   - dst         : destination storage path
	 *   - headers     : HTTP header name/value map
	 *   - async       : Status will be returned immediately if supported.
	 *                   If the status is OK, then its value field will be
	 *                   set to a FileBackendStoreOpHandle object.
	 *   - dstExists   : Whether a file exists at the destination (optimization).
	 *                   Callers can use "false" if no existing file is being changed.
	 *
	 * @param array $params
	 * @return Status
	 */
	final public function storeInternal( array $params ) {
		$section = new ProfileSection( __METHOD__ . "-{$this->name}" );

		// Reject oversized sources before touching the backend
		if ( filesize( $params['src'] ) > $this->maxFileSizeInternal() ) {
			return Status::newFatal( 'backend-fail-maxsize',
				$params['dst'], $this->maxFileSizeInternal() );
		}

		$status = $this->doStoreInternal( $params );
		$this->clearCache( array( $params['dst'] ) );

		// Unless the caller stated that nothing existed at the destination,
		// the persistent cache entry has to go as well
		$dstMayExist = !isset( $params['dstExists'] ) || $params['dstExists'];
		if ( $dstMayExist ) {
			$this->deleteFileCache( $params['dst'] ); // persistent cache
		}

		return $status;
	}

	/**
	 * Abstract method that concrete subclasses must implement.
	 * storeInternal() calls this with its own $params once the source file
	 * has passed the maximum size check, and returns the resulting status.
	 *
	 * @see FileBackendStore::storeInternal()
	 * @param array $params
	 * @return Status
	 */
	abstract protected function doStoreInternal( array $params );

Links[edit]