User:Jeblad/Cite body.php

From MediaWiki.org
Jump to: navigation, search
<?php
 
/**#@+
 * A parser extension that adds two tags, <ref> and <references> for adding
 * citations to pages
 *
 * @addtogroup Extensions
 *
 * @link http://meta.wikimedia.org/wiki/Cite/Cite.php Documentation
 * @link http://www.w3.org/TR/html4/struct/text.html#edef-CITE <cite> definition in HTML
 * @link http://www.w3.org/TR/2005/WD-xhtml2-20050527/mod-text.html#edef_text_cite <cite> definition in XHTML 2.0
 *
 * @bug 4579
 *
 * @author Ævar Arnfjörð Bjarmason <avarab@gmail.com>
 * @copyright Copyright © 2005, Ævar Arnfjörð Bjarmason
 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
 */
 
class Cite {
        /**#@+
         * @access private
         */
 
        /**
         * Datastructure representing <ref> input, in the format of:
         * <code>
         * array(
         *      'user supplied' => array(
         *              'text' => 'user supplied reference & key',
         *              'count' => 1, // occurs twice
         *              'number' => 1, // The first reference, we want
         *                             // all occurrences of it to
         *                             // use the same number
         *      ),
         *      0 => 'Anonymous reference',
         *      1 => 'Another anonymous reference',
         *      'some key' => array(
         *              'text' => 'this one occurs once',
         *              'count' => 0,
         *              'number' => 4
         *      ),
         *      3 => 'more stuff'
         * );
         * </code>
         *
         * This works because:
         * * PHP's datastructures are guaranteed to be returned in the
         *   order that things are inserted into them (unless you mess
         *   with that)
         * * User supplied keys can't be integers, therefore avoiding
         *   conflict with anonymous keys
         *
         * @var array
         **/
        var $mRefs = array();
 
        /**
         * Count for user displayed output (ref[1], ref[2], ...)
         *
         * @var int
         */
        var $mOutCnt = 0;
        var $mGroupCnt = array();
 
        /**
         * Internal counter for anonymous references, separate from
         * $mOutCnt because anonymous references won't increment it,
         * but will increment $mOutCnt
         *
         * @var int
         */
        var $mInCnt = 0;
 
        /**
         * The backlinks, in order, to pass as $3 to
         * 'cite_references_link_many_format', defined in
         * 'cite_references_link_many_format_backlink_labels'
         *
         * @var array
         */
        var $mBacklinkLabels;
 
        /**
         * @var object
         */
        var $mParser;
 
        /**
         * True when a <ref> or <references> tag is being processed.
         * Used to avoid infinite recursion
         * 
         * @var boolean
         */
        var $mInCite = false;
 
        /**#@-*/
 
        /**
         * Constructor: registers the <ref> and <references> parser hooks
         * through setHooks().
         *
         * Declared as __construct() because a method that merely shares its
         * name with the class is no longer treated as a constructor by
         * current PHP versions, which would leave the hooks unregistered.
         */
        function __construct() {
                $this->setHooks();
        }

        /**
         * Old class-named constructor, kept so that existing code calling
         * Cite() explicitly keeps working. It only forwards to __construct().
         */
        function Cite() {
                $this->__construct();
        }
 
        /**#@+ @access private */
 
        /**
         * Parser hook callback for the <ref> tag.
         *
         * Loads the extension messages and then either hands the tag over to
         * guardedRef() or, when another <ref>/<references> is already being
         * processed, returns the tag as escaped literal text so that nested
         * tags cannot recurse.
         *
         * @param string $str Content between <ref> and </ref>
         * @param array $argv Tag attributes
         * @param object $parser The parser invoking the hook
         * @return string
         */
        function ref( $str, $argv, $parser ) {
                wfLoadExtensionMessages( 'Cite' );

                // Already inside a cite tag: show the nested tag verbatim.
                if ( $this->mInCite ) {
                        return htmlspecialchars( "<ref>$str</ref>" );
                }

                // Raise the guard only for the duration of the real work.
                $this->mInCite = true;
                $output = $this->guardedRef( $str, $argv, $parser );
                $this->mInCite = false;

                return $output;
        }
 
        /**
         * Validate a <ref> tag and, if it is well formed, record it.
         *
         * Called by ref() while the recursion guard ($mInCite) is set.
         * Remembers the parser, unpacks the tag attributes with refArg() and
         * returns either an error message or the link produced by stack().
         *
         * @param string $str Content of the tag; null for a self-closing tag
         * @param array $argv Tag attributes
         * @param object $parser The parser invoking the hook
         * @param string $default_group Group used when the tag names none
         * @return string Output of error() or stack()
         */
        function guardedRef( $str, $argv, $parser, $default_group=CITE_DEFAULT_GROUP ) {
                $this->mParser = $parser;

                # The key here is the "name" attribute.
                # refArg() reports invalid attributes either as a bare false
                # or as array( false ).  Unpacking a bare false with list()
                # would turn the key into null and silently hide the error,
                # so bring every result into ( key, group ) form first.
                $args = $this->refArg( $argv );
                if( !is_array( $args ) ) {
                        $args = array( false, null );
                }
                $key = isset( $args[0] ) ? $args[0] : null;
                $group = isset( $args[1] ) ? $args[1] : null;

                if( $str === '' ) {
                        # <ref ...></ref>.  This construct is always invalid: either
                        # it's a contentful ref, or it's a named duplicate and should
                        # be <ref ... />.
                        return $this->error( 'cite_error_ref_no_input' );
                }

                if( $key === false ) {
                        # refArg() rejected the attributes: more than two were
                        # given, one was neither "name" nor "group", or "group"
                        # was used while groups are disabled.
                        return $this->error( 'cite_error_ref_too_many_keys' );
                }

                if( $str === null and $key === null ) {
                        # Something like <ref />; this makes no sense.
                        return $this->error( 'cite_error_ref_no_key' );
                }

                if( is_string( $key ) && preg_match( '/^[0-9]+$/', $key ) ) {
                        # Numeric names mess up the resulting id's, potentially produ-
                        # cing duplicate id's in the XHTML.  The Right Thing To Do
                        # would be to mangle them, but it's not really high-priority
                        # (and would produce weird id's anyway).
                        return $this->error( 'cite_error_ref_numeric_key' );
                }

                # Split these into groups.
                if( $group === null ) {
                        $group = $default_group;
                }

                if( is_string( $key ) or is_string( $str ) ) {
                        # We don't care about the content: if the key exists, the ref
                        # is presumptively valid.  Either it stores a new ref, or re-
                        # fers to an existing one.  If it refers to a nonexistent ref,
                        # we'll figure that out later.  Likewise it's definitely valid
                        # if there's any content, regardless of key.
                        return $this->stack( $str, $key, $group );
                }

                # Not clear how we could get here, but something is probably
                # wrong with the types.  Let's fail fast.
                $this->croak( 'cite_error_key_str_invalid', serialize( "$str; $key" ) );
        }
 
        /**
         * Parse the arguments to the <ref> tag
         *
         * Only the attributes "name" and "group" are accepted, and "group"
         * only while $wgAllowCiteGroups is enabled. The result always has
         * two elements so that callers can unpack it with list().
         *
         * @param array $argv The argument vector
         * @return array array( $key, $group ): $key is false on invalid
         *               input, the escaped name when one was given and
         *               null otherwise; $group is the group name or null
         */
        function refArg( $argv ) {
                global $wgAllowCiteGroups;

                // Uniform "invalid input" result; the key is false, the
                // group slot is present so list( $key, $group ) never reads
                // a missing offset.
                $invalid = array( false, null );

                if ( count( $argv ) > 2 ) {
                        // There should only be one key and one group
                        return $invalid;
                }

                $key = null;
                $group = null;

                if ( isset( $argv['name'] ) ) {
                        // Key given.
                        $key = Sanitizer::escapeId( $argv['name'] );
                        unset( $argv['name'] );
                }

                if ( isset( $argv['group'] ) ) {
                        if ( !$wgAllowCiteGroups ) {
                                //remove when groups are fully tested.
                                return $invalid;
                        }
                        // Group given.
                        $group = $argv['group'];
                        unset( $argv['group'] );
                }

                if ( count( $argv ) > 0 ) {
                        // Something other than name/group was supplied
                        return $invalid;
                }

                return array( $key, $group );
        }
 
        /**
         * Populate $this->mRefs based on input and arguments to <ref>
         *
         * Every new reference receives a running id from $mOutCnt, stored as
         * 'key'. Anonymous references are appended with 'count' => -1; named
         * references are stored under their name together with a per-group
         * 'number' and a 'count' of repeat occurrences.
         *
         * @param string $str Input from the <ref> tag
         * @param mixed $key Argument to the <ref> tag as returned by $this->refArg()
         * @param string $group Name of the group the reference belongs to
         * @return string The link returned by linkRef()
         */
        function stack( $str, $key = null, $group ) {
                if ( !isset( $this->mRefs[$group] ) ) {
                        $this->mRefs[$group] = array();
                }
                if ( !isset( $this->mGroupCnt[$group] ) ) {
                        $this->mGroupCnt[$group] = 0;
                }

                if ( $key === null ) {
                        // No key
                        $id = ++$this->mOutCnt;
                        $this->mRefs[$group][] = array( 'count' => -1, 'text' => $str, 'key' => $id );
                        $this->mInCnt++;

                        // referencesFormatEntry() builds the anchors of an
                        // anonymous entry from its stored 'key', so the link
                        // must be built from that same id and not from the
                        // separate $mInCnt counter, or the two can disagree.
                        return $this->linkRef( $group, $id );
                }

                if ( !is_string( $key ) ) {
                        $this->croak( 'cite_error_stack_invalid_input', serialize( array( $key, $str ) ) );
                        return;
                }

                // Valid key
                if ( !isset( $this->mRefs[$group][$key] ) || !is_array( $this->mRefs[$group][$key] ) ) {
                        // First occurrence
                        $this->mRefs[$group][$key] = array(
                                'text' => $str,
                                'count' => 0,
                                'key' => ++$this->mOutCnt,
                                'number' => ++$this->mGroupCnt[$group]
                        );
                        $this->mInCnt++;
                } else {
                        // We've been here before
                        if ( $this->mRefs[$group][$key]['text'] === null && $str !== '' ) {
                                // If no text found before, use this text
                                $this->mRefs[$group][$key]['text'] = $str;
                        }
                        $this->mRefs[$group][$key]['count']++;
                }

                $entry = $this->mRefs[$group][$key];

                return $this->linkRef(
                        $group,
                        $key,
                        $entry['key'] . "-" . $entry['count'],
                        $entry['number'],
                        "-" . $entry['key']
                );
        }
 
        /**
         * Parser hook callback for the <references> tag.
         *
         * Outside of any other cite tag the work is delegated to
         * guardedReferences() with the recursion guard raised. Inside
         * one, the tag is returned as escaped literal text instead.
         *
         * @param string $str Content of the tag, null when self-closing
         * @param array $argv Tag attributes
         * @param object $parser The parser invoking the hook
         * @return string
         */
        function references( $str, $argv, $parser ) {
                wfLoadExtensionMessages( 'Cite' );

                if ( !$this->mInCite ) {
                        $this->mInCite = true;
                        $output = $this->guardedReferences( $str, $argv, $parser );
                        $this->mInCite = false;

                        return $output;
                }

                // Nested use: echo the tag back, in the form it was written.
                $literal = is_null( $str )
                        ? "<references/>"
                        : "<references>$str</references>";

                return htmlspecialchars( $literal );
        }
 
        /**
         * Worker behind references(): selects the group to list and rejects
         * unknown attributes.
         *
         * A "group" attribute is honoured only while $wgAllowCiteGroups is
         * enabled; any attribute left over afterwards is an error.
         *
         * @param string $str Content of the <references> tag
         * @param array $argv Tag attributes
         * @param object $parser The parser invoking the hook
         * @param string $group Group to list when the tag names none
         * @return string Output of error() or referencesFormat()
         */
        function guardedReferences( $str, $argv, $parser, $group = CITE_DEFAULT_GROUP ) {
                global $wgAllowCiteGroups;

                $this->mParser = $parser;

                $hasGroup = isset( $argv['group'] );
                if ( $hasGroup && $wgAllowCiteGroups ) {
                        $group = $argv['group'];
                        unset( $argv['group'] );
                }

                // Whatever is still present was not understood.
                if ( count( $argv ) > 0 ) {
                        return $this->error( 'cite_error_references_invalid_parameters' );
                }

                return $this->referencesFormat( $str, $group );
        }
 
        /**
         * Make output to be returned from the references() function
         *
         * Builds one entry per reference stored for $group, optionally
         * taking the text of named references from "tail definitions"
         * written inside the <references> tag, and forgets the group's
         * references afterwards so that the group can be reused.
         *
         * @param string $str The content of the <references> tag
         * @param string $group The group whose references are listed
         * @return string XHTML ready for output
         */
        function referencesFormat($str, $group) {
                global $wgAllowTailDefs;

                // Nothing inside the tag and nothing collected: no output.
                if ( strval( $str ) === '' && empty( $this->mRefs[$group] ) ) {
                        return '';
                }

                wfProfileIn( __METHOD__ );
                wfProfileIn( __METHOD__ .'-addentries' );

                // The group may never have received a <ref>; check before
                // indexing so that no undefined index is read.
                $haveRefs = isset( $this->mRefs[$group] ) && is_array( $this->mRefs[$group] );

                $addents = array();
                if ( $wgAllowTailDefs ) {
                        # match lines like these:
                        # somename: This is the reference text for some text
                        # lines may continue on following lines if indented
                        $fragments = array();
                        preg_match_all(
                                "/\\n([^:\\n\\t ]*?)[\\t ]*:[\\t ]*([^\\n]*(?:\\n[\\t ][^\\n]*)*)/s",
                                "\n" . $str . "\n",
                                $fragments,
                                PREG_SET_ORDER
                        );
                        $count = 0;
                        foreach ( $fragments as $fragment ) {
                                $count++;
                                $anonKey = $group . '-anon-' . $count;
                                if ( $fragment[1] == '' && $haveRefs && !isset( $this->mRefs[$group][$anonKey] ) ) {
                                        // Definition without a name
                                        $addents[$anonKey] = $fragment[2];
                                } else {
                                        $addents[$fragment[1]] = $fragment[2];
                                }
                        }
                }
                wfProfileOut( __METHOD__ .'-addentries' );
                wfProfileIn( __METHOD__ .'-entries' );

                $ent = array();
                $used = array();
                if ( $haveRefs ) {
                        foreach ( $this->mRefs[$group] as $k => $v ) {
                                if ( is_array( $v ) && $v['text'] == '' ) {
                                        $used[$k] = true;
                                        // Fill in the tail definition but keep the entry
                                        // an array: its 'key', 'count' and 'number' are
                                        // needed to build the same anchors that linkRef()
                                        // used for the <ref> links.
                                        if ( isset( $addents[$k] ) ) {
                                                $v['text'] = $addents[$k];
                                        }
                                }
                                $ent[] = $this->referencesFormatEntry( $k, $v );
                        }
                }

                // Tail definitions that no reference picked up
                foreach ( $addents as $k => $v ) {
                        if ( empty( $used[$k] ) ) {
                                $ent[] = wfMsgForContentNoTrans(
                                        'cite_references_link_none',
                                        $v,
                                        $this->error( 'cite_error_references_no_text', $k )
                                );
                        }
                }

                $prefix = wfMsgForContentNoTrans( 'cite_references_prefix' );
                $suffix = wfMsgForContentNoTrans( 'cite_references_suffix' );
                $content = implode( "\n", $ent );

                wfProfileOut( __METHOD__ .'-entries' );
                wfProfileIn( __METHOD__ .'-parse' );
                // Live hack: parse() adds two newlines on WM, can't reproduce it locally -ævar
                $ret = rtrim( $this->parse( $prefix . $content . $suffix ), "\n" );
                wfProfileOut( __METHOD__ .'-parse' );
                wfProfileOut( __METHOD__ );

                //done, clean up so we can reuse the group
                unset( $this->mRefs[$group] );
                unset( $this->mGroupCnt[$group] );

                return $ret;
        }
 
        /**
         * Format a single entry for the referencesFormat() function
         *
         * Array entries are formatted with anchors derived from their
         * stored 'key' (and, for named references, their name), which is
         * how linkRef() is called from stack(). An entry without text is
         * shown with the 'cite_error_references_no_text' error in place of
         * the text.
         *
         * @param string $key The key of the reference
         * @param mixed $val The value of the reference: a plain string, or an
         *                   array with 'text', 'count', 'key' and (for named
         *                   references) 'number'
         * @return string Wikitext
         */
        function referencesFormatEntry( $key, $val ) {
                // Plain text entry: the key alone identifies it
                if ( !is_array( $val ) ) {
                        return wfMsgForContentNoTrans(
                                'cite_references_link_one',
                                $this->referencesKey( $key ),
                                $this->refKey( $key ),
                                $val
                        );
                }

                // A missing text is reported in place, inside an entry that
                // still carries working anchors.
                $text = ( $val['text'] != '' )
                        ? $val['text']
                        : $this->error( 'cite_error_references_no_text', $key );

                // Anonymous reference, identified by its running id only
                if ( $val['count'] < 0 ) {
                        return wfMsgForContentNoTrans(
                                'cite_references_link_one',
                                $this->referencesKey( $val['key'] ),
                                $this->refKey( $val['key'] ),
                                $text
                        );
                }

                // Standalone named reference, I want to format this like an
                // anonymous reference because displaying "1. 1.1 Ref text" is
                // overkill and users frequently use named references when they
                // don't need them for convenience
                if ( $val['count'] === 0 ) {
                        return wfMsgForContentNoTrans(
                                'cite_references_link_one',
                                $this->referencesKey( $key . "-" . $val['key'] ),
                                $this->refKey( $key, $val['key'] . "-" . $val['count'] ),
                                $text
                        );
                }

                // Named references with >1 occurrences
                // For group handling, we have an extra key here.
                $links = array();
                for ( $i = 0; $i <= $val['count']; ++$i ) {
                        $links[] = wfMsgForContentNoTrans(
                                'cite_references_link_many_format',
                                $this->refKey( $key, $val['key'] . "-$i" ),
                                $this->referencesFormatEntryNumericBacklinkLabel( $val['number'], $i, $val['count'] ),
                                $this->referencesFormatEntryAlternateBacklinkLabel( $i )
                        );
                }

                return wfMsgForContentNoTrans(
                        'cite_references_link_many',
                        $this->referencesKey( $key . "-" . $val['key'] ),
                        $this->listToText( $links ),
                        $text
                );
        }
 
        /**
         * Build the numeric backlink label "<base>.<offset>".
         *
         * The offset is zero-padded to as many characters as $max has, so
         * that e.g. offset 9 with a maximum of 10 becomes "09". The result
         * is passed through the content language's formatNum().
         *
         * @param int $base The base
         * @param int $offset The offset
         * @param int $max Maximum value expected.
         * @return string
         */
        function referencesFormatEntryNumericBacklinkLabel( $base, $offset, $max ) {
                global $wgContLang;

                // Pad width = number of characters of the largest offset
                $width = strlen( $max );
                $label = sprintf( "%s.%0{$width}s", $base, $offset );

                return $wgContLang->formatNum( $label );
        }
 
        /**
         * Look up the custom backlink label for an offset, e.g. offset 2
         * gives 'c' when $this->mBacklinkLabels is array( 'a', 'b', 'c', ... ).
         *
         * The label list is generated on first use. An offset with no
         * label yields the 'cite_error_references_no_backlink_label' error.
         *
         * @param int $offset The offset
         *
         * @return string
         */
        function referencesFormatEntryAlternateBacklinkLabel( $offset ) {
                // Build the label list lazily
                if ( !isset( $this->mBacklinkLabels ) ) {
                        $this->genBacklinkLabels();
                }

                // Feed me! (more labels are needed when the offset is unknown)
                return isset( $this->mBacklinkLabels[$offset] )
                        ? $this->mBacklinkLabels[$offset]
                        : $this->error( 'cite_error_references_no_backlink_label' );
        }
 
        /**
         * Build an id from the 'cite_reference_link_prefix' and
         * 'cite_reference_link_suffix' messages wrapped around a key.
         *
         * When $num is given, key and number are first combined through the
         * 'cite_reference_link_key_with_num' message.
         *
         * @param string $key The key
         * @param int $num The number of the key
         * @return string A key for use in wikitext
         */
        function refKey( $key, $num = null ) {
                $before = wfMsgForContent( 'cite_reference_link_prefix' );
                $after = wfMsgForContent( 'cite_reference_link_suffix' );

                $middle = $key;
                if ( $num !== null ) {
                        $middle = wfMsgForContentNoTrans( 'cite_reference_link_key_with_num', $key, $num );
                }

                return $before . $middle . $after;
        }
 
        /**
         * Build an id from the 'cite_references_link_prefix' and
         * 'cite_references_link_suffix' messages wrapped around a key.
         *
         * When $num is given, key and number are first combined through the
         * 'cite_reference_link_key_with_num' message.
         *
         * @param string $key The key
         * @param int $num The number of the key
         * @return string A key for use in wikitext
         */
        function referencesKey( $key, $num = null ) {
                $before = wfMsgForContent( 'cite_references_link_prefix' );
                $after = wfMsgForContent( 'cite_references_link_suffix' );

                $middle = $key;
                if ( $num !== null ) {
                        $middle = wfMsgForContentNoTrans( 'cite_reference_link_key_with_num', $key, $num );
                }

                return $before . $middle . $after;
        }
 
        /**
         * Generate a link (<sup ...) for the <ref> element from a key
         * and return XHTML ready for output
         *
         * The visible label is the group name followed by a space (omitted
         * for the default group) and a number: $label when given, otherwise
         * the group's counter after incrementing it.
         *
         * @param string $group The group the reference belongs to
         * @param string $key The key for the link
         * @param int $count The index of the key, used for distinguishing
         *                   multiple occurrences of the same key
         * @param int $label The label to use for the link, I want to
         *                   use the same label for all occurrences of
         *                   the same named reference.
         * @param string $subkey Suffix appended to $key for the target id
         * @return string
         */
        function linkRef( $group, $key, $count = null, $label = null, $subkey = '' ) {
                global $wgContLang;

                $ownId = $this->refKey( $key, $count );
                $targetId = $this->referencesKey( $key . $subkey );

                $groupPrefix = ( $group == CITE_DEFAULT_GROUP ) ? '' : "$group ";
                if ( is_null( $label ) ) {
                        // No fixed label: take the next number of this group
                        $label = ++$this->mGroupCnt[$group];
                }
                $text = $groupPrefix . $wgContLang->formatNum( $label );

                $wikitext = wfMsgForContentNoTrans( 'cite_reference_link', $ownId, $targetId, $text );

                return $this->parse( $wikitext );
        }
 
        /**
         * This does approximately the same thing as
         * Language::listToText() but due to this being used for a
         * slightly different purpose (people might not want , as the
         * first separator and not 'and' as the second, and this has to
         * use messages from the content language) I'm rolling my own.
         *
         * All items but the last are joined with the
         * 'cite_references_link_many_sep' message; the last one is attached
         * with 'cite_references_link_many_and'.
         *
         * @param array $arr The array to format (a 0-indexed list)
         * @return string The joined text; '' for an empty list
         */
        function listToText( $arr ) {
                $cnt = count( $arr );

                // Nothing to join; without this guard the code below would
                // read a non-existent last element.
                if ( $cnt == 0 ) {
                        return '';
                }

                if ( $cnt == 1 ) {
                        // Enforce always returning a string
                        return (string)$arr[0];
                }

                $sep = wfMsgForContentNoTrans( 'cite_references_link_many_sep' );
                $and = wfMsgForContentNoTrans( 'cite_references_link_many_and' );

                $head = array_slice( $arr, 0, $cnt - 1 );

                return implode( $sep, $head ) . $and . $arr[$cnt - 1];
        }
 
        /**
         * Turn a wikitext fragment into output using the parser stored in
         * $this->mParser.
         *
         * Uses recursiveTagParse() when the parser offers it. Otherwise it
         * falls back to a full parse() call and cleans up the result with
         * fixTidy().
         *
         * @param string $in The text to parse
         * @return string The parsed text
         */
        function parse( $in ) {
                $parser = $this->mParser;

                if ( !method_exists( $parser, 'recursiveTagParse' ) ) {
                        // Old method
                        $output = $parser->parse(
                                $in,
                                $parser->mTitle,
                                $parser->mOptions,
                                // Avoid whitespace buildup
                                false,
                                // Important, otherwise $this->clearState()
                                // would get run every time <ref> or
                                // <references> is called, breaking the
                                // collected state.
                                false
                        );

                        return $this->fixTidy( $output->getText() );
                }

                // New fast method
                return $parser->recursiveTagParse( $in );
        }
 
        /**
         * Tidy treats all input as a block, it will e.g. wrap most
         * input in <p> if it isn't already, fix that and return the fixed text
         *
         * Does nothing unless $wgUseTidy is enabled. Otherwise removes a
         * leading <p> (with following whitespace), every </p> (with the
         * whitespace around it) and one trailing newline.
         *
         * @param string $text The text to fix
         * @return string The fixed text
         */
        function fixTidy( $text ) {
                global $wgUseTidy;

                if ( $wgUseTidy ) {
                        // Applied one after another, in this order
                        $patterns = array(
                                '~^<p>\s*~',
                                '~\s*</p>\s*~',
                                '~\n$~',
                        );
                        $text = preg_replace( $patterns, '', $text );
                }

                return $text;
        }
 
        /**
         * Generate the labels to pass to the
         * 'cite_references_link_many_format' message, the format is an
         * arbitrary number of tokens separated by [\t\n ]
         *
         * Runs of separators count as a single separator and empty tokens
         * are dropped, so that doubled spaces or a trailing newline in the
         * message do not produce blank labels that shift all later ones.
         * The result is stored in $this->mBacklinkLabels.
         */
        function genBacklinkLabels() {
                wfProfileIn( __METHOD__ );
                $text = wfMsgForContentNoTrans( 'cite_references_link_many_format_backlink_labels' );
                $this->mBacklinkLabels = preg_split( '#[\n\t ]+#', $text, -1, PREG_SPLIT_NO_EMPTY );
                wfProfileOut( __METHOD__ );
        }
 
        /**
         * Hook handler registered for 'ParserClearState' in setHooks().
         *
         * Resets the collected references and all counters so that they do
         * not carry over to the next parse. Nothing is reset while a <ref>
         * or <references> tag is being processed.
         *
         * @return bool Always true
         */
        function clearState() {
                # Only reset when we are not in the middle of parsing a
                # cite tag
                if ( !$this->mInCite ) {
                        $this->mRefs = array();
                        $this->mGroupCnt = array();
                        $this->mInCnt = 0;
                        $this->mOutCnt = -1;
                }

                return true;
        }
 
        /**
         * Initialize the parser hooks
         *
         * Registers ref() and references() as handlers for the tags of the
         * same name on $wgParser, and clearState() as a 'ParserClearState'
         * hook.
         */
        function setHooks() {
                global $wgParser, $wgHooks;

                // Each tag is handled by the method carrying its name
                foreach ( array( 'ref', 'references' ) as $tag ) {
                        $wgParser->setHook( $tag, array( &$this, $tag ) );
                }

                $wgHooks['ParserClearState'][] = array( &$this, 'clearState' );
        }
 
        /**
         * Return an error message based on an error ID
         *
         * The message named by $key is wrapped in the 'cite_error' message,
         * placed inside <strong class="error"> and run through parse().
         *
         * @param string $key   Message name for the error
         * @param string $param Parameter to pass to the message
         * @return string XHTML ready for output
         */
        function error( $key, $param=null ) {
                # $param is always handed on: a message that does not use $1
                # simply ignores the extra parameter.
                $detail = wfMsg( $key, $param );
                $message = wfMsg( 'cite_error', $detail );

                return $this->parse( '<strong class="error">' . $message . '</strong>' );
        }
 
        /**
         * Die with a backtrace if something happens in the code which
         * shouldn't have
         *
         * The text is the 'cite_croak' message, filled with the rendered
         * error and the supplied data.
         *
         * @param string $error Message name of the error
         * @param string $data Serialized error data
         */
        function croak( $error, $data ) {
                $rendered = $this->error( $error );
                $text = wfMsgForContent( 'cite_croak', $rendered, $data );

                wfDebugDieBacktrace( $text );
        }
 
        /**#@-*/
}
 
?>
Personal tools
Namespaces

Variants
Actions
Navigation
Support
Download
Development
Communication
Print/export
Toolbox