Extension:PagesOnDemand/PMID OnDemand

From MediaWiki.org
Jump to navigation Jump to search
<?php
/*
 * PMID_OnDemand.php - An extension 'module' for the PagesOnDemand extension.
 * @author Jim R. Wilson (wilson.jim.r@gmail.com) and Jim Hu (jimhu@tamu.edu)
 * @version 0.1
 * @copyright Copyright (C) 2007 Jim R. Wilson
 * @license The MIT License - http://www.opensource.org/licenses/mit-license.php 
 */

# Stop immediately unless the MEDIAWIKI constant is defined.
if ( !defined( 'MEDIAWIKI' ) ) {
    exit();
}

# Extension credits, registered in the 'other' group.
$wgExtensionCredits['other'][] = array(
    'name'        => 'PMID_OnDemand',
    'author'      => 'Jim Hu &lt;jimhu@tamu.edu&gt; and Jim Wilson &lt;wilson.jim.r@gmail.com&gt;',
    'description' => 'Uses PagesOnDemand to generate wiki articles about papers indexed in the National Library of Medicine PubMed database on demand.',
    'version'     => '0.1',
);

# Hook registration (the 'PagesOnDemand' hook is provided by the PagesOnDemand extension).
$wgHooks['PagesOnDemand'][] = 'wfLoadPubmedPageOnDemand';

/**
 * PagesOnDemand hook handler: creates a wiki page for a PubMed record on demand.
 *
 * When the requested title is in the main namespace and has the form
 * "PMID:<digits>" (spaces, underscores or "%20" after the colon are tolerated),
 * the record is fetched from the NCBI EUtilities efetch service and turned into
 * a reference string, an abstract, a PubMed link and a DOI link. The page text is
 * built from Template:PMID_page when that template exists, otherwise from a plain
 * fallback layout, and saved as a new article. If the requested title differs
 * from the normalised "PMID:<digits>" form, a redirect to the normalised title
 * is created first.
 *
 * @param Title $title The Title to check or create.
 * @param Article $article Second hook argument; its incoming value is not read.
 * @return bool true when the title is not a PMID title (other handlers may run);
 *              false when the record could not be fetched or parsed, and after
 *              the page has been handled (this stops the hook stack).
 */
function wfLoadPubmedPageOnDemand( $title, $article ){

	# Normalise "PMID: 123", "PMID:_123" and "PMID:%20123" to "PMID:123".
	$myTitle = preg_replace('/^PMID:([ _]|%20)*/','PMID:', $title->getDBkey());
	$linkstring = '';
	# Short-circuit if $title isn't in the MAIN namespace or doesn't match the PMID pattern.
	if ( $title->getNamespace() != NS_MAIN || !preg_match('/^PMID:\\d+$/', $myTitle ) ) {
		return true;
	}

	# Create the Article's new text by getting content from PubMed EUtilities
	# See: http://eutils.ncbi.nlm.nih.gov/entrez/query/static/eutils_help.html
	$ref_fields = explode(':',$myTitle);
	$data = trim(array_pop($ref_fields)); # digits only, guaranteed by the pattern check above
	$url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=$data&retmode=xml";
	$pubmed_record = file_get_contents( $url );

	# exit if no pubmed id is found (strict comparison so a match at offset 0 also counts)
	if (!$pubmed_record || strpos($pubmed_record, '<!-- Error>') !== false) return false;

	$vals = array();
	$index = array();
	$xml_parser = xml_parser_create();
	$parsed = xml_parse_into_struct($xml_parser, $pubmed_record, $vals, $index);
	xml_parser_free($xml_parser);

	# Do not build a page from XML that could not be parsed or that holds no record.
	# NOTE(review): an empty record set is presumably what efetch returns for an
	# unknown id - confirm against the service.
	if (!$parsed || (!isset($index['PUBMEDARTICLE']) && !isset($index['PUBMEDBOOKARTICLE']))) {
		return false;
	}

	# Assemble the author list. LASTNAME and INITIALS are paired by position;
	# an author without initials is listed by last name only.
	$author = array();
	if (isset($index['LASTNAME'])){
		foreach ($index['LASTNAME'] as $i => $valkey){
			$name = isset($vals[$valkey]['value']) ? $vals[$valkey]['value'] : '';
			if (isset($index['INITIALS'][$i]) && isset($vals[$index['INITIALS'][$i]]['value'])){
				$name .= ", ".$vals[$index['INITIALS'][$i]]['value'];
			}
			$author[] = $name;
		}
	}

	switch (count($author)){
		case 0:
			$refstring = 'No author listed';
			break;
		case 1:
			$refstring = $author[0];
			break;
		case 2:
			$refstring = implode(' and ',$author);
			break;
		default:
			$last_auth = array_pop($author);
			$refstring = implode(", ",$author). " and ".$last_auth;
	}
	if ($refstring != '') $refstring = "'''".$refstring."''' "; # bold author list

	# Pubdate: collect the text of every entry between the first and last PUBDATE entries.
	$pubdate = array();
	if (!empty($index['PUBDATE'])){
		$pubdate_start = min($index['PUBDATE']);
		$pubdate_end = max($index['PUBDATE']);
		for ($i = $pubdate_start; $i <= $pubdate_end; $i++){
			if (isset($vals[$i]['tag'], $vals[$i]['value'])){
				$pubdate[$vals[$i]['tag']] = $vals[$i]['value'];
			}
		}
	}
	if (isset($pubdate['YEAR'])) $refstring .= " (".$pubdate['YEAR'].") ";

	# Article title, followed by a space so it does not run into the journal name.
	$article_title = wfPubmedFirstValue($vals, $index, 'ARTICLETITLE');
	if ($article_title !== null) $refstring .= $article_title." ";

	# Journal is either in an isoabbreviation or in medlineta or in journal, title.  Use abbreviation if available.
	$journal = wfPubmedFirstValue($vals, $index, 'ISOABBREVIATION');
	if ($journal === null) $journal = wfPubmedFirstValue($vals, $index, 'MEDLINETA');
	if ($journal !== null){
		$refstring .= "''".$journal."'' ";
	}elseif (isset($index['JOURNAL'])){
		# Look for a TITLE entry directly after any JOURNAL entry.
		foreach ($index['JOURNAL'] as $valkey){
			$next = $valkey + 1;
			if (isset($vals[$next]['tag'], $vals[$next]['value']) && $vals[$next]['tag'] == 'TITLE'){
				$refstring .= "''".$vals[$next]['value']."'' ";
			}
		}
	}

	$volume = wfPubmedFirstValue($vals, $index, 'VOLUME');
	if ($volume !== null) $refstring .= "'''".$volume."''':";
	$pages = wfPubmedFirstValue($vals, $index, 'MEDLINEPGN');
	if ($pages !== null) $refstring .= $pages;

	$linkstring .= " [http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=pubmed&dopt=Abstract&list_uids=$data PubMed]";

	# DOI link: use the first ARTICLEID whose IDTYPE is 'doi'.
	# NOTE(review): assumes the article's own id list precedes any ids of cited
	# references in the record - confirm against the PubMed XML layout.
	$doistring = '';
	if (isset($index['ARTICLEID'])){
		foreach ($index['ARTICLEID'] as $key){
			if (isset($vals[$key]['attributes']['IDTYPE'], $vals[$key]['value'])
				&& $vals[$key]['attributes']['IDTYPE'] == 'doi'){
				$doistring = "Online version:[http://dx.doi.org/".$vals[$key]['value']." ".$vals[$key]['value']."]";
				break;
			}
		}
	}

	$abstract = wfPubmedFirstValue($vals, $index, 'ABSTRACTTEXT');
	if ($abstract === null) $abstract = 'No abstract in PubMed';

	# get the template
	$template = Revision::newFromTitle(Title::makeTitle(NS_TEMPLATE, 'PMID_page'));
	if (! $template){
		$text = "Template not found\n\n$refstring\n\n$abstract\n\n$linkstring";
	}else{
		$text = $template->getText();
		#strip out noinclude sections
		$text = preg_replace( '/<noinclude>.*?<\/noinclude>/s', '', $text );
		# Fill all placeholders in a single pass so that placeholder-like text
		# inside the fetched content is never substituted again.
		$text = strtr($text, array(
			'{{{REFERENCE}}}' => $refstring,
			'{{{ABSTRACT}}}' => $abstract,
			'{{{LINKS}}}' => $linkstring,
			'{{{DOI}}}' => $doistring,
		));
	}

	# Create the Article, supplying the new text
	# Handle case where there is an extra space after the colon
	if ($myTitle != $title->getDBkey()){
		$article = new Article($title);
		$article->doEdit( "#REDIRECT [[$myTitle]]", 'New PMID: Page!', EDIT_NEW | EDIT_FORCE_BOT );
		$title = Title::makeTitle(NS_MAIN, $myTitle);
	}
	$article = new Article($title);
	# check again that the page doesn't already exist (we previously checked the original title, not the redirected one)
	if (!$title->exists()){
		$article->doEdit( '', 'New PMID: Page!', EDIT_NEW | EDIT_FORCE_BOT );
		$article->doEdit( $text, 'Fill PMID: Page!', EDIT_UPDATE | EDIT_FORCE_BOT );
	}

	# All done (returning false to kill PoD's wfRunHooks stack)
	return false;
}

/**
 * Returns the text of the first parsed entry for a tag, or null when there is none.
 *
 * @param array $vals Value array filled by xml_parse_into_struct().
 * @param array $index Index array filled by xml_parse_into_struct().
 * @param string $tag Tag name as it appears in $index (upper case, since the
 *                    parser is created with its default case folding).
 * @return string|null The entry's 'value', or null if the tag is absent or its
 *                     first entry carries no text.
 */
function wfPubmedFirstValue( $vals, $index, $tag ){
	if (!isset($index[$tag][0])) return null;
	$entry = $index[$tag][0];
	return isset($vals[$entry]['value']) ? $vals[$entry]['value'] : null;
}
?>