User:J.saterfiel

From MediaWiki.org
Jump to navigation Jump to search

Here is the PdfBook.php I use on my MediaWiki installation (1.14). It is a little more advanced than the one currently available. List of new features:

  • Ability to remove links in the documents
  • Ability for a printed Category (collection of articles) to have a cover page with its Category Name and date created printed on it
  • Ability to have a "Download as PDF" link in the toolbox on every page, without needing to explicitly place a link on each page you want to create PDFs from.
  • Ability to change the date format used on the header page (http://us3.php.net/manual/en/function.date.php)
  • Ability to change the information printed in each page header and footer (look up the available format options in the htmldoc documentation at http://www.htmldoc.org/; once htmldoc is installed, run htmldoc -help, as the full list of options is not displayed on the website).
<?php

/**
 * PdfBook extension altered version by J.saterfiel
 * - Composes a book from articles in a category and exports as a PDF book
 *
 * See http://www.mediawiki.org/Extension:PdfBook for installation and usage details
 * See http://www.organicdesign.co.nz/Extension_talk:PdfBook for development notes and discussion
 *
 * Started: 2007-08-08
 * 
 * @package MediaWiki
 * @subpackage Extensions
 * @author Aran Dunkley [http://www.organicdesign.co.nz/nad User:Nad]
 * @author J.saterfiel
 * @copyright © 2007 Aran Dunkley
 * @licence GNU General Public Licence 2.0 or later
 */
# Refuse to run when the file is requested directly instead of being loaded by MediaWiki
if ( !defined( 'MEDIAWIKI' ) ) {
	die( 'Not an entry point.' );
}

# Version string reported in the extension credits below
define( 'PDFBOOK_VERSION', '1.0.3, 2008-12-09' );

# Register the set-up callback and the magic-word hook handler
$wgExtensionFunctions[] = 'wfSetupPdfBook';
$wgHooks['LanguageGetMagic'][] = 'wfPdfBookLanguageGetMagic';

# Credits entry for this extension
$wgExtensionCredits['parserhook'][] = array(
	'path' => __FILE__,
	'name' => 'PdfBook',
	'author' => '[http://www.organicdesign.co.nz/nad User:Nad]',
	'description' => 'Composes a book from articles in a category and exports as a PDF book',
	'url' => 'http://www.mediawiki.org/wiki/Extension:PdfBook',
	'version' => PDFBOOK_VERSION
);

/**
 * Exports a single article, the members of a category, or the articles linked
 * from a bullet list, either as one HTML document or (via the htmldoc
 * command-line tool) as a PDF.
 *
 * An instance registers itself as an 'UnknownAction' hook handler and reacts
 * to action=pdfbook.
 */
class PdfBook {
	/** When true, anchors are rewritten to <span> so the output contains no links */
	public $ignoreLinks = false;
	/** Cover page HTML; ARTICLE_TITLE, WIKI_URL, CURRENT_DATE and CURRENT_YEAR are substituted */
	public $coverPage = "<html><body><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p>".
"<center><h1><b>ARTICLE_TITLE</b></h1></center><center><h3>Downloaded from WIKI_URL</h3></center><center><h3>Accurate as of: CURRENT_DATE</h3></center></body></html>";
	/** date() format used for the CURRENT_DATE placeholder */
	public $dateFormat = 'n/j/Y h:i A';
	/** Value passed to htmldoc's --header option (see the htmldoc documentation) */
	public $headerFormat = 'th.';
	/** Value passed to htmldoc's --footer option (see the htmldoc documentation) */
	public $footerFormat = '.D/';

	/**
	 * Register this object as an UnknownAction handler and declare the 'pdf' log type.
	 *
	 * Uses __construct rather than a method named after the class: PHP 8 no
	 * longer treats the latter as a constructor, so nothing would be registered.
	 */
	public function __construct() {
		global $wgHooks;
		global $wgLogTypes, $wgLogNames, $wgLogHeaders, $wgLogActions;
		$wgHooks['UnknownAction'][] = $this;
		# Add a new pdf log type
		$wgLogTypes[]             = 'pdf';
		$wgLogNames  ['pdf']      = 'pdflogpage';
		$wgLogHeaders['pdf']      = 'pdflogpagetext';
		$wgLogActions['pdf/book'] = 'pdflogentry';
	}

	/**
	 * Perform the export operation.
	 *
	 * @param string $action  Requested action; only 'pdfbook' is handled
	 * @param object $article Article the action was requested on
	 * @return bool true when the action is not ours, false once it has been handled
	 */
	public function onUnknownAction($action, $article) {
		global $wgOut, $wgUser, $wgTitle, $wgParser, $wgRequest;
		global $wgServer, $wgArticlePath, $wgScriptPath, $wgUploadPath, $wgUploadDirectory, $wgScript;

		# Anything other than our own action is left to other handlers
		if ($action != 'pdfbook') return true;

		$title = $article->getTitle();
		$opt   = ParserOptions::newFromUser($wgUser);

		# Log the export
		$msg = $wgUser->getUserPage()->getPrefixedText().' exported as a PDF book';
		$log = new LogPage('pdf', false);
		$log->addEntry('book', $wgTitle, $msg);

		# Initialise PDF variables (request value, then $wgPdfBook* global, then default)
		$format  = $wgRequest->getText('format');
		$notitle = false; # per-article headings are always printed
		$layout  = $format == 'single' ? '--webpage' : '--firstpage toc';
		$charset = $this->setProperty('Charset',     'iso-8859-1');
		$left    = $this->setProperty('LeftMargin',  '1cm');
		$right   = $this->setProperty('RightMargin', '1cm');
		$top     = $this->setProperty('TopMargin',   '1cm');
		$bottom  = $this->setProperty('BottomMargin','1cm');
		$font    = $this->setProperty('Font',        'Arial');
		$size    = $this->setProperty('FontSize',    '8');
		$linkcol = $this->setProperty('LinkColour',  '217A28');
		$levels  = $this->setProperty('TocLevels',   '2');
		$exclude = $this->setProperty('Exclude',     array());
		$width   = $this->setProperty('Width',       '');
		# Every value above may come straight from the query string, so each one
		# is shell-escaped before it is placed on the htmldoc command line
		$width   = $width ? '--browserwidth '.escapeshellarg($width) : '';
		# split() no longer exists in PHP 7+, and its POSIX syntax never understood \s
		if (!is_array($exclude)) $exclude = preg_split('/\s*,\s*/', trim($exclude));

		# Select articles from members if a category or links in content if not
		if ($format == 'single') $articles = array($title);
		else {
			$articles = array();
			if ($title->getNamespace() == NS_CATEGORY) {
				$db     = wfGetDB(DB_SLAVE);
				$cat    = $db->addQuotes($title->getDBkey());
				$result = $db->select(
					'categorylinks',
					'cl_from',
					"cl_to = $cat",
					'PdfBook',
					array('ORDER BY' => 'cl_sortkey')
				);
				if ($result instanceof ResultWrapper) $result = $result->result;
				while ($row = $db->fetchRow($result)) {
					# newFromID gives no Title for a stale link row; skip those
					$member = Title::newFromID($row[0]);
					if (is_object($member)) $articles[] = $member;
				}
			}
			else {
				$text = $article->fetchContent();
				$text = $wgParser->preprocess($text, $title, $opt);
				# Each "* [[Target|label]]" bullet line names one article of the book
				if (preg_match_all('/^\\*\\s*\\[{2}\\s*([^\\|\\]]+)\\s*.*?\\]{2}/m', $text, $links)) {
					foreach ($links[1] as $link) {
						# newFromText gives no Title for an invalid link target; skip those
						$member = Title::newFromText($link);
						if (is_object($member)) $articles[] = $member;
					}
				}
			}
		}

		# Format the article(s) as a single HTML document with absolute URL's.
		# The path globals are overwritten on purpose so that every link the
		# parser generates from here on is absolute.
		$book = $title->getText();
		$html = '';
		$wgArticlePath = $wgServer.$wgArticlePath;
		$wgScriptPath  = $wgServer.$wgScriptPath;
		$wgUploadPath  = $wgServer.$wgUploadPath;
		$wgScript      = $wgServer.$wgScript;
		$opt->setEditSection(false);    # remove section-edit links
		foreach ($articles as $member) {
			$ttext = $member->getPrefixedText();
			if (in_array($ttext, $exclude)) continue;

			$page = new Article($member);
			$text = $page->fetchContent();
			$text = preg_replace('/<!--([^@]+?)-->/s', '@@'.'@@$1@@'.'@@', $text); # preserve HTML comments

			if ($format != 'single') $text .= '__NOTOC__';
			$wgOut->setHTMLTitle($ttext);   # use this so DISPLAYTITLE magic works
			$out   = $wgParser->parse($text, $member, $opt, true, true);
			$ttext = $wgOut->getHTMLTitle();
			$text  = $out->getText();
			$text  = preg_replace('|(<img[^>]+?src=")(/.+?>)|', "$1$wgServer$2", $text);       # make image urls absolute
			$text  = preg_replace('|<div\s*class=[\'"]?noprint["\']?>.+?</div>|s', '', $text); # non-printable areas
			$text  = preg_replace('|@{4}([^@]+?)@{4}|s', '<!--$1-->', $text);                  # HTML comments hack
			if ($this->ignoreLinks) {
				$text = str_ireplace('<a', '<span', $text);
				$text = str_ireplace('</a>', '</span>', $text);
			}
			$ttext = basename($ttext);      # keep only the last path component of the title
			$h1    = $notitle ? '' : "<center><h1>$ttext</h1></center>";
			# NOTE(review): the text is always converted to ISO-8859-1, whatever
			# the Charset property says - confirm before changing
			$html .= utf8_decode("$h1$text\n");
		}

		# Name offered to the browser; quotes and line breaks would corrupt the header
		$download = str_replace(array('"', "\r", "\n"), '', $book);

		# If format=html in query-string, return html content directly
		if ($format == 'html') {
			$wgOut->disable();
			header("Content-Type: text/html");
			header("Content-Disposition: attachment; filename=\"$download.html\"");
			print $html;
			return false;
		}

		# Build the cover page; substituted values are escaped because they are
		# plain text going into HTML
		$serverName = isset($_SERVER['SERVER_NAME']) ? $_SERVER['SERVER_NAME'] : '';
		$cover = str_replace(
			array('CURRENT_DATE', 'CURRENT_YEAR', 'ARTICLE_TITLE', 'WIKI_URL'),
			array(date($this->dateFormat), date('Y'), htmlspecialchars($book), htmlspecialchars($serverName)),
			$this->coverPage
		);

		# Write the HTML and the cover page to tmp files
		$file            = "$wgUploadDirectory/".uniqid('pdf-book');
		$fileCoverLetter = "$wgUploadDirectory/".uniqid('pdf-book').'.htm';
		if (file_put_contents($file, $html) === false || file_put_contents($fileCoverLetter, $cover) === false) {
			# Nothing has been sent yet, so report the problem on a normal page
			@unlink($file);
			@unlink($fileCoverLetter);
			$wgOut->addHTML('<p class="error">PdfBook: could not write temporary files to the upload directory.</p>');
			return false;
		}

		$toc = $format == 'single' ? '' : ' --toclevels '.escapeshellarg($levels);

		# Send the file to the client via htmldoc converter
		$wgOut->disable();
		header("Content-Type: application/pdf");
		header("Content-Disposition: attachment; filename=\"$download.pdf\"");
		$cmd  = 'htmldoc -t pdf --charset '.escapeshellarg($charset);
		$cmd .= ' --left '.escapeshellarg($left).' --right '.escapeshellarg($right);
		$cmd .= ' --top '.escapeshellarg($top).' --bottom '.escapeshellarg($bottom);
		$cmd .= ' --header '.escapeshellarg($this->headerFormat).' --footer '.escapeshellarg($this->footerFormat);
		$cmd .= ' --headfootsize 8 --quiet --jpeg --color';
		$cmd .= ' --bodyfont '.escapeshellarg($font).' --fontsize '.escapeshellarg($size);
		$cmd .= ' --linkstyle plain --linkcolor '.escapeshellarg($linkcol);
		$cmd .= ' --titlefile '.escapeshellarg($fileCoverLetter);
		$cmd .= "$toc --format pdf14 --numbered $layout $width";
		$cmd .= ' '.escapeshellarg($file);
		putenv("HTMLDOC_NOCGI=1");
		passthru($cmd);

		# Remove both tmp files; the cover page used to be left behind
		@unlink($file);
		@unlink($fileCoverLetter);
		return false;
	}

	/**
	 * Return a property for htmldoc, taken from the request parameter
	 * "pdf$name" if it is non-empty, else from the global "wgPdfBook$name"
	 * if it is set, else the passed default.
	 *
	 * @param string $name    Property name without the "pdf" / "wgPdfBook" prefix
	 * @param mixed  $default Value returned when neither source supplies one
	 * @return mixed
	 */
	public function setProperty($name, $default) {
		global $wgRequest;
		$requested = $wgRequest->getText("pdf$name");
		if ($requested) return $requested;
		if (isset($GLOBALS["wgPdfBook$name"])) return $GLOBALS["wgPdfBook$name"];
		return $default;
	}

	/**
	 * Needed in some versions to prevent Special:Version from breaking
	 */
	public function __toString() { return 'PdfBook'; }
}

/**
 * Called from the $wgExtensionFunctions array when extensions are initialised.
 *
 * Creates the global PdfBook instance and copies each $wgPDFBook* setting
 * that has been set onto the matching property of that instance.
 */
function wfSetupPdfBook() {
	global $wgPdfBook;

	$wgPdfBook = new PdfBook();

	# Configuration global => PdfBook property it overrides
	$settings = array(
		'wgPDFBookIgnoreLinks'         => 'ignoreLinks',
		'wgPDFBookCoverPage'           => 'coverPage',
		'wgPDFBookCoverPageDateFormat' => 'dateFormat',
		'wgPDFBookHeaderFormat'        => 'headerFormat',
		'wgPDFBookFooterFormat'        => 'footerFormat',
	);

	foreach ($settings as $globalName => $property) {
		# Unset (or null) globals leave the class default untouched
		if (isset($GLOBALS[$globalName])) {
			$wgPdfBook->$property = $GLOBALS[$globalName];
		}
	}
}

/**
 * Needed in MediaWiki >1.8.0 for magic word hooks to work properly.
 *
 * Registers the magic word named by the global $wgPdfBookMagic. That global
 * is not assigned anywhere in this file, so when it is unset or empty nothing
 * is registered (previously an entry with an empty key was added).
 *
 * @param array $magicWords Magic word table, modified in place
 * @param mixed $langCode   Stored as the first element of the new entry
 * @return bool Always true so that other hook handlers still run
 */
function wfPdfBookLanguageGetMagic(&$magicWords, $langCode = 0) {
	global $wgPdfBookMagic;
	if (is_string($wgPdfBookMagic) && $wgPdfBookMagic !== '') {
		$magicWords[$wgPdfBookMagic] = array($langCode, $wgPdfBookMagic);
	}
	return true;
}

// Add-on: "Download as PDF" link in the toolbox of every page.
// wfSpecialPdfNav adds the link data to the navigation URLs and
// wfSpecialPdfToolbox prints it as a list item at the end of the toolbox.
$wgHooks['SkinTemplateBuildNavUrlsNav_urlsAfterPermalink'][] = 'wfSpecialPdfNav';
$wgHooks['SkinTemplateToolboxEnd'][] = 'wfSpecialPdfToolbox';

/**
 * Hook handler that adds the "Download as PDF" entry to the navigation URLs.
 *
 * The link targets the current page ($wgTitle) with action=pdfbook and
 * format=single. The original code prefixed the query with
 * $nav_urls['href'], a key nothing in this file sets, which produced a
 * page-less "?action=..." link; that value is kept only as a fallback for
 * when no title object is available.
 *
 * @param object $skintemplate Skin template (unused)
 * @param array  $nav_urls     Navigation URL table, modified in place
 * @param mixed  $oldid        Unused
 * @param mixed  $revid        Unused
 * @return bool Always true so that other hook handlers still run
 */
function wfSpecialPdfNav( &$skintemplate, &$nav_urls, &$oldid, &$revid ) {
        global $wgTitle;

        $query = 'action=pdfbook&format=single';
        if ( is_object( $wgTitle ) ) {
                $href = $wgTitle->getLocalURL( $query );
        } else {
                $href = ( isset( $nav_urls['href'] ) ? $nav_urls['href'] : '' ) . '?' . $query;
        }

        $nav_urls['pdfprint'] = array(
                        'text' => 'Download as PDF',
                        'href' => $href
                );
        return true;
}

/**
 * Hook handler that prints the "Download as PDF" toolbox item.
 *
 * Nothing is printed when the template has no 'pdfprint' navigation entry.
 * With an empty href only the label is printed (the original computed the
 * escaped label but never echoed it, leaving an empty list item); otherwise
 * the label is printed as a link.
 *
 * @param object $monobook Template object whose data['nav_urls'] is read
 * @return bool Always true so that other hook handlers still run
 */
function wfSpecialPdfToolbox( &$monobook ) {
        if ( !isset( $monobook->data['nav_urls']['pdfprint'] ) ) {
                return true;
        }

        $link  = $monobook->data['nav_urls']['pdfprint'];
        $label = htmlspecialchars( $link['text'] );

        if ( $link['href'] == '' ) {
                echo '<li id="t-ispdf">' . $label . '</li>';
        } else {
                echo '<li id="t-pdf"><a href="' . htmlspecialchars( $link['href'] ) . '">' . $label . '</a></li>';
        }
        return true;
}
?>

Example configuration that would be placed in your LocalSettings.php file:

<?php
# PdfBook configuration
# require_once: loading the extension twice would redeclare the PdfBook class
require_once "$IP/extensions/PdfBook/PdfBook.php";

// Replace links in the exported document with plain text
$wgPDFBookIgnoreLinks = true;

// Cover page HTML. Placeholders:
//   ARTICLE_TITLE - the category or single article title
//   CURRENT_DATE  - the current date (see $wgPDFBookCoverPageDateFormat)
//   CURRENT_YEAR  - the current year, 4 digits
//   WIKI_URL      - the server name of the request
// The value below is the default
$wgPDFBookCoverPage = "<html><body><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p>"
."<p>&nbsp;</p><center><h1><b>ARTICLE_TITLE</b></h1></center><center><h3>Downloaded from WIKI_URL</h3></center><center><h3>Accurate as of: CURRENT_DATE</h3>"
."</center></body></html>";

// PHP date() format for CURRENT_DATE; default value is 'n/j/Y h:i A'
$wgPDFBookCoverPageDateFormat = 'n/j/Y h:i A';

// htmldoc --header format; default value is 'th.'
$wgPDFBookHeaderFormat = 'th.';

// htmldoc --footer format; default value is '.D/'
$wgPDFBookFooterFormat = '.D/';
?>