User:J.saterfiel

From MediaWiki.org
Jump to: navigation, search

Here is the PdfBook.php I use on my MediaWiki installation (1.14). It's a little more advanced than the one currently available. List of new features:

  • Ability to remove links in the documents
  • Ability for a printed Category (collection of articles) to have a cover page with its Category Name and date created printed on it
  • Ability to have a "Download as PDF" link in the tool bar on any page without needing to explicitly place a link on a page you want to create pdfs on.
  • Ability to change the date format used on the header page (http://us3.php.net/manual/en/function.date.php)
  • Ability to change the information printed on each page header and footer (you will need to look up htmldoc at http://www.htmldoc.org/ for more info on what the options are; once it is installed, run htmldoc -help, as the full list of options is not displayed on their website.)
<?php
 
/**
 * PdfBook extension altered version by J.saterfiel
 * - Composes a book from articles in a category and exports as a PDF book
 *
 * See http://www.mediawiki.org/Extension:PdfBook for installation and usage details
 * See http://www.organicdesign.co.nz/Extension_talk:PdfBook for development notes and discussion
 *
 * Started: 2007-08-08
 * 
 * @package MediaWiki
 * @subpackage Extensions
 * @author Aran Dunkley [http://www.organicdesign.co.nz/nad User:Nad]
 * @author J.saterfiel
 * @copyright © 2007 Aran Dunkley
 * @licence GNU General Public Licence 2.0 or later
 */
# Refuse to run when the file is requested directly instead of being loaded by MediaWiki
if (!defined('MEDIAWIKI')) {
        die('Not an entry point.');
}
 
define('PDFBOOK_VERSION', '1.0.3, 2008-12-09');
 
# Wire the extension in: magic-word hook plus the deferred setup callback
$wgHooks['LanguageGetMagic'][] = 'wfPdfBookLanguageGetMagic';
$wgExtensionFunctions[]        = 'wfSetupPdfBook';
 
# Credits entry registered under the 'parserhook' group
$wgExtensionCredits['parserhook'][] = array(
        'path'        => __FILE__,
        'name'        => 'PdfBook',
        'author'      => '[http://www.organicdesign.co.nz/nad User:Nad]',
        'description' => 'Composes a book from articles in a category and exports as a PDF book',
        'url'         => 'http://www.mediawiki.org/wiki/Extension:PdfBook',
        'version'     => PDFBOOK_VERSION
);
 
/**
 * Handles the "pdfbook" action: renders one article, the members of a
 * category, or the articles bullet-linked from a page into a single HTML
 * document and either returns that HTML or pipes it through the external
 * "htmldoc" program to produce a PDF download.
 */
class PdfBook {
        /** When true, <a> elements in the rendered HTML are rewritten to <span> */
        public $ignoreLinks = false;
        /** Cover page template; ARTICLE_TITLE, WIKI_URL, CURRENT_DATE and CURRENT_YEAR are substituted */
        public $coverPage = "<html><body><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p>".
"<center><h1><b>ARTICLE_TITLE</b></h1></center><center><h3>Downloaded from WIKI_URL</h3></center><center><h3>Accurate as of: CURRENT_DATE</h3></center></body></html>";
        /** date() format used for the CURRENT_DATE placeholder */
        public $dateFormat = 'n/j/Y h:i A';
        /** Value passed to htmldoc's --header option */
        public $headerFormat = 'th.';
        /** Value passed to htmldoc's --footer option */
        public $footerFormat = '.D/';
 
 
        /**
         * Register this object as an UnknownAction handler and declare the
         * 'pdf' log type. Declared as __construct because a method named after
         * the class is no longer treated as a constructor by PHP 8.
         */
        function __construct() {
                global $wgHooks;
                global $wgLogTypes, $wgLogNames, $wgLogHeaders, $wgLogActions;
                $wgHooks['UnknownAction'][] = $this;
                # Add a new pdf log type
                $wgLogTypes[]             = 'pdf';
                $wgLogNames  ['pdf']      = 'pdflogpage';
                $wgLogHeaders['pdf']      = 'pdflogpagetext';
                $wgLogActions['pdf/book'] = 'pdflogentry';
        }
 
        /**
         * Perform the export operation
         *
         * @param string $action  the requested action name
         * @param object $article the article the action was requested on
         * @return bool true when the action is not 'pdfbook' (so other handlers
         *              may run), false once the export has been sent
         */
        function onUnknownAction($action, $article) {
                global $wgOut, $wgUser, $wgTitle, $wgParser, $wgRequest;
                global $wgServer, $wgArticlePath, $wgScriptPath, $wgUploadPath, $wgUploadDirectory, $wgScript;
 
                # Not our action: leave it to other handlers
                if ($action != 'pdfbook') return true;
 
                $title = $article->getTitle();
                $opt = ParserOptions::newFromUser($wgUser);
 
                # Log the export
                $msg = $wgUser->getUserPage()->getPrefixedText().' exported as a PDF book';
                $log = new LogPage('pdf', false);
                $log->addEntry('book', $wgTitle, $msg);
 
                # Initialise PDF variables
                $format  = $wgRequest->getText('format');
                $notitle = false; # per-article headings are always emitted in this version
                $layout  = $format == 'single' ? '--webpage' : '--firstpage toc';
                $charset = $this->setProperty('Charset',     'iso-8859-1');
                $left    = $this->setProperty('LeftMargin',  '1cm');
                $right   = $this->setProperty('RightMargin', '1cm');
                $top     = $this->setProperty('TopMargin',   '1cm');
                $bottom  = $this->setProperty('BottomMargin','1cm');
                $font    = $this->setProperty('Font',        'Arial');
                $size    = $this->setProperty('FontSize',    '8');
                $linkcol = $this->setProperty('LinkColour',  '217A28');
                $levels  = $this->setProperty('TocLevels',   '2');
                $exclude = $this->setProperty('Exclude',     array());
                $width   = $this->setProperty('Width',       '');
                # split() was removed in PHP 7; preg_split does the same comma splitting
                if (!is_array($exclude)) $exclude = preg_split('/\s*,\s*/', $exclude);
 
                # Select articles from members if a category or links in content if not
                if ($format == 'single') $articles = array($title);
                else {
                        $articles = array();
                        if ($title->getNamespace() == NS_CATEGORY) {
                                $db     = wfGetDB(DB_SLAVE);
                                $cat    = $db->addQuotes($title->getDBkey());
                                $result = $db->select(
                                        'categorylinks',
                                        'cl_from',
                                        "cl_to = $cat",
                                        'PdfBook',
                                        array('ORDER BY' => 'cl_sortkey')
                                );
                                if ($result instanceof ResultWrapper) $result = $result->result;
                                while ($row = $db->fetchRow($result)) $articles[] = Title::newFromID($row[0]);
                        }
                        else {
                                $text = $article->fetchContent();
                                $text = $wgParser->preprocess($text, $title, $opt);
                                if (preg_match_all('/^\\*\\s*\\[{2}\\s*([^\\|\\]]+)\\s*.*?\\]{2}/m', $text, $links))
                                        foreach ($links[1] as $link) $articles[] = Title::newFromText($link);
                        }
                }
 
                # Format the article(s) as a single HTML document with absolute URL's
                $book = $title->getText();
                $html = '';
                $wgArticlePath = $wgServer.$wgArticlePath;
                $wgScriptPath  = $wgServer.$wgScriptPath;
                $wgUploadPath  = $wgServer.$wgUploadPath;
                $wgScript      = $wgServer.$wgScript;
                foreach ($articles as $title) {
                        # The Title factories yield a non-object for unresolvable entries; skip those
                        if (!is_object($title)) continue;
                        $ttext = $title->getPrefixedText();
                        if (in_array($ttext, $exclude)) continue;
 
                        $article = new Article($title);
                        $text    = $article->fetchContent();
                        $text    = preg_replace('/<!--([^@]+?)-->/s', '@@'.'@@$1@@'.'@@', $text); # preserve HTML comments
 
                        if ($format != 'single') $text .= '__NOTOC__';
                        $opt->setEditSection(false);    # remove section-edit links
                        $wgOut->setHTMLTitle($ttext);   # use this so DISPLAYTITLE magic works
                        $out     = $wgParser->parse($text, $title, $opt, true, true);
                        $ttext   = $wgOut->getHTMLTitle();
                        $text    = $out->getText();
                        $text    = preg_replace('|(<img[^>]+?src=")(/.+?>)|', "$1$wgServer$2", $text);       # make image urls absolute
                        $text    = preg_replace('|<div\s*class=[\'"]?noprint["\']?>.+?</div>|s', '', $text); # non-printable areas
                        $text    = preg_replace('|@{4}([^@]+?)@{4}|s', '<!--$1-->', $text);                  # HTML comments hack
                        if ($this->ignoreLinks) {
                                $text = str_ireplace('<a','<span',$text);
                                $text = str_ireplace('</a>','</span>',$text);
                        }
                        $ttext   = basename($ttext);
                        $h1      = $notitle ? '' : "<center><h1>$ttext</h1></center>";
                        $html   .= utf8_decode("$h1$text\n");
                }
 
                # If format=html in query-string, return html content directly
                if ($format == 'html') {
                        $wgOut->disable();
                        header("Content-Type: text/html");
                        header("Content-Disposition: attachment; filename=\"$book.html\"");
                        print $html;
                        return false;
                }
 
                # Write the HTML to a tmp file
                $file = "$wgUploadDirectory/".uniqid('pdf-book');
                $fh = fopen($file, 'w+');
                fwrite($fh, $html);
                fclose($fh);
 
                # Write cover page to a tmp file (the '.htm' suffix was previously missing its dot)
                $curr_date = date($this->dateFormat);
                $curr_year = date('Y');
                $fileCoverLetter = "$wgUploadDirectory/".uniqid('pdf-book').'.htm';
                $coverPageTmp = str_replace(
                        array('CURRENT_DATE', 'CURRENT_YEAR', 'ARTICLE_TITLE', 'WIKI_URL'),
                        array($curr_date, $curr_year, $book, $_SERVER['SERVER_NAME']),
                        $this->coverPage
                );
                $fh = fopen($fileCoverLetter, 'w+');
                fwrite($fh, $coverPageTmp);
                fclose($fh);
 
                # Send the file to the client via htmldoc converter.
                # Every value that can come from the request or configuration is
                # shell-quoted, because setProperty() returns raw request text.
                $wgOut->disable();
                header("Content-Type: application/pdf");
                header("Content-Disposition: attachment; filename=\"$book.pdf\"");
                $cmd  = 'htmldoc -t pdf';
                $cmd .= $this->shellOption('--charset', $charset);
                $cmd .= $this->shellOption('--left', $left);
                $cmd .= $this->shellOption('--right', $right);
                $cmd .= $this->shellOption('--top', $top);
                $cmd .= $this->shellOption('--bottom', $bottom);
                $cmd .= $this->shellOption('--header', $this->headerFormat);
                $cmd .= $this->shellOption('--footer', $this->footerFormat);
                $cmd .= ' --headfootsize 8 --quiet --jpeg --color';
                $cmd .= $this->shellOption('--bodyfont', $font);
                $cmd .= $this->shellOption('--fontsize', $size);
                $cmd .= ' --linkstyle plain';
                $cmd .= $this->shellOption('--linkcolor', $linkcol);
                $cmd .= $this->shellOption('--titlefile', $fileCoverLetter);
                if ($format != 'single') $cmd .= $this->shellOption('--toclevels', $levels);
                $cmd .= " --format pdf14 --numbered $layout";
                if ($width) $cmd .= $this->shellOption('--browserwidth', $width);
                $cmd .= ' '.escapeshellarg($file);
                putenv("HTMLDOC_NOCGI=1");
                passthru($cmd);
 
                # Best-effort clean-up of both temporary files
                @unlink($file);
                @unlink($fileCoverLetter);
                return false;
        }
 
        /**
         * Build one " --flag value" fragment for the htmldoc command line with
         * the value quoted for the shell.
         */
        private function shellOption($flag, $value) {
                return " $flag ".escapeshellarg($value);
        }
 
        /**
         * Return a property for htmldoc using global, request or passed default
         *
         * Lookup order: non-empty request value "pdf$name", then the global
         * "wgPdfBook$name" if set, then $default.
         */
        function setProperty($name, $default) {
                global $wgRequest;
                $requested = $wgRequest->getText("pdf$name");
                if ($requested) return $requested;
                if (isset($GLOBALS["wgPdfBook$name"])) return $GLOBALS["wgPdfBook$name"];
                return $default;
        }
 
        /**
         * Needed in some versions to prevent Special:Version from breaking
         */
        function __toString() { return 'PdfBook'; }
}
 
/**
 * Called from $wgExtensionFunctions array when initialising extensions
 */
function wfSetupPdfBook() {
        global $wgPdfBook;
        $wgPdfBook = new PdfBook();
 
        # Optional LocalSettings globals and the PdfBook property each one overrides
        $overrides = array(
                'wgPDFBookIgnoreLinks'         => 'ignoreLinks',
                'wgPDFBookCoverPage'           => 'coverPage',
                'wgPDFBookCoverPageDateFormat' => 'dateFormat',
                'wgPDFBookHeaderFormat'        => 'headerFormat',
                'wgPDFBookFooterFormat'        => 'footerFormat',
        );
        foreach ($overrides as $globalName => $property) {
                # Only globals that are set (and not null) replace the class defaults
                if (isset($GLOBALS[$globalName])) {
                        $wgPdfBook->$property = $GLOBALS[$globalName];
                }
        }
}
 
/**
 * Needed in MediaWiki >1.8.0 for magic word hooks to work properly
 */
function wfPdfBookLanguageGetMagic(&$magicWords, $langCode = 0) {
        global $wgPdfBookMagic;
        # $wgPdfBookMagic is optional and is not assigned anywhere in this file;
        # without this guard an unset value registers a bogus magic word under
        # the empty-string key.
        if (is_string($wgPdfBookMagic) && $wgPdfBookMagic !== '') {
                $magicWords[$wgPdfBookMagic] = array($langCode, $wgPdfBookMagic);
        }
        # Always true so the remaining LanguageGetMagic handlers still run
        return true;
}
 
// Add-on: "Download as PDF" link in the skin toolbox.
// wfSpecialPdfNav stores the link's text and href under $nav_urls['pdfprint'];
// wfSpecialPdfToolbox then prints that entry as a toolbox list item.
$wgHooks['SkinTemplateBuildNavUrlsNav_urlsAfterPermalink'][] = 'wfSpecialPdfNav';
$wgHooks['SkinTemplateToolboxEnd'][] = 'wfSpecialPdfToolbox';
 
/**
 * Add a 'pdfprint' entry (text + href) to the skin's navigation URLs.
 *
 * The href is built from the current page title so that it keeps pointing at
 * the page being viewed even when the wiki uses index.php?title=... URLs.
 * When no title object is available, it falls back to a query string relative
 * to the current URL.
 *
 * @return bool always true so later hook handlers still run
 */
function wfSpecialPdfNav( &$skintemplate, &$nav_urls, &$oldid, &$revid ) {
        global $wgTitle;
        $query = 'action=pdfbook&format=single';
        if ( is_object( $wgTitle ) ) {
                $href = $wgTitle->getLocalURL( $query );
        } else {
                # No 'href' key is set on $nav_urls in this file, so guard the read
                $base = isset( $nav_urls['href'] ) ? $nav_urls['href'] : '';
                $href = $base . '?' . $query;
        }
        $nav_urls['pdfprint'] = array(
                        'text' => 'Download as PDF',
                        'href' => $href
                );
        return true;
}
 
/**
 * Print the 'pdfprint' navigation entry as a toolbox <li>.
 *
 * With an empty href only the escaped label is printed (id "t-ispdf");
 * otherwise the label is wrapped in a link (id "t-pdf"). Nothing is printed
 * when the entry is absent.
 *
 * @return bool always true so later hook handlers still run
 */
function wfSpecialPdfToolbox( &$monobook ) {
        if ( !isset( $monobook->data['nav_urls']['pdfprint'] ) ) {
                return true;
        }
        $entry = $monobook->data['nav_urls']['pdfprint'];
        $label = htmlspecialchars( $entry['text'] );
        if ( $entry['href'] == '' ) {
                # The label was previously escaped but never echoed, leaving an empty <li>
                echo '<li id="t-ispdf">' . $label . '</li>';
        } else {
                echo '<li id="t-pdf"><a href="' . htmlspecialchars( $entry['href'] ) . '">' . $label . '</a></li>';
        }
        return true;
}
?>

Example configuration that would be placed in your LocalSettings.php file:

<?php
# PdfBook configuration (full "<?php" tag: the short "<?" form is not parsed
# when short_open_tag is disabled)
require "$IP/extensions/PdfBook/PdfBook.php";
# Rewrite links to plain text in the generated document
$wgPDFBookIgnoreLinks = true;
//Use ARTICLE_TITLE to place the category or single article title
//Use CURRENT_DATE to place the current date
//Use CURRENT_YEAR to place the current year 4 digits
//Use WIKI_URL for the url of the request
//Default value is below
$wgPDFBookCoverPage = "<html><body><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p><p>&nbsp;</p>"
."<p>&nbsp;</p><center><h1><b>ARTICLE_TITLE</b></h1></center><center><h3>Downloaded from WIKI_URL</h3></center><center><h3>Accurate as of: CURRENT_DATE</h3>"
."</center></body></html>";
//Default value is 'n/j/Y h:i A'
$wgPDFBookCoverPageDateFormat = 'n/j/Y h:i A';
//Default value is 'th.'
$wgPDFBookHeaderFormat = 'th.';
//Default value is '.D/'
$wgPDFBookFooterFormat = '.D/';
?>
Personal tools
Namespaces

Variants
Actions
Navigation
Support
Download
Development
Communication
Print/export
Toolbox