-- Module:Transcluder

local p = {}

-- Helper function to test for truthy and falsy values
-- Falsy values are nil, false, "", "0", "false" and "no"; everything else is truthy
-- @param value Any value, typically a template argument
-- @return false for the falsy values listed above, true otherwise
local function is(value)
	local falsy = value == nil
		or value == false
		or value == ""
		or value == "0"
		or value == "false"
		or value == "no"
	return not falsy
end

-- Whether errors are reported; tested with is(), so "no", "0", "false" and "" disable reporting
-- Declared local so that it does not leak into the global environment
local errors = true -- show errors by default

-- Error handling function
-- Throws a Lua error or returns an empty string if error reporting is disabled
-- @param message Error key; 'error-' is prepended to it before it is passed to Module:TNT's format() together with 'I18n/Excerpt'
-- @param value Optional value forwarded to format()
-- @return "" when error reporting is disabled; otherwise does not return
local function throwError(message, value)
	if not is(errors) then
		return "" -- error reporting is disabled
	end
	message = require('Module:TNT').format('I18n/Excerpt', 'error-' .. message, value)
	error(message, 2) -- level 2: report the error at the caller's position
end

-- Error handling function
-- Returns a wiki friendly error or an empty string if error reporting is disabled
-- @param message Error key or error text
-- @param value Optional value interpolated into the message with mw.ustring.format
-- @return "" when error reporting is disabled, otherwise an mw.html <div class="error"> object
local function getError(message, value)
	if not is(errors) then
		return "" -- error reporting is disabled
	end

	-- NOTE(review): no configuration table `d` is defined in this file, so it is
	-- treated as optional; without it the message is shown as received -- confirm
	-- where `d` was meant to come from
	local config = type(d) == 'table' and d or {}
	local known = type(config.errors) == 'table' and config.errors or {}

	message = tostring(known[message] or message or "")
	if value ~= nil then
		-- The message may contain stray '%' characters, so never let formatting
		-- raise a second error while an error is being reported
		local ok, formatted = pcall(mw.ustring.format, message, value)
		if ok then
			message = formatted
		end
	end
	message = (known.prefix or "") .. message

	-- Categorize content pages that show an error
	if config.errorsCategory and mw.title.getCurrentTitle().isContentPage then
		local errorsCategory = mw.title.new(config.errorsCategory, 'Category')
		if errorsCategory then
			message = message .. '[[' .. errorsCategory.prefixedText .. ']]'
		end
	end

	message = mw.html.create('div'):addClass('error'):wikitext(message)
	return message
end

-- Helper function to match from a list of regular expressions
-- Like so: match pre..list[1]..post or pre..list[2]..post or ...
-- @param text String to search
-- @param pre Pattern fragment placed before each list entry
-- @param list Sequence of pattern fragments, tried in order
-- @param post Pattern fragment placed after each list entry
-- @param init Optional position where the search starts
-- @return The results of the first successful mw.ustring.match, or nil if none matches
local function matchAny(text, pre, list, post, init)
	for index = 1, #list do
		local pattern = pre .. list[index] .. post
		local captures = { mw.ustring.match(text, pattern, init) }
		if captures[1] then
			return unpack(captures)
		end
	end
	return nil
end

-- Helper function to convert a comma-separated list of numbers or min-max ranges into a table of flags
-- For example: "1,3-5" to {[1]=true,[3]=true,[4]=true,[5]=true}
-- Numbers that are not listed are simply absent (nil) rather than false
-- Items that are neither a number nor a min-max range are ignored
-- @param str String such as "1,3-5"; a plain number is accepted too
-- @return Table mapping each selected number to true; empty if str is nil
local function numberFlags(str)
	local flags = {}
	if not str then
		return flags
	end

	-- parse ranges: "1,3-5" to {"1","3-5"}
	for _, range in ipairs(mw.text.split(tostring(str), ",")) do
		-- "3-5" to first=3 last=5
		local first, last = mw.ustring.match(range, "^%s*(%d+)%s*%-%s*(%d+)%s*$")
		if not first then
			-- "1" to first=1 last=1
			first = mw.ustring.match(range, "^%s*(%d+)%s*$")
			last = first
		end
		-- convert explicitly instead of relying on string coercion in the for loop
		first, last = tonumber(first), tonumber(last)
		if first and last then
			for index = first, last do
				flags[index] = true
			end
		end
	end
	return flags
end

-- Helper function to convert template arguments into a table of options fit for get()
-- Arguments given directly to the invocation have priority over those of the parent (template) frame
-- @param frame Frame object of the invocation
-- @return Table of arguments, where:
--   paraflags and fileflags hold the parsed "paragraphs" and "files" arguments
--   non-empty "paragraphs", "files", "tables" and "lists" are replaced by their parsed
--   flags ("1,3-5" to {[1]=true,[3]=true,...}); the get* functions index these options
--   by number, which can never succeed on the raw strings
local function parseArgs(frame)
	local args = {}

	local parent = frame:getParent()
	if parent then -- there may be no parent frame
		for key, value in pairs(parent.args) do
			args[key] = value
		end
	end
	for key, value in pairs(frame.args) do
		args[key] = value -- args from the invocation have priority over parent args from template
	end

	args.paraflags = numberFlags(args["paragraphs"] or "") -- parse paragraph numbers
	args.fileflags = numberFlags(args["files"] or "") -- parse file numbers

	for _, key in ipairs({ "paragraphs", "files", "tables", "lists" }) do
		local value = args[key]
		if type(value) == "string" then
			if mw.text.trim(value) == "" then
				args[key] = nil -- an empty argument means "no selection", same as an absent one
			else
				args[key] = numberFlags(value)
			end
		end
	end

	return args
end

-- Get a page's content, following redirects
-- Also returns the page name, or the target page name if a redirect was followed
-- @param page Page name, passed to mw.title.new
-- @return false, false if no title object could be created for the page;
--   otherwise the result of getContent() on the (target) title, followed by its prefixed name
local function getContent(page)
	local title = mw.title.new(page)
	if not title then
		return false, false
	end

	-- Follow the redirect, if any
	title = title.redirectTarget or title

	return title:getContent(), title.prefixedText
end

-- Get the paragraphs only
-- @param text Wikitext to search
-- @param options Optional table; if options.paragraphs is set, only the paragraphs whose
--   number is a truthy key of options.paragraphs are returned
-- @return Sequence of paragraphs
local function getParagraphs(text, options)
	local paragraphs = {}
	local wanted = options and options.paragraphs
	local replaced

	text = '\n' .. text .. '\n' -- add newlines to match start and end of text

	-- Remove tables and full-line templates such as infoboxes
	-- The pattern is applied directly (the matched wikitext must never be reused as a
	-- pattern, since it is full of magic characters) and repeated because the newline
	-- shared by two consecutive blocks can only be part of one match per pass
	repeat
		text, replaced = mw.ustring.gsub(text, "\n%b{}\n", '\n')
	until replaced == 0

	-- Remove files, or anything else in brackets that fills whole lines
	repeat
		text, replaced = mw.ustring.gsub(text, "\n%b[]\n", '\n')
	until replaced == 0

	-- Assume that each remaining line is a paragraph
	local count = 0
	for paragraph in mw.ustring.gmatch(text, "[^\n]+") do
		count = count + 1
		if not wanted or wanted[count] then
			table.insert(paragraphs, paragraph)
		end
	end

	return paragraphs
end

-- Get the files only
-- @param text Wikitext to search
-- @param options Optional table; if options.files is a table, only the files whose
--   number is a truthy key of options.files are returned
-- @return Sequence of file links, brackets included
local function getFiles(text, options)
	local files = {}

	-- Build a list with all the file namespaces, escaped for use in patterns
	-- The names are copied: inserting into the aliases table of mw.site would
	-- modify data that does not belong to this function
	local fileNamespaces = {}
	local function addNamespace(name)
		local escaped = mw.ustring.gsub(name, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%%1")
		table.insert(fileNamespaces, escaped)
	end
	for _, alias in ipairs(mw.site.namespaces.File.aliases) do
		addNamespace(alias)
	end
	addNamespace(mw.site.namespaces.File.name)
	addNamespace(mw.site.namespaces.File.canonicalName)

	local wanted = options and options.files
	if type(wanted) ~= 'table' then
		wanted = nil -- ignore anything that is not a table of flags
	end

	-- Check all links; the candidate itself must be a file link, from its first
	-- bracket to its last
	local count = 0
	for candidate in mw.ustring.gmatch(text, "%b[]") do
		if matchAny(candidate, '^%[%[%s*', fileNamespaces, "%s*:.*%]%]$") then
			count = count + 1
			if not wanted or wanted[count] then
				table.insert(files, candidate)
			end
		end
	end

	return files
end

-- Get the tables only
-- @param text Wikitext to search
-- @param options Optional table; if options.tables is set, only the tables whose
--   number is a truthy key of options.tables are returned
-- @return Sequence of wikitext tables
local function getTables(text, options)
	local wanted = options and options.tables
	local found = {}
	local index = 0
	for block in mw.ustring.gmatch(text, "%b{}") do
		-- anything in balanced braces that starts with {| counts as a table
		if mw.ustring.sub(block, 1, 2) == '{|' then
			index = index + 1
			if not wanted or wanted[index] then
				found[#found + 1] = block
			end
		end
	end
	return found
end

-- Get the lists only
-- Each line starting with * or # is matched on its own, together with the newline before it
-- @param text Wikitext to search
-- @param options Optional table; if options.lists is set, only the matches whose
--   number is a truthy key of options.lists are returned
-- @return Sequence of matches
local function getLists(text, options)
	local wanted = options and options.lists
	local items = {}
	local seen = 0
	-- prepend newline to match lists at the start
	for item in mw.ustring.gmatch('\n' .. text, '\n[*#][^\n]+') do
		seen = seen + 1
		if not wanted or wanted[seen] then
			items[#items + 1] = item
		end
	end
	return items
end

-- Get the lead section of the given wikitext
-- @param text Wikitext of a page
-- @return The text before the first heading; if there is nothing but whitespace
--   there, the result of throwError('lead-empty')
local function getLead(text)
	text = mw.ustring.gsub(text, '\n==.*', '') -- remove first heading and everything after it
	text = mw.ustring.gsub(text, '^==.*', '') -- ...even if the text starts with a heading
	if not mw.ustring.find(text, '%S') then
		return throwError('lead-empty')
	end
	return text
end

-- Get the given section out of the given wikitext
-- @param text Wikitext of a page
-- @param section Title of the section; it is decoded with mw.uri.decode before matching
-- @return The trimmed content of the section, subsections included;
--   the result of throwError() if the section is not found or is empty
local function getSection(text, section)
	-- escape the characters that are magic in patterns
	local escapedSection = mw.ustring.gsub(mw.uri.decode(section), '([%^%$%(%)%%%.%[%]%*%+%-%?])', '%%%1')

	-- wrap the text in newlines so that a heading on the first line and content
	-- on the last line are matched too
	local level, content = mw.ustring.match('\n' .. text .. '\n', '\n(==+)%s*' .. escapedSection .. '%s*==.-\n(.*)')
	if not content then
		return throwError('section-not-found', section)
	end

	-- remove later sections with headings at this level or higher
	-- the newline is prepended because the one following the heading was consumed by
	-- the match above, so a heading right below it would otherwise go unnoticed
	local nextSection = '\n==' .. mw.ustring.rep('=?', #level - 2) .. '[^=].*'
	content = mw.ustring.gsub('\n' .. content, nextSection, '')

	content = mw.text.trim(content)
	if content == '' then
		return throwError('section-empty', section)
	end
	return content
end

-- Get the given fragment of the given page through the #lst parser function
-- @todo Implement custom parsing of fragments rather than relying on #lst
-- @param page Name of the page
-- @param fragment Name of the fragment
-- @return The trimmed output of #lst; the result of throwError('fragment-empty') if it is empty
local function getFragment(page, fragment)
	local frame = mw.getCurrentFrame()
	local text = frame:callParserFunction('#lst', page, fragment)
	text = mw.text.trim(text)
	if text == '' then
		return throwError('fragment-empty', fragment)
	end
	return text
end

-- Remove from the given wikitext the markup that should not be part of an excerpt
-- @param text Wikitext to clean
-- @param options Optional table; options.keepSubsections and options.keepRefs are tested with is()
-- @return The cleaned wikitext
local function cleanText(text, options)
	if not options then
		options = {}
	end

	-- case-insensitive tag names
	local noinclude = "[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]"
	local onlyinclude = "[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]"

	text = mw.ustring.gsub(text, "<!%-%-.-%-%->", "") -- remove HTML comments
	text = mw.ustring.gsub(text, "<" .. noinclude .. ">.-</" .. noinclude .. ">", "") -- remove noinclude bits

	if mw.ustring.find(text, onlyinclude) then -- avoid expensive search if possible
		text = mw.ustring.gsub(text, "</" .. onlyinclude .. ">.-<" .. onlyinclude .. ">", "") -- remove text between onlyinclude sections
		text = mw.ustring.gsub(text, "^.-<" .. onlyinclude .. ">", "") -- remove text before first onlyinclude section
		text = mw.ustring.gsub(text, "</" .. onlyinclude .. ">.*", "") -- remove text after last onlyinclude section
	end

	if not is(options.keepSubsections) then
		text = mw.ustring.gsub(text, "\n==.*", "") -- remove first ==Heading== and everything after it
		text = mw.ustring.gsub(text, "^==.*", "") -- ...even if the lead is empty
	end

	if not is(options.keepRefs) then
		text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]-/%s*>", "") -- remove refs cited elsewhere
		text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff].->.-<%s*/%s*[Rr][Ee][Ff]%s*>", "") -- remove refs
		-- NOTE(review): stripTemplate is not defined in this file and gsub raises an
		-- error when given nil, so the step is skipped unless the function exists --
		-- confirm where it was meant to come from
		if type(stripTemplate) == 'function' then
			text = mw.ustring.gsub(text, "%b{}", stripTemplate) -- remove unwanted templates such as references
		end
	end

	text = mw.ustring.gsub(text, "%s*__[A-Z]*__", "") -- remove magic words
	text = mw.ustring.gsub(text, "%s*%[%[%s*:?[Cc]ategory:.-%]%]", "") -- remove categories
	return text
end

-- Replace the first call to each reference defined outside of the text for the full reference, to prevent undefined references
-- Then prefix the page title to the reference names to prevent conflicts
-- that is, replace <ref name="Foo"> for <ref name="Title Foo">
-- and also <ref name="Foo" /> for <ref name="Title Foo"/>
-- also remove reference groups: <ref group="Bar"> for <ref>
-- and <ref name="Foo" group="Bar"> for <ref name="Title Foo">
-- @todo The current regex may fail in cases with both kinds of quotes, like <ref name="Darwin's book">
-- @param text Wikitext of the excerpt
-- @param page Name of the page the excerpt comes from
-- @param full Optional full wikitext of the page; fetched with getContent(page) if not given
-- @return The wikitext with its references fixed
local function fixRefs(text, page, full)
	if not full then
		full = getContent(page)
	end

	local seen = {} -- set of the reference names already processed
	local position = 1
	while position < mw.ustring.len(text) do
		-- find the next self-closing named reference; the final () captures the position right after it
		local refName, nextPosition = mw.ustring.match(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?([^\"'>]+)[\"']?[^>]*/%s*>()", position)
		if not refName then
			break
		end
		position = nextPosition
		refName = mw.text.trim(refName)
		if not seen[refName] then -- make sure we process each ref name only once
			seen[refName] = true
			local escaped = mw.ustring.gsub(refName, "[%^%$%(%)%.%[%]%*%+%-%?%%]", "%%%0") -- escape special characters
			local opening = "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?%s*" .. escaped .. "%s*[\"']?"
			local definition = opening .. "[^>/]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>"
			local refBody = mw.ustring.match(text, definition)
			if not refBody and full then -- the ref body is not in the excerpt
				refBody = mw.ustring.match(full, definition)
				if refBody then -- the ref body was found elsewhere
					-- a function is used as replacement so that any '%' in the body is kept as is
					text = mw.ustring.gsub(text, opening .. "[^>]*/?%s*>", function() return refBody end, 1)
				end
			end
		end
	end

	-- prefix the page title to the reference names; quotes would break the attribute
	-- and '%' is magic in replacement strings
	local prefix = mw.ustring.gsub(page, '"', '')
	prefix = mw.ustring.gsub(prefix, '%%', '%%%%')
	text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?([^\"'>/]+)[\"']?[^>/]*(/?)%s*>", '<ref name="' .. prefix .. ' %1"%2>')

	-- remove the group attribute when it is the last one of the tag, keeping the rest of the tag
	text = mw.ustring.gsub(text, "(<%s*[Rr][Ee][Ff][^>]-)%s*group%s*=%s*[\"']?[^\"'>/]+[\"']?(%s*/?%s*>)", "%1%2")

	return text
end

-- Replace the bold title or synonym near the start of the article by a wikilink to the article
-- @param text Wikitext of the excerpt
-- @param page Name of the page the excerpt comes from
-- @return The wikitext, with the first bold title or synonym linked to the page
local function linkBold(text, page)
	local lang = mw.language.getContentLanguage()

	-- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
	-- plain search: special characters in page represent themselves
	local position = mw.ustring.find(text, "'''" .. lang:ucfirst(page) .. "'''", 1, true)
		or mw.ustring.find(text, "'''" .. lang:lcfirst(page) .. "'''", 1, true)

	if position then
		-- link it: position points at the first of the three opening quotes
		local length = mw.ustring.len(page)
		text = mw.ustring.sub(text, 1, position + 2)
			.. "[[" .. mw.ustring.sub(text, position + 3, position + length + 2) .. "]]"
			.. mw.ustring.sub(text, position + length + 3, -1)
	else
		-- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name)
		text = mw.ustring.gsub(text, "'''(.-'*)'''", function(bold)
			if not mw.ustring.find(bold, "%[") then -- if not wikilinked
				return "'''[[" .. page .. "|" .. bold .. "]]'''" -- replace '''Foo''' by '''[[page|Foo]]'''
			else
				return nil -- instruct gsub to make no change
			end
		end, 1)
	end

	return text
end

-- Main function for modules
-- @param page Name of the page to transclude, optionally followed by #Section
-- @param options Optional table of options; the ones read here are errors, fragment, nobold, only and keepRefs
-- @return The requested wikitext, or whatever throwError() returns when something is missing
local function get(page, options)
	options = options or {}
	if options.errors then
		errors = options.errors
	end

	if not page or page == "" then
		return throwError('no-page')
	end

	-- Split "Page#Section" and fetch the content of the page
	local requested, section = mw.ustring.match(page, '([^#]+)#?([^#]*)')
	local text, resolved = getContent(requested)
	if not resolved then
		return throwError('no-page')
	end
	if not text then
		return throwError('page-not-found', resolved)
	end
	local full = text -- save the full text for later

	if is(options.fragment) then
		text = getFragment(resolved, options.fragment)
	end

	if is(section) then
		text = getSection(text, section)
	end

	-- Replace the bold title or synonym near the start of the article by a wikilink to the article
	text = linkBold(text, resolved)

	-- Remove bold text if requested
	if is(options.nobold) then
		text = mw.ustring.gsub(text, "'''", "")
	end

	-- Keep only the requested elements
	local extractors = {
		lists = getLists, list = getLists,
		files = getFiles, file = getFiles,
		tables = getTables, table = getTables,
		paragraphs = getParagraphs, paragraph = getParagraphs,
	}
	local extract = extractors[options.only]
	if extract then
		text = table.concat(extract(text, options), '\n\n')
	end

	-- Fix broken references
	if is(options.keepRefs) then
		text = fixRefs(text, resolved, full)
	end

	return text
end

-- Main invocation function for templates
-- @param frame Frame object of the invocation
-- @return The preprocessed wikitext returned by get(), or the result of getError() if get() raised an error
local function main(frame)
	local args = parseArgs(frame)
	local ok, result = pcall(get, args[1], args) -- the first unnamed argument is the page
	if ok then
		return frame:preprocess(result)
	end
	return getError(result)
end

-- Entry points for templates
p.main = function(frame)
	return main(frame)
end

-- Entry points for other Lua modules
-- Each one forwards its arguments to the local function of the same name
p.get = function(page, options)
	return get(page, options)
end

p.getContent = function(page)
	return getContent(page)
end

p.getSection = function(text, options)
	return getSection(text, options)
end

p.getParagraphs = function(text, options)
	return getParagraphs(text, options)
end

p.getTables = function(text, options)
	return getTables(text, options)
end

p.getLists = function(text, options)
	return getLists(text, options)
end

p.getFiles = function(text, options)
	return getFiles(text, options)
end

p.getError = function(message, value)
	return getError(message, value)
end

return p