-- Module:Transcluder

-- Table of functions exported by this module; it is filled in below and returned at the end of the file
local p = {}

-- Helper function to test for truthy and falsy values
-- Stricter than plain Lua truthiness: besides nil and false, the values
-- '', 0, '0', 'false' and 'no' are also treated as false
-- @param value Any value
-- @return boolean, false for the values listed above and true for everything else
local function is(value)
	if not value or value == '' or value == 0 or value == '0' or value == 'false' or value == 'no' then
		return false
	end
	return true
end

-- Helper function to match against a list of pattern fragments
-- Tries pre..list[1]..post, then pre..list[2]..post, and so on, and stops at the first hit
-- @param text String to search
-- @param pre Pattern text placed before each list entry
-- @param list Array of pattern fragments to try in order
-- @param post Pattern text placed after each list entry
-- @param init Optional position to start matching from
-- @return the results of mw.ustring.match for the first pattern that matches, or nil if none does
local function matchAny(text, pre, list, post, init)
	for i = 1, #list do
		local match = { mw.ustring.match(text, pre .. list[i] .. post, init) }
		if match[1] then return unpack(match) end
	end
	return nil
end

-- Helper function to convert a comma-separated list of numbers or min-max ranges into a set of flags
-- For example: '1,3-5' to {[1]=true, [3]=true, [4]=true, [5]=true} (numbers not listed stay nil)
-- Entries that are neither a number nor a min-max range are ignored
-- @param str Comma-separated list, or nil
-- @return table keyed by number; empty when str is nil or nothing could be parsed
local function numberFlags(str)
	local flags = {}
	if not str then return flags end
	for _, range in ipairs(mw.text.split(str, ',')) do -- parse ranges: '1,3-5' to {'1','3-5'}
		local min, max = mw.ustring.match(range, '^%s*(%d+)%s*%-%s*(%d+)%s*$') -- '3-5' to min='3' max='5'
		if not max then
			min = mw.ustring.match(range, '^%s*(%d+)%s*$') -- '1' to min='1' max='1'
			max = min
		end
		if max then
			-- The captures are strings, so convert them explicitly before using them as loop bounds
			for number = tonumber(min), tonumber(max) do flags[number] = true end
		end
	end
	return flags
end

-- Helper function to convert template arguments into a table of arguments fit for get
-- The arguments of the parent frame are copied first and those of the frame itself second,
-- so an argument given on the frame itself wins over the same argument on the parent
-- The 'files', 'lists', 'tables' and 'paragraphs' arguments are replaced by their numberFlags form
-- @param frame Frame object
-- @return table of arguments
local function parseArgs(frame)
	local args = {}
	local parent = frame:getParent() -- nil when the frame has no parent
	if parent then
		for key, value in pairs(parent.args) do args[key] = value end
	end
	for key, value in pairs(frame.args) do args[key] = value end
	args.files = numberFlags(args.files)
	args.lists = numberFlags(args.lists)
	args.tables = numberFlags(args.tables)
	args.paragraphs = numberFlags(args.paragraphs)
	return args
end

-- Error handling function
-- Formats the message 'error-<message>' of the 'I18n/Excerpt' dataset through Module:TNT
-- and raises the result as a Lua error, so this function never returns normally
-- @param message Key of the error, without the 'error-' prefix
-- @param value Optional value passed on to the message formatter
local function throwError(message, value)
	message = require('Module:TNT').format('I18n/Excerpt', 'error-' .. message, value)
	error(message, 2) -- level 2 attributes the error to the caller rather than to this helper
end

-- Error handling function
-- Returns a wiki friendly error: an mw.html <div class="error"> wrapping the message
-- @param message Key into the configured error messages, or the message text itself
-- @param value Optional value substituted into the message with mw.ustring.format
-- @return mw.html object
local function getError(message, value)
	-- NOTE(review): the configuration table `d` (errors, errors.prefix, errorsCategory) is not
	-- defined anywhere in this file, so it is treated as optional here instead of crashing on nil
	local config = type(d) == 'table' and d or {}
	local errors = type(config.errors) == 'table' and config.errors or {}

	message = tostring(errors[message] or message or '')
	-- Only format when there is something to substitute: an already formatted message
	-- may contain a literal % that the formatter would reject
	if value ~= nil then message = mw.ustring.format(message, value) end
	message = (errors.prefix or '') .. message

	-- On content pages, also add the page to the configured errors category
	if config.errorsCategory and mw.title.getCurrentTitle().isContentPage then
		local errorsCategory = mw.title.new(config.errorsCategory, 'Category')
		if errorsCategory then message = message .. '[[' .. errorsCategory.prefixedText .. ']]' end
	end

	return mw.html.create('div'):addClass('error'):wikitext(message)
end

-- Get the page wikitext, following redirects
-- Also returns the page name, or the target page name if a redirect was followed
-- @param page Title of the page
-- @return the result of title:getContent() and the prefixed title text,
--         or false, false if no title object could be created for the given name
local function getText(page)
	local title = mw.title.new(page)
	if not title then return false, false end

	-- Read from the redirect target when there is one
	local target = title.redirectTarget
	if target then title = target end

	return title:getContent(), title.prefixedText
end

-- Get the requested paragraphs
-- Blocks in braces that fill whole lines (tables, block templates) and file links are removed first;
-- every remaining non-empty line counts as one paragraph
-- @param text Wikitext
-- @param flags Optional table of paragraph numbers to keep, e.g. {[1]=true, [3]=true};
--              nil or an empty table keeps every paragraph
-- @return array of paragraphs
local function getParagraphs(text, flags)
	local paragraphs = {}
	local keepAll = type(flags) ~= 'table' or next(flags) == nil

	text = '\n' .. text .. '\n' -- add newlines to match start and end of text

	-- Remove tables and block templates such as infoboxes
	-- Repeated until nothing changes: adjacent blocks share a newline, so one pass may skip some
	local removed
	repeat
		text, removed = mw.ustring.gsub(text, '\n%b{}\n', '\n')
	until removed == 0

	-- Remove files
	-- The namespace names go into a fresh list so the table owned by mw.site is not modified
	local fileNamespace = mw.site.namespaces.File
	local fileNamespaces = { fileNamespace.name, fileNamespace.canonicalName }
	for _, alias in ipairs(fileNamespace.aliases) do
		table.insert(fileNamespaces, alias)
	end
	for link in mw.ustring.gmatch(text, '%b[]') do
		if matchAny(link, '%[%[%s*', fileNamespaces, '%s*:.*%]%]') then
			link = mw.ustring.gsub(link, '([%^%$%(%)%%%.%[%]%*%+%-%?])', '%%%1') -- escape pattern characters
			text = mw.ustring.gsub(text, link, '\n')
		end
	end

	-- Assume that each remaining line is a paragraph
	local count = 0
	for paragraph in mw.ustring.gmatch(text, '[^\n]+') do
		count = count + 1
		if keepAll or flags[count] then table.insert(paragraphs, paragraph) end
	end

	return paragraphs
end

-- Get the files only
-- A file is a bracketed link whose prefix is one of the names or aliases of the File namespace
-- @param text Wikitext
-- @param flags Optional table of file numbers to keep, e.g. {[1]=true, [3]=true};
--              nil or an empty table keeps every file
-- @return array of file links
local function getFiles(text, flags)
	local files = {}
	local keepAll = type(flags) ~= 'table' or next(flags) == nil

	-- The namespace names go into a fresh list so the table owned by mw.site is not modified
	local fileNamespace = mw.site.namespaces.File
	local fileNamespaces = { fileNamespace.name, fileNamespace.canonicalName }
	for _, alias in ipairs(fileNamespace.aliases) do
		table.insert(fileNamespaces, alias)
	end

	local count = 0
	for link in mw.ustring.gmatch(text, '%b[]') do
		if matchAny(link, '%[%[%s*', fileNamespaces, '%s*:.*%]%]') then
			count = count + 1
			if keepAll or flags[count] then table.insert(files, link) end
		end
	end

	return files
end

-- Get the tables only
-- A table is a balanced {...} block that starts with '{|'
-- @param text Wikitext
-- @param flags Optional table of table numbers to keep, e.g. {[1]=true, [3]=true};
--              nil or an empty table keeps every table
-- @return array of tables
local function getTables(text, flags)
	local tables = {}
	local keepAll = type(flags) ~= 'table' or next(flags) == nil
	local count = 0
	for block in mw.ustring.gmatch(text, '%b{}') do
		if mw.ustring.sub(block, 1, 2) == '{|' then
			count = count + 1
			if keepAll or flags[count] then table.insert(tables, block) end
		end
	end
	return tables
end

-- Get the lists only
-- A list is a run of consecutive lines that each start with '*' or '#'
-- The text is scanned line by line so that a list at the very end of the text,
-- or one separated from the previous list by a single line break, is not missed
-- @param text Wikitext
-- @param flags Optional table of list numbers to keep, e.g. {[1]=true, [3]=true};
--              nil or an empty table keeps every list
-- @return array of lists, each with its lines joined by newlines
local function getLists(text, flags)
	local lists = {}
	local keepAll = type(flags) ~= 'table' or next(flags) == nil
	local count = 0
	local current -- lines of the list being collected, nil between lists

	-- Close the list being collected, if any, and store it when requested
	local function finish()
		if not current then return end
		count = count + 1
		if keepAll or flags[count] then table.insert(lists, table.concat(current, '\n')) end
		current = nil
	end

	for line in mw.ustring.gmatch(text .. '\n', '([^\n]*)\n') do
		local first = mw.ustring.sub(line, 1, 1)
		if first == '*' or first == '#' then
			current = current or {}
			table.insert(current, line)
		else
			finish()
		end
	end
	finish()

	return lists
end

-- Get the lead section of the given wikitext
-- The lead is everything before the first line that starts with '=='
-- @param text Wikitext
-- @return the trimmed lead; raises the 'lead-empty' error through throwError when nothing is left
local function getLead(text)
	-- The prepended newline lets a heading on the very first line match as well
	text = mw.ustring.gsub('\n' .. text, '\n==.*', '')
	text = mw.text.trim(text)
	if text == '' then return throwError('lead-empty') end
	return text
end

-- Get the requested section out of the given wikitext, including its subsections
-- @param text Wikitext
-- @param section Section title; it is passed through mw.uri.decode before matching
-- @param subsections Currently unused
-- @return the trimmed section content; raises 'section-not-found' or 'section-empty' through throwError
local function getSection(text, section, subsections)
	local escapedSection = mw.ustring.gsub(mw.uri.decode(section), '([%^%$%(%)%%%.%[%]%*%+%-%?])', '%%%1')

	-- Surrounding newlines let a heading on the first line and a last line without a line break match
	local level, body = mw.ustring.match('\n' .. text .. '\n', '\n(==+)%s*' .. escapedSection .. '%s*==.-\n(.*)')
	if not body then return throwError('section-not-found', section) end

	-- Remove later sections with headings at this level or higher
	-- The prepended newline lets a heading that follows immediately match too
	local nextSection = '\n==' .. mw.ustring.rep('=?', #level - 2) .. '[^=].*'
	body = mw.ustring.gsub('\n' .. body, nextSection, '')

	body = mw.text.trim(body)
	if body == '' then return throwError('section-empty', section) end
	return body
end

-- Get a fragment of a page by calling the #lst parser function with the page and the fragment name
-- @todo Implement custom parsing of fragments rather than relying on #lst
-- @param page Title of the page
-- @param fragment Name of the fragment
-- @return the trimmed fragment; raises the 'fragment-empty' error through throwError when it is empty
local function getFragment(page, fragment)
	local frame = mw.getCurrentFrame()
	local text = frame:callParserFunction('#lst', page, fragment)
	text = mw.text.trim(text)
	if text == '' then return throwError('fragment-empty', fragment) end
	return text
end

-- Remove the parts of the wikitext that should not be transcluded:
-- HTML comments, noinclude sections, everything outside onlyinclude sections (when there are any),
-- magic words and categories
-- @param text Wikitext
-- @return the cleaned wikitext
local function cleanText(text)
	text = mw.ustring.gsub(text, "<!%-%-.-%-%->", "") -- remove HTML comments
	text = mw.ustring.gsub(text, "<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove noinclude bits
	if mw.ustring.find(text, "[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]") then -- avoid expensive search if possible
		text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text between onlyinclude sections
		text = mw.ustring.gsub(text, "^.-<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>", "") -- remove text before first onlyinclude section
		text = mw.ustring.gsub(text, "</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.*", "") -- remove text after last onlyinclude section
	end
	text = mw.ustring.gsub(text, "%s*__[A-Z]*__", "") -- remove magic words
	text = mw.ustring.gsub(text, "%s*%[%[%s*:?[Cc]ategory:.-%]%]", "") -- remove categories
	return text
end

-- Replace the first call to each reference defined outside of the text for the full reference, to prevent undefined references
-- Then prefix the page title to the reference names to prevent conflicts
-- that is, replace <ref name="Foo"> for <ref name="Title of the article Foo">
-- and also <ref name="Foo" /> for <ref name="Title of the article Foo" />
-- also remove reference groups: <ref name="Foo" group="Bar"> for <ref name="Title of the article Foo">
-- and <ref group="Bar"> for <ref>
-- @todo The current regex may fail in cases with both kinds of quotes, like <ref name="Darwin's book">
-- @param text Excerpt whose references need fixing
-- @param page Title of the page the excerpt comes from
-- @param full Optional full wikitext of the page; fetched with getText when not given
-- @return the text with its references fixed
local function fixRefs(text, page, full)
	if not full then full = getText(page) end
	full = full or '' -- getText may not be able to provide any text

	local seen = {} -- reference names that were already processed
	local position = 1
	while position < mw.ustring.len(text) do
		local refName
		-- Find the next self-closing named ref; the trailing () captures where to resume the search
		refName, position = mw.ustring.match(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?([^\"'>]+)[\"']?[^>]*/%s*>()", position)
		if refName then
			refName = mw.text.trim(refName)
			if not seen[refName] then -- make sure we process each ref name only once
				seen[refName] = true
				local escaped = mw.ustring.gsub(refName, "[%^%$%(%)%.%[%]%*%+%-%?%%]", "%%%0") -- escape special characters
				local refBody = mw.ustring.match(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?%s*" .. escaped .. "%s*[\"']?[^>/]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>")
				if not refBody then -- the ref body is not in the excerpt
					refBody = mw.ustring.match(full, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?%s*" .. escaped .. "%s*[\"']?[^/>]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>")
					if refBody then -- the ref body was found elsewhere
						-- A function replacement inserts the body verbatim, even when it contains a %
						text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?%s*" .. escaped .. "%s*[\"']?[^>]*/?%s*>", function() return refBody end, 1)
					end
				end
			end
		else
			position = mw.ustring.len(text) -- no more self-closing named refs: stop
		end
	end

	-- Prefix the page title to the reference names; any other attribute of the tag is dropped
	local safePage = mw.ustring.gsub(page, '%%', '%%%%') -- a % in the title must stay literal in the replacement
	text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]*name%s*=%s*[\"']?([^\"'>/]+)[\"']?[^>/]*(/?)%s*>", '<ref name="' .. safePage .. ' %1" %2>')
	-- Remove the group of the references that only have a group
	text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]*group%s*=%s*[\"']?[^\"'>/]+[\"']%s*>", '<ref>')
	return text
end

-- Replace the bold title or synonym near the start of the article by a wikilink to the article
-- @param text Wikitext
-- @param page Title of the article
-- @return the text with the first bold title, or else the first unlinked bold text, turned into a link
local function linkBold(text, page)
	local lang = mw.language.getContentLanguage()
	-- Look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
	-- Plain search: special characters in page represent themselves
	local position = mw.ustring.find(text, "'''" .. lang:ucfirst(page) .. "'''", 1, true)
		or mw.ustring.find(text, "'''" .. lang:lcfirst(page) .. "'''", 1, true)
	if position then
		-- Link it: keep the three opening quotes and wrap the title that follows in [[ ]]
		local length = mw.ustring.len(page)
		text = mw.ustring.sub(text, 1, position + 2)
			.. "[[" .. mw.ustring.sub(text, position + 3, position + length + 2) .. "]]"
			.. mw.ustring.sub(text, position + length + 3, -1)
	else
		-- Look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name)
		text = mw.ustring.gsub(text, "()'''(.-'*)'''", function(_, bold)
			if not mw.ustring.find(bold, "%[") then -- if not wikilinked
				return "'''[[" .. page .. "|" .. bold .. "]]'''" -- replace '''Foo''' by '''[[page|Foo]]'''
			else
				return nil -- instruct gsub to make no change
			end
		end, 1)
	end
	return text
end

-- Main function for modules
-- @param page Title of the page, optionally followed by '#' and a section title;
--             a trailing '#' without a section title requests the lead section
-- @param options Optional table of options; the ones read here are
--                fragment, subsections, only, lists, files, tables, paragraphs,
--                noRefs, fixRefs, linkBold and noBold
-- @return the requested wikitext; problems are raised as Lua errors through throwError
local function get(page, options)
	if not page or page == '' then return throwError('no-page') end
	local name, hash, section = mw.ustring.match(page, '([^#]+)(#?)([^#]*)')
	if not name then return throwError('no-page') end -- nothing before the hash, as in '#Section'

	local text, title = getText(name)
	if not title then return throwError('no-page') end
	if not text then return throwError('page-not-found', title) end
	local full = text -- save the full text for fixRefs below

	options = options or {}

	-- Narrow the text down to a fragment, a section or the lead
	if is(options.fragment) then
		text = getFragment(title, options.fragment)
	elseif is(section) then
		text = getSection(text, section, options.subsections)
	elseif is(hash) then
		text = getLead(text)
	end

	-- Keep only the requested elements
	local only = options.only
	if only == 'lists' or only == 'list' then
		text = table.concat(getLists(text, options.lists), '\n\n')
	elseif only == 'files' or only == 'file' then
		text = table.concat(getFiles(text, options.files), '\n\n')
	elseif only == 'tables' or only == 'table' then
		text = table.concat(getTables(text, options.tables), '\n\n')
	elseif only == 'paragraphs' or only == 'paragraph' then
		text = table.concat(getParagraphs(text, options.paragraphs), '\n\n')
	end

	-- Misc options
	if is(options.noRefs) then
		-- Self-closing refs go first, otherwise the pattern for paired refs could start at a
		-- self-closing ref and swallow the text up to the next closing tag
		text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff][^>]-/%s*>", "") -- remove citations
		text = mw.ustring.gsub(text, "<%s*[Rr][Ee][Ff].->.-<%s*/%s*[Rr][Ee][Ff]%s*>", "") -- remove refs
	end
	if is(options.fixRefs) then text = fixRefs(text, title, full) end
	if is(options.linkBold) then text = linkBold(text, title) end
	if is(options.noBold) then text = mw.ustring.gsub(text, "'''", "") end

	return text
end

-- Main invocation function for templates
-- Builds the arguments with parseArgs and calls get with the first positional argument as the page
-- @param frame Frame object
-- @return the preprocessed excerpt, or the result of getError if get raised an error
local function main(frame)
	local args = parseArgs(frame)
	local ok, result = pcall(get, args[1], args)
	if not ok then return getError(result) end
	return frame:preprocess(result)
end

-- Entry points for templates
-- @param frame Frame object
-- @return whatever main returns for this frame
function p.main(frame)
	return main(frame)
end

-- Entry points for other Lua modules
-- Each one forwards its arguments to the local function of the same name
function p.get(page, options) return get(page, options) end
function p.getText(page) return getText(page) end
function p.getFragment(page, fragment) return getFragment(page, fragment) end
function p.getSection(text, section) return getSection(text, section) end
function p.getParagraphs(text, flags) return getParagraphs(text, flags) end
function p.getTables(text, flags) return getTables(text, flags) end
function p.getLists(text, flags) return getLists(text, flags) end
function p.getFiles(text, flags) return getFiles(text, flags) end
function p.getError(message, value) return getError(message, value) end

-- Return the table of exported functions
return p