-- Module:Transcluder

-- Table of functions exported by this module; it is filled by the entry points
-- near the end of the file and returned on the last line
local p = {}

-- Helper function to test for truthy and falsy values
-- @param value Any value, typically a template argument
-- @return false for nil, false, '', 0, '0', 'false' and 'no'; true for everything else
local function is(value)
	if not value then
		return false
	end
	local falsy = { [''] = true, [0] = true, ['0'] = true, ['false'] = true, ['no'] = true }
	return not falsy[value]
end

-- Helper function to match a text against a list of alternative patterns
-- Tries pre..list[1]..post, then pre..list[2]..post, and so on, stopping at the first hit
-- @param text Text to search
-- @param pre Pattern fragment placed before each alternative
-- @param list Sequence of alternative pattern fragments
-- @param post Pattern fragment placed after each alternative
-- @param init Optional start position, passed through to mw.ustring.match
-- @return Everything mw.ustring.match returned for the first alternative that matched, or nil
local function matchAny(text, pre, list, post, init)
	for index = 1, #list do
		local captures = { mw.ustring.match(text, pre .. list[index] .. post, init) }
		if captures[1] then
			return unpack(captures)
		end
	end
	return nil
end

-- Helper function to escape the magic characters of Lua patterns
-- so that a string can be embedded in a pattern and matched literally
-- @param str String to escape
-- @return The escaped string, and nothing else
local function escapeSpecials(str)
	-- gsub also returns the number of substitutions; the outer parentheses drop it
	-- so the count cannot leak into an argument list or table constructor at a call site
	return (mw.ustring.gsub(str, '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0'))
end

-- Helper function to remove a string from a text
-- The string is matched literally and every occurrence is removed
-- @param text Text to remove from
-- @param str String to remove
-- @return The text without the string
local function removeString(text, str)
	local pattern = escapeSpecials(str)
	local result = mw.ustring.gsub(text, pattern, '')
	return result
end

-- Helper function to convert a comma-separated list of numbers or min-max ranges into a set of flags
-- @param value Number, string or table describing the requested elements:
--        a number selects that single element (a negative number blacklists it),
--        a string is a comma-separated list of numbers, min-max ranges or names, for example '1,3-5',
--        a table is used as is (or its 'flags' and 'blacklist' fields, when present)
-- @return Map from integers (or names) to true, for example {[1]=true,[3]=true,[4]=true,[5]=true}; nil if value is omitted
-- @return Boolean indicating whether the flags should be treated as a blacklist or not
local function parseFlags(value)
	local flags = {}
	local blacklist = false

	if not value then return nil, false end

	if type(value) == 'number' then
		if value < 0 then
			value = -value
			blacklist = true
		end
		flags = { [value] = true }

	elseif type(value) == 'string' then
		local ranges = mw.text.split(value, ',') -- split ranges: '1,3-5' to {'1','3-5'}
		for _, range in ipairs(ranges) do
			local neg, min, max = mw.ustring.match(range, '^%s*(%-?)%s*(%d+)%s*%-%s*(%d+)%s*$') -- '3-5' to min=3 max=5
			if not max then
				neg, min, max = mw.ustring.match(range, '^%s*(%-?)%s*((%d+))%s*$') -- '1' to min=1 max=1
			end
			if max then
				-- the captures are strings: convert them explicitly so the keys are numbers
				for index = tonumber(min), tonumber(max) do flags[index] = true end
			else
				-- not a number or range, so treat it as a name; trim it so that 'A, B' yields 'B' and not ' B'
				flags[mw.text.trim(range)] = true
			end
			if neg == '-' then blacklist = true end
		end

	elseif type(value) == 'table' then
		flags = value.flags or value
		blacklist = value.blacklist or false
	end

	return flags, blacklist
end

-- Helper function to convert template arguments into a table of options fit for get
-- @param frame Frame object of the invocation
-- @return Table with the arguments of the parent frame (the calling template),
--         overridden by the arguments given directly in the invocation
local function parseArgs(frame)
	local args = {}
	-- getParent is a method and must be called; it yields nothing when there is no parent frame
	local parent = frame:getParent()
	if parent then
		for key, value in pairs(parent.args) do args[key] = value end
	end
	-- args from the invocation itself have priority over parent args from the template
	for key, value in pairs(frame.args) do args[key] = value end
	return args
end

-- Error handling function
-- Formats the message keyed 'error-<message>' from 'I18n/Excerpt' through Module:TNT
-- and raises the result as a Lua error attributed to the caller (error level 2)
-- @param message Key of the error without the 'error-' prefix, for example 'page-not-found'
-- @param value Optional value handed to the formatter along with the key
local function throwError(message, value)
	local TNT = require('Module:TNT')
	local formatted = TNT.format('I18n/Excerpt', 'error-' .. message, value)
	error(formatted, 2)
end

-- Error handling function
-- Returns a wiki friendly error
-- @param message Key of a configured error message, or the error message itself
-- @param value Optional value to substitute into the message
-- @return mw.html object: a div with class 'error' wrapping the message
local function getError(message, value)
	-- NOTE(review): nothing in this module defines the configuration table `d` that this
	-- function reads (errors, errors.prefix, errorsCategory). It is looked up as an optional
	-- global so that a missing configuration cannot mask the error being reported.
	local config = rawget(_G, 'd')
	if type(config) ~= 'table' then config = {} end
	local errors = config.errors or {}

	if message == nil then message = '' end
	message = tostring(errors[message] or message)
	-- Only format when there is a value to substitute: messages coming from pcall
	-- may contain a literal '%', which would make the formatter fail
	if value ~= nil then
		message = mw.ustring.format(message, value)
	end
	message = (errors.prefix or '') .. message

	-- Categorize content pages that show an error, when a category is configured
	if config.errorsCategory and mw.title.getCurrentTitle().isContentPage then
		local errorsCategory = mw.title.new(config.errorsCategory, 'Category')
		if errorsCategory then
			message = message .. '[[' .. errorsCategory.prefixedText .. ']]'
		end
	end

	return mw.html.create('div'):addClass('error'):wikitext(message)
end

-- Get the page wikitext, following redirects
-- @param page Name of the page
-- @return Content of the page as given by the title object's getContent method,
--         or false if no title object could be made from the page name
-- @return Prefixed name of the page, or of the target page if a redirect was followed,
--         or false if no title object could be made from the page name
local function getText(page)
	local title = mw.title.new(page)
	if not title then return false, false end

	local target = title.redirectTarget
	if target then title = target end

	-- getContent is a method and must be called
	return title:getContent(), title.prefixedText
end

-- Get the requested files out of the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param flags Range of files to return, for example 2 or '1,3-5'. Omit to return all files.
-- @return Sequence of strings containing the wikitext of the requested files.
-- @return Original wikitext minus non-requested files.
local function getFiles(text, flags)
	local files = {}

	local selection, blacklist = parseFlags(flags)

	-- Collect the namespace names in a table of our own: inserting into the aliases
	-- table that belongs to mw.site would alter library data and grow it on every call
	local namespace = mw.site.namespaces.File
	local fileNamespaces = {}
	for _, alias in ipairs(namespace.aliases) do
		table.insert(fileNamespaces, alias)
	end
	table.insert(fileNamespaces, namespace.name)
	table.insert(fileNamespaces, namespace.canonicalName)

	local count = 0
	for link in mw.ustring.gmatch(text, '%b[]') do
		if matchAny(link, '%[%[%s*', fileNamespaces, '%s*:.*%]%]') then
			count = count + 1
			local listed = not selection or selection[count]
			-- keep listed files, or the unlisted ones when the flags are a blacklist
			if (listed and not blacklist) or (blacklist and not listed) then
				table.insert(files, link)
			else
				text = removeString(text, link)
			end
		end
	end

	return files, text
end

-- Get the requested tables out of the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param flags Range of tables to return, for example 2 or '1,3-5'. Omit to return all tables.
-- @return Sequence of strings containing the wikitext of the requested tables.
-- @return Original wikitext minus non-requested tables.
local function getTables(text, flags)
	local tables = {}
	local selection, blacklist = parseFlags(flags)
	local count = 0

	for candidate in mw.ustring.gmatch(text, '%b{}') do
		-- balanced braces also match templates; only those opening with {| are tables
		if mw.ustring.sub(candidate, 1, 2) == '{|' then
			count = count + 1
			local listed = not selection or selection[count]
			if (listed and not blacklist) or (blacklist and not listed) then
				table.insert(tables, candidate)
			else
				text = removeString(text, candidate)
			end
		end
	end

	return tables, text
end

-- Get the requested templates out of the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param flags Range of templates to return, for example 2 or '1,3-5', or template names. Omit to return all templates.
-- @return Sequence of strings containing the wikitext of the requested templates.
-- @return Original wikitext minus non-requested templates.
local function getTemplates(text, flags)
	local templates = {}

	local selection, blacklist = parseFlags(flags)

	local lang = mw.language.getContentLanguage() -- a function: it must be called to get the language object
	local count = 0
	for template in mw.ustring.gmatch(text, '{%b{}}') do
		if mw.ustring.sub(template, 1, 3) ~= '{{#' then -- skip parser functions like #if
			local name = mw.ustring.match(template, '{{([^}|]+)') -- get the template name
			-- the name runs up to the first | or }, so it can carry spaces and newlines
			if name then name = mw.text.trim(name) end
			count = count + 1
			local listed = not selection or selection[count]
			-- a template can also be requested by name, regardless of the case of its first letter;
			-- nameless matches such as {{|x}} can only be requested by position
			if not listed and name then
				listed = selection[lang:ucfirst(name)] or selection[lang:lcfirst(name)]
			end
			if (listed and not blacklist) or (blacklist and not listed) then
				table.insert(templates, template)
			else
				text = removeString(text, template)
			end
		end
	end

	return templates, text
end

-- Get the requested lists out of the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param flags Range of lists to return, for example 2 or '1,3-5'. Omit to return all lists.
-- @return Sequence of strings containing the wikitext of the requested lists.
-- @return Original wikitext minus non-requested lists.
local function getLists(text, flags)
	local lists = {}
	local selection, blacklist = parseFlags(flags)
	local count = 0

	-- A list starts on a line beginning with * or # and runs until a line that begins with neither;
	-- the added newlines let lists on the first and last lines match
	for item in mw.ustring.gmatch('\n' .. text .. '\n\n', '\n([*#].-)\n[^*#]') do
		count = count + 1
		local listed = not selection or selection[count]
		if (listed and not blacklist) or (blacklist and not listed) then
			table.insert(lists, item)
		else
			text = removeString(text, item)
		end
	end

	return lists, text
end

-- Get the requested paragraphs out of the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param flags Range of paragraphs to return, for example 2 or '1,3-5'. Omit to return all paragraphs.
-- @return Sequence of strings containing the wikitext of the requested paragraphs.
-- @return Original wikitext minus non-requested paragraphs.
local function getParagraphs(text, flags)
	local paragraphs = {}

	local selection, blacklist = parseFlags(flags)

	-- Remove tables and block templates such as infoboxes
	-- The text is padded once so that blocks on the first and last lines match too
	local padded = '\n' .. text .. '\n'
	for block in mw.ustring.gmatch(padded, '\n%b{}\n') do
		-- the block is raw wikitext, so it must be escaped before being used as a pattern
		padded = mw.ustring.gsub(padded, escapeSpecials(block), '\n')
	end
	-- every replacement leaves a newline in place, so the padding is still there to strip
	text = mw.ustring.sub(padded, 2, -2)

	-- Remove non-paragraphs
	local _
	_, text = getLists(text, 0)
	_, text = getFiles(text, 0)

	-- Assume that each remaining line is a paragraph
	local count = 0
	for paragraph in mw.ustring.gmatch(text, '[^\n]+') do
		count = count + 1
		local listed = not selection or selection[count]
		if (listed and not blacklist) or (blacklist and not listed) then
			table.insert(paragraphs, paragraph)
		else
			text = removeString(text, paragraph)
		end
	end

	return paragraphs, text
end

-- Get the requested categories out of the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param flags Range of categories to return, for example 2 or '1,3-5'. Omit to return all categories.
-- @return Sequence of strings containing the wikitext of the requested categories.
-- @return Original wikitext minus non-requested categories.
local function getCategories(text, flags)
	local categories = {}

	local selection, blacklist = parseFlags(flags)

	-- Collect the namespace names in a table of our own: inserting into the aliases
	-- table that belongs to mw.site would alter library data and grow it on every call
	local namespace = mw.site.namespaces.Category
	local categoryNamespaces = {}
	for _, alias in ipairs(namespace.aliases) do
		table.insert(categoryNamespaces, alias)
	end
	table.insert(categoryNamespaces, namespace.name)
	table.insert(categoryNamespaces, namespace.canonicalName)

	local count = 0
	for category in mw.ustring.gmatch(text, '%b[]') do
		if matchAny(category, '%[%[%s*', categoryNamespaces, '%s*:.*%]%]') then
			count = count + 1
			local listed = not selection or selection[count]
			-- keep listed categories, or the unlisted ones when the flags are a blacklist
			if (listed and not blacklist) or (blacklist and not listed) then
				table.insert(categories, category)
			else
				text = removeString(text, category)
			end
		end
	end

	return categories, text
end

-- Get the requested references out of the given wikitext.
-- @param text Required. Wikitext to parse.
-- @param flags Range of references to return, for example 2 or '1,3-5'. Omit to return all references.
-- @return Sequence of strings containing the wikitext of the requested references.
-- @return Original wikitext minus non-requested references (and minus the citations that reuse them).
local function getReferences(text, flags)
	local references = {}

	local selection, blacklist = parseFlags(flags)

	local count = 0
	for reference in mw.ustring.gmatch(text, '<%s*[Rr][Ee][Ff].->.-<%s*/%s*[Rr][Ee][Ff]%s*>') do
		count = count + 1
		local listed = not selection or selection[count]
		-- keep listed references, or the unlisted ones when the flags are a blacklist
		if (listed and not blacklist) or (blacklist and not listed) then
			table.insert(references, reference)
		else
			text = removeString(text, reference)
			-- A removed reference may be reused elsewhere through self-closing citations
			-- that carry its name; those would be left undefined, so remove them as well.
			-- The name is read from the opening tag of the removed reference itself.
			local name = mw.ustring.match(reference, '^<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?([^"\'>/]+)["\']?[^>]*%s*>')
			if name then
				name = mw.text.trim(name)
				text = mw.ustring.gsub(text, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?([^"\'>/]+)["\']?[^/>]*/%s*>', function(cited)
					if mw.text.trim(cited) == name then return '' end
					return nil -- citation of another reference: leave it alone
				end)
			end
		end
	end

	return references, text
end

-- Get the lead section out of the given wikitext.
-- @param text Required. Wikitext to parse.
-- @return Wikitext of the lead section, that is, everything before the first heading.
-- Throws the 'lead-empty' error if nothing but whitespace precedes the first heading.
local function getLead(text)
	-- the added newline lets a heading on the very first line match too
	text = mw.ustring.gsub('\n' .. text, '\n==.*', '')
	text = mw.text.trim(text)
	-- trim always returns a string, so emptiness has to be tested against ''
	if text == '' then return throwError('lead-empty') end
	return text
end

-- Get the requested section out of the given wikitext (including subsections).
-- @param text Required. Wikitext to parse.
-- @param section Required. Title of the section to get (in wikitext), for example 'History' or 'History of Athens'.
-- @return Wikitext of the requested section.
-- Throws 'section-not-found' if no heading matches, and 'section-empty' if the section has no content.
local function getSection(text, section)
	-- pad with newlines so that sections at the start and end of the text can match
	local padded = '\n' .. text .. '\n'
	local escapedSection = mw.ustring.gsub(mw.uri.decode(section), '([%^%$%(%)%%%.%[%]%*%+%-%?])', '%%%1')

	local level, body = mw.ustring.match(padded, '\n(==+)%s*' .. escapedSection .. '%s*==.-\n(.*)')
	if not body then
		return throwError('section-not-found', section)
	end

	-- drop everything from the next heading of this level or higher onwards
	local nextSection = '\n==' .. mw.ustring.rep('=?', #level - 2) .. '[^=].*'
	body = mw.ustring.gsub(body, nextSection, '')
	body = mw.text.trim(body)
	if body == '' then
		return throwError('section-empty', section)
	end
	return body
end

-- Get the wikitext between <section begin=name /> and <section end=name /> tags.
-- When several fragments share the name, they are concatenated in order of appearance.
-- @param text Required. Wikitext to parse.
-- @param fragment Required. Name of the fragment to get.
-- @return Wikitext of the requested fragment.
-- Throws 'fragment-not-found' if no such tags exist, and 'fragment-empty' if they enclose nothing.
local function getFragment(text, fragment)
	local escapedFragment = mw.ustring.gsub(fragment, '[%^%$%(%)%%%.%[%]%*%+%-%?]', '%%%0')
	local quotedName = '%s*=%s*[\"\']?%s*' .. escapedFragment .. '%s*[\"\']?%s*/>'
	-- the end tag tolerates whitespace around '=' just like the begin tag does
	local pattern = '<%s*[Ss]ection%s+begin' .. quotedName .. '(.-)<%s*[Ss]ection%s+end' .. quotedName

	local pieces = {}
	for piece in mw.ustring.gmatch(text, pattern) do
		table.insert(pieces, piece)
	end
	if #pieces == 0 then return throwError('fragment-not-found', fragment) end

	local fragments = mw.text.trim(table.concat(pieces))
	if fragments == '' then return throwError('fragment-empty', fragment) end
	return fragments
end

-- Remove elements that most certainly we don't want transcluded.
-- @param text Required. Wikitext to clean.
-- @return Clean wikitext.
local function cleanText(text)

	-- Remove HTML comments
	text = mw.ustring.gsub(text, '<!%-%-.-%-%->','')

	-- Remove <noinclude> tags together with their content
	text = mw.ustring.gsub(text, '<[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>.-</[Nn][Oo][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>', '')

	-- Keep only the content of <onlyinclude> tags, if there are any
	if mw.ustring.find(text, '[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]') then -- avoid expensive search if possible
		local open = '<[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>'
		local close = '</[Oo][Nn][Ll][Yy][Ii][Nn][Cc][Ll][Uu][Dd][Ee]>'
		text = mw.ustring.gsub(text, close .. '.-' .. open, '') -- remove text between onlyinclude sections
		text = mw.ustring.gsub(text, '^.-' .. open, '') -- remove text before first onlyinclude section
		text = mw.ustring.gsub(text, close .. '.*', '') -- remove text after last onlyinclude section
	end

	return text
end

-- Replace the first call to each reference defined outside of the text for the full reference, to prevent undefined references
-- Then prefix the page title to the reference names to prevent conflicts
-- that is, replace <ref name="Foo"> for <ref name="Title of the page Foo">
-- and also <ref name="Foo" /> for <ref name="Title of the page Foo" />
-- also remove reference groups: <ref name="Foo" group="Bar"> for <ref name="Title of the page Foo">
-- and <ref group="Bar"> for <ref>
-- @param text Wikitext of the excerpt
-- @param page Title of the page the excerpt comes from
-- @param full Optional. Full wikitext of the page; fetched with getText when omitted
-- @return Wikitext of the excerpt with the references fixed
-- @todo The current patterns may fail in cases with both kinds of quotes, like <ref name="Darwin's book">
local function fixReferences(text, page, full)
	if not full then full = getText(page) end
	local seen = {}
	local position = 1
	while position < mw.ustring.len(text) do
		local refName
		-- the trailing () captures the position right after the citation, to resume the search from there
		refName, position = mw.ustring.match(text, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?([^"\'>]+)["\']?[^>]*/%s*>()', position)
		if refName then
			refName = mw.text.trim(refName)
			if not seen[refName] then -- make sure we process each ref name only once
				seen[refName] = true
				local escapedName = mw.ustring.gsub(refName, '[%^%$%(%)%.%[%]%*%+%-%?%%]', '%%%0') -- escape special characters
				local refBody = mw.ustring.match(text, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?%s*' .. escapedName .. '%s*["\']?[^>/]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>')
				if not refBody and full then -- the ref body is not in the excerpt
					refBody = mw.ustring.match(full, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?%s*' .. escapedName .. '%s*["\']?[^/>]*>.-<%s*/%s*[Rr][Ee][Ff]%s*>')
					if refBody then -- the ref body was found elsewhere
						-- a function is used as replacement so that any % in the body is taken literally
						text = mw.ustring.gsub(text, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?%s*' .. escapedName .. '%s*["\']?[^>]*/?%s*>', function() return refBody end, 1)
					end
				end
			end
		else
			position = mw.ustring.len(text)
		end
	end
	-- Prefix the page title to the reference names; a function is used so that any % in the title is taken literally
	text = mw.ustring.gsub(text, '<%s*[Rr][Ee][Ff][^>]*name%s*=%s*["\']?([^"\'>/]+)["\']?[^>/]*(/?)%s*>', function(name, slash)
		return '<ref name="' .. page .. ' ' .. name .. '" ' .. slash .. '>'
	end)
	-- Remove the remaining reference groups
	text = mw.ustring.gsub(text, '<%s*[Rr][Ee][Ff][^>]*group%s*=%s*["\']?[^"\'>/]+["\']%s*>', '<ref>')
	return text
end

-- Replace the bold title or synonym near the start of the page by a link to the page
-- @param text Wikitext to modify
-- @param page Title of the page to link to
-- @return Wikitext with the first bold title (or else the first unlinked bold text) linked to the page
-- Declared local so that it does not leak into the global environment
local function linkBold(text, page)
	local lang = mw.language.getContentLanguage()
	-- look for "'''Foo''' is..." (uc) or "A '''foo''' is..." (lc)
	-- plain search: special characters in page represent themselves
	local position = mw.ustring.find(text, "'''" .. lang:ucfirst(page) .. "'''", 1, true)
		or mw.ustring.find(text, "'''" .. lang:lcfirst(page) .. "'''", 1, true)
	if position then
		-- position points at the first apostrophe, so the title itself starts 3 characters later
		local length = mw.ustring.len(page)
		text = mw.ustring.sub(text, 1, position + 2)
			.. '[[' .. mw.ustring.sub(text, position + 3, position + length + 2) .. ']]'
			.. mw.ustring.sub(text, position + length + 3, -1) -- link it
	else
		-- look for anything unlinked in bold, assumed to be a synonym of the title (e.g. a person's birth name)
		text = mw.ustring.gsub(text, "'''(.-'*)'''", function(bold)
			if not mw.ustring.find(bold, '%[') then -- if not wikilinked
				return "'''[[" .. page .. '|' .. bold .. "]]'''" -- replace '''Foo''' by '''[[page|Foo]]'''
			end
			return nil -- instruct gsub to make no change
		end, 1)
	end
	return text
end

-- Remove any self links, that is, links to the page currently being rendered
-- Both [[Page]] and [[Page|label]] are replaced by their visible text,
-- whatever the case of the first letter of the page name
-- @param text Wikitext to modify
-- @return Wikitext without links to the current page
-- Declared local so that it does not leak into the global environment
local function removeSelfLinks( text )
	local lang = mw.language.getContentLanguage()
	local page = escapeSpecials( mw.title.getCurrentTitle().prefixedText )
	text = mw.ustring.gsub( text, '%[%[(' .. lang:ucfirst( page ) .. ')%]%]', '%1' )
	text = mw.ustring.gsub( text, '%[%[(' .. lang:lcfirst( page ) .. ')%]%]', '%1' )
	text = mw.ustring.gsub( text, '%[%[' .. lang:ucfirst( page ) .. '|([^]]+)%]%]', '%1' )
	text = mw.ustring.gsub( text, '%[%[' .. lang:lcfirst( page ) .. '|([^]]+)%]%]', '%1' )
	return text
end

-- Main function for modules
-- @param page Required. Name of the page to transclude, optionally followed by '#' and a section title.
--        A trailing '#' with no section title requests the lead section.
-- @param options Optional. Table of options:
--        fragment: name of the fragment to get, instead of a section
--        subsections: truthy to keep the subsections of the requested section
--        lists, files, tables, templates, paragraphs, categories, references: flags of the elements to keep
--        only: name of one of the element kinds above, to return nothing but those elements
--        fixReferences, linkBold, noSelfLinks, noBold, noBehaviorSwitches: truthy to apply that transformation
-- @return Wikitext of the requested part of the page. Throws an error if it cannot be found.
local function get(page, options)
	if not page or page == '' then return throwError('no-page') end
	local name, hash, section = mw.ustring.match(page, '([^#]+)(#?)([^#]*)')
	local text
	text, page = getText(name)
	if not page then return throwError('no-page') end
	if not text then return throwError('page-not-found', page) end
	local full = text -- save the full text for fixReferences below

	if not options then options = {} end

	-- Get the requested lead section or section
	if is(options.fragment) then
		text = getFragment(text, options.fragment)
	elseif is(section) then
		text = getSection(text, section)
		if not is(options.subsections) then text = getLead(text) end
	elseif is(hash) then
		text = getLead(text)
	end

	-- Keep only the requested elements
	-- The getters run in this fixed order, each one filtering the text left by the previous one
	local getters = {
		{ 'lists', getLists },
		{ 'files', getFiles },
		{ 'tables', getTables },
		{ 'templates', getTemplates },
		{ 'paragraphs', getParagraphs },
		{ 'categories', getCategories },
		{ 'references', getReferences },
	}
	local only
	for _, getter in ipairs(getters) do
		local kind, extract = getter[1], getter[2]
		if options[kind] or options.only == kind then
			local elements
			elements, text = extract(text, options[kind])
			-- remember the elements named by 'only', rather than those of whichever getter ran last
			if options.only == kind then only = elements end
		end
	end
	-- an 'only' value that names no known kind of element is ignored instead of raising a nil error
	if only then text = table.concat(only, '\n\n') end

	-- Misc options
	if is(options.fixReferences) then text = fixReferences(text, page, full) end
	if is(options.linkBold) then text = linkBold(text, page) end
	if is(options.noSelfLinks) then text = removeSelfLinks(text) end
	if is(options.noBold) then text = mw.ustring.gsub(text, "'''", '') end -- strip the bold markup, keep the text
	if is(options.noBehaviorSwitches) then text = mw.ustring.gsub(text, '__[A-Z]+__', '') end

	-- Remove multiple newlines left over from removing elements
	text = mw.ustring.gsub(text, '\n\n\n+', '\n\n')
	text = mw.text.trim(text)

	return text
end

-- Main invocation function for templates
-- Runs get in protected mode with the first argument as the page and all arguments as options
-- @param frame Frame object of the invocation
-- @return The preprocessed wikitext on success, or the result of getError on failure
local function main(frame)
	local args = parseArgs(frame)
	local ok, result = pcall(get, args[1], args)
	if ok then
		return frame:preprocess(result)
	end
	return getError(result)
end

-- Entry points for templates
p.main = function(frame)
	return main(frame)
end

-- Entry points for other Lua modules
-- Each one is a thin wrapper that forwards its arguments to the local function of the same name
p.get = function(page, options)
	return get(page, options)
end

p.getText = function(page)
	return getText(page)
end

p.getLead = function(text)
	return getLead(text)
end

p.getSection = function(text, section)
	return getSection(text, section)
end

p.getFragment = function(text, fragment)
	return getFragment(text, fragment)
end

p.getParagraphs = function(text, flags)
	return getParagraphs(text, flags)
end

p.getCategories = function(text, flags)
	return getCategories(text, flags)
end

p.getReferences = function(text, flags)
	return getReferences(text, flags)
end

p.getTemplates = function(text, flags)
	return getTemplates(text, flags)
end

p.getTables = function(text, flags)
	return getTables(text, flags)
end

p.getLists = function(text, flags)
	return getLists(text, flags)
end

p.getFiles = function(text, flags)
	return getFiles(text, flags)
end

p.getError = function(message, value)
	return getError(message, value)
end

-- Hand the table of exported functions to whoever loads this module
return p