Markup spec/EBNF

The following is a first draft that attempts to describe wikitext syntax in Extended Backus–Naur Form (EBNF).

Features of current parsing that must be incorporated (list is likely incomplete):


 * 1) Redirects
 * 2) Headers
 * 3) Paragraphs
 * 4) SGML (HTML, nowiki, math, plugins)
 * 5) Lists (unordered, ordered, definition, pseudo-definition)
 * 6) Initial spaces
 * 7) Bold/italics
 * 8) Templates (int:, msg:, msgnw:, raw:?)
 * 9) Template parameters (both on the sending and receiving sides)
 * 10) Horizontal rules
 * 11) Magic words
 * 12) Wikilinks (category, image, Media:)
 * 13) External links
 * 14) Plain URLs (e.g., http://blah)
 * 15) Tables
 * 16) Character entities (e.g., &amp;, &#1234;, &#x1234;)
 * 17) Behavior switches
 * 18) Date reorganization based on user prefs
 * 19) ISBNs

General
(* Lexical primitives shared by the other sections of this grammar. *)

digit        = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" | "0";

(* A scheme needs at least one letter; a bare { ASCII letter } would also
   accept the string "://" with no scheme at all. *)
URL          = ASCII letter, { ASCII letter }, "://", { URL char };

ASCII letter = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m"
             | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z"
             | "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M"
             | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z";

URL char     = ASCII letter | digit
             | "-" | "_" | "." | "~" | "!" | "*" | "'" | "(" | ")" | ";"
             | ":" | "@" | "&" | "=" | "+" | "$" | "," | "/" | "?" | "%" | "#" | "[" | "]";

(* Special sequence standing for any single valid Unicode character.
   A right-hand side holding only a comment is an empty definition and
   would match nothing but the empty string. *)
Unicode char = ? any valid Unicode character ?;

(* Zero or more characters; may be empty. *)
text         = { Unicode char };

(* A page title, optionally preceded by "namespace:" or by a bare ":".
   The optional prefix and the page name are joined with an explicit
   concatenation comma, and the two prefix forms are grouped explicitly. *)
full pagename = [ ( namespace, ":" ) | ":" ], pagename;

(* Both parts are one or more characters. *)
namespace     = Unicode char, { Unicode char };
pagename      = Unicode char, { Unicode char };

Links
(* Delimiters of an internal (wiki) link. *)
start link    = "[[";
end link      = "]]";

(* [[Page]] or [[Page|label]], followed by the label extension. *)
internal link = start link, full pagename, [ "|", label ], end link, label extension;

(* Either a bare URL or a bracketed URL with an optional label.
   Bracketed external links are written with single brackets, so the
   double-bracket start link / end link delimiters are not used here. *)
external link = URL | ( "[", URL, [ whitespace, label ], "]", label extension );

(* Whitespace between the keyword and the target is optional, so the
   usual "#REDIRECT [[Target]]" form is accepted. *)
redirect      = "#REDIRECT", [ whitespace ], internal link;

header link   = "/*", text, "*/";

(* Ten characters in groups of 1, 3, 5 and an optional check character,
   with optional "-" or " " separators. *)
ISBN link     = digit, [ "-" | " " ], 3 * digit, [ "-" | " " ], 5 * digit, [ ( "-" | " " ), ( digit | "X" | "x" ) ];

Headers
(* Section headings. Each level is a run of "=" on both sides of the
   heading text, on a line of its own. The rules use "linebreak", the
   spelling used by the Formatting, Lists and Tables rules. *)
header end = [ whitespace ], linebreak;
header6    = linebreak, "======", [ whitespace ], text, [ whitespace ], "======", header end;
header5    = linebreak, "=====",  [ whitespace ], text, [ whitespace ], "=====",  header end;
header4    = linebreak, "====",   [ whitespace ], text, [ whitespace ], "====",   header end;
header3    = linebreak, "===",    [ whitespace ], text, [ whitespace ], "===",    header end;
header2    = linebreak, "==",     [ whitespace ], text, [ whitespace ], "==",     header end;
header1    = linebreak, "=",      [ whitespace ], text, [ whitespace ], "=",      header end;

(* HTML-style comment. *)
comment    = "<!--", [ text ], "-->";

(* NOTE(review): the tag-like terminals of this rule were lost from the
   draft; this is a reconstruction and should be confirmed. *)
Commentary = "<comment", [ text ], ">", [ text ], "</comment>"; (* This works? *)

Formatting
(* Inline and line-level formatting markup. *)

(* Four or more hyphens. *)
horizontal rule  = "----", { "-" };

(* Emphasis is written with runs of apostrophes: five, three and two. *)
bold italic text = "'''''", text, "'''''";
bold text        = "'''", text, "'''";
italic text      = "''", text, "''";

(* A line that starts with a single space. *)
code line        = linebreak, " ", text;

(* Literal tag delimiters, not their HTML-escaped forms. *)
nowiki           = "<nowiki>", text, "</nowiki>";

Lists
(* List items: a marker character followed by the item text.
   A definition list item may carry text before its ":" marker. *)
unordered list  = "*", text;
ordered list    = "#", text;
definition list = [ text ], ":", text;

(* Continuations: a preceding item of the same kind, a preceding
   continuation of the same kind, or a bare ":", "*" or "#" marker,
   then a linebreak and a further item of the same kind. *)
continue unordered list  = ( unordered list | continue unordered list | ":" | "*" | "#" ),
                           linebreak, unordered list;
continue ordered list    = ( ordered list | continue ordered list | ":" | "*" | "#" ),
                           linebreak, ordered list;
continue definition list = ( definition list | continue definition list | ":" | "*" | "#" ),
                           linebreak, definition list;

Signature
(* Signature markup is written with runs of tildes:
   three for the signature alone, four for signature plus timestamp,
   five for the timestamp alone. *)
user signature           = "~~~";
user signature with date = "~~~~";
current date             = "~~~~~";

Includes
(* Template transclusion and template-argument references. *)
include  = ( template | tplarg ) ;

(* Double braces open a template call, triple braces a template argument.
   Both take a title followed by zero or more "|"-separated parts. *)
template = "{{", title, { "|", part }, "}}" ;
tplarg   = "{{{", title, { "|", part }, "}}}" ;

(* A part is either "name=value" or a bare (positional) value. *)
part     = [ name, "=" ], value ;
title    = balanced text ;
name     = balanced text ;
value    = balanced text ;

(* Plain text interleaved with nested includes. *)
balanced text = text without consecutive equal braces,
                { include, text without consecutive equal braces } ;

Behavior switches
(* Behavior switches: double-underscore magic words, each of which may be
   surrounded by any amount of whitespace and line breaks. *)
place TOC            = { whitespace | linebreak }, "__TOC__",           { whitespace | linebreak };
force TOC            = { whitespace | linebreak }, "__FORCETOC__",      { whitespace | linebreak };
disable TOC          = { whitespace | linebreak }, "__NOTOC__",         { whitespace | linebreak };
disable section edit = { whitespace | linebreak }, "__NOEDITSECTION__", { whitespace | linebreak };

Tables
(* Table delimiters; the opening line may carry style attributes. *)
table start       = "{|", { style | whitespace }, linebreak;
table end         = "|}";

(* "|+" is the table caption markup. The rule keeps the name
   "table header" so existing references stay valid. *)
table header      = "|+", text, linebreak;

(* A header cell either starts a new line with "!" or continues a previous
   header cell on the same line after "!!" or "||". The continuation
   recurses on "table header cell" itself, mirroring "table cell" below;
   "tablecell" was not a defined rule name. *)
table header cell = ( linebreak, "!", ( { style | whitespace } - "|" ), text )
                  | ( table header cell, ( "!!" | "||" ), ( { style | whitespace } - "|" ), text );

(* A data cell either starts a new line with "|" or continues a previous
   data cell on the same line after "||". *)
table cell        = ( linebreak, "|", ( { style | whitespace } - "|" ), text )
                  | ( table cell, "||", ( { style | whitespace } - "|" ), text );

(* Row separator: "|-" with optional extra hyphens and style attributes. *)
table row         = linebreak, "|-", { "-" }, { style | whitespace }, linebreak;

(* The cells of a table, split into rows by "table row" separators.
   Each row holds one or more cells, so a row may place its cells on
   separate lines as well as on a single line. *)
table body = ( table header cell | table cell ), { table header cell | table cell },
             { table row, ( table header cell | table cell ), { table header cell | table cell } };

(* A complete table: opening line, optional caption ("table header"),
   optional leading row separator, the cells, and the closing line. *)
table      = table start, [ table header ], [ table row ], table body, table end;