// User:Stevage/ANTLR
grammar headerline10;
/* Bugs:
at start of article
*/
options {output=AST;}
// Imaginary token types. No lexer rule visible in this grammar produces
// them; they are declared so that tree rewrites (-> ^(...)) can create AST
// nodes of these types. Each name is declared exactly once.
tokens {
    ARTICLE;
    START;
    INTERNAL_LINK;
    ENTITY;
    RD;
    H1;
    H2;
    H3;
    H4;
    H5;
    H6;
    HR;
    P;
    IMG;
    NBSP;
    PROTOCOL;
    TEXT; // all subnodes should be concatenated
    BALD_URL;
    EXTERNAL_LINK;
    IMG_OPTION;
    IMG_OPTION_THUMBNAIL;
    ITALICS;
    BOLD;
    B_ON;
    B_OFF;
    BOLD_ITALICS;
    I_ON;
    I_OFF;
    BR;
    UL;
    OL;
    DL;
    LI;
    DD;
    DT;
    ISBN;
    RFC;
    PMID;
    PRE;
    NBSP160; // a real nbsp (presumably character 160, i.e. &#160;)
}
@members {
    // Namespace word that image_namespace compares against (case-insensitively).
    String _mw_image_namespace = "image";

    // Context flags. Each one is switched on by the rule named here and read
    // by a gated predicate elsewhere in the grammar.
    boolean in_external_link=false;          // external_link_caption; read by literal_right_bracket
    boolean in_header=false;                 // header_simple_text; read by literal_equals
    boolean in_internal_link_caption=false;  // link_caption; read by literal_pipe
    boolean in_defined_term=false;           // definition_item; read by literal_colon
    boolean text_bold=false;                 // toggled by bold_and_italics
    boolean text_italics=false;              // toggled by bold_and_italics
    boolean literal_whitespace=false;        // spaceline; read by printing_ws

    // Nesting counters: caption_levels is bumped by link_caption and
    // image_caption, text_levels by inline_text and simple_text.
    int caption_levels = 0;
    int text_levels =0;

    // True when the text of the next token equals mw, ignoring case.
    // Null-safe: a lookahead token without text (for example an EOF token
    // whose getText() is null) simply does not match instead of throwing.
    boolean textis(String mw) {
        Token next = input.LT(1);
        String text = (next == null) ? null : next.getText();
        return mw != null && mw.equalsIgnoreCase(text);
    }
}
// Lexer-side state consulted by the gated predicates on the lexer rules.
@lexer::members {
    // Set/cleared by the NOWIKI and NOWIKI_OFF rules.
    boolean in_nowiki = false;
    // While true, markup tokens such as LEFT_BRACKET, PIPE and COLON are not
    // produced (their rules are gated on !in_noparse).
    boolean in_noparse = false;
    // Set/cleared by the PRE_OPEN and PRE_CLOSE rules.
    boolean in_pre = false;
    // Not referenced by any rule visible in this grammar.
    boolean in_html = false;
    // Not referenced by any rule visible in this grammar.
    boolean in_listprefix = false;
}
// Entry rule: the input is either a redirect or an article; whichever
// matched is placed under a START root.
start
    : ( redirect | article )
      -> ^(START redirect? article?)
    ;
//////////////////////////////////////////////////////////////////////
// A redirect: the REDIRECT keyword, spaces, the target link, then
// optionally more article text.
redirect
    : REDIRECT SPACE+ internal_link ( ws? ( (article) => article )? )
      -> ^(RD internal_link article?)
    ;

// Only recognised at column 0 of line 1.
REDIRECT
    : { this.getCharPositionInLine()==0 && this.getLine()==1 }? => '#REDIRECT'
    ;
//////////////////////////////////////////////////////////////////////
// Leading newlines are skipped; after that the article is a sequence of
// lines, each terminated by a newline and followed by a paragraph separator.
article
    : (N*) ( line N paragraph_separator )*
      -> ^(ARTICLE (line paragraph_separator)* )
    ;

// Zero or more extra newlines between lines.
paragraph_separator
    : pn*
    ;

// One extra newline: rewritten to whatever close_bold_italics yields, then a BR node.
pn
    : N close_bold_italics
      -> close_bold_italics BR
    ;

// Matches nothing; its only effect is to reset the bold/italic state after
// it runs. The rewrites that would emit closing nodes are commented out.
close_bold_italics
@after { text_bold=false; text_italics = false; }
    : /*
{text_bold==true && text_italics==true}? => -> B_OFF I_OFF
|{text_bold==false && text_italics==true}? => -> I_OFF
|{text_bold==true && text_italics==false}? => -> B_OFF
*/
    ;
// One source line. Alternatives are tried in this order through syntactic
// predicates; paragraph is the fallback.
line
    : /* (simple_text) => paragraph^
      | */
      (headerline) => headerline^
    | (listmarker) => listline^
    | (hrline)     => hrline^
    | (spaceline)  => spaceline^
    | paragraph^
    ;
////////////////////////// Lists ////////////////////////////////
// A list line is one of four item kinds, chosen by its first marker token.
listline
    : bullet_list_item
    | ordered_list_item
    | indent_list_item
    | definition_item
    ;

listprefix
    : (listmarker)+
    ;

// ASTERISK item: nested list line, inline text, or empty -> UL node.
bullet_list_item
    : ASTERISK
      ( (listmarker) => listline -> ^(UL listline)
      | inline_text              -> ^(UL inline_text)
      |                          -> ^(UL)
      )
    ;

// HASH item: nested list line, inline text, or empty -> OL node.
ordered_list_item
    : HASH
      ( (listmarker) => listline -> ^(OL listline)
      | inline_text              -> ^(OL inline_text)
      |                          -> ^(OL)
      )
    ;

// COLON item: nested list line, (optionally space-prefixed) inline text, or
// empty -> DD node.
indent_list_item
    : COLON
      ( (listmarker) => listline -> ^(DD listline)
      | ws? inline_text          -> ^(DD inline_text)
      |                          -> ^(DD)
      )
    ;

// SEMICOLON item: a term, optionally followed by a definition on the same
// line (after COLON) or on the next line (N then an indent item).
// in_defined_term is true only while the term itself is being parsed.
definition_item
@init {in_defined_term = true;}
    : SEMICOLON ws?
      ( term=inline_text {in_defined_term=false;}
        ( (COLON) => COLON ws? def=inline_text -> ^(DT $term ^(DD $def))
        | (N COLON) => N indent_list_item      -> ^(DT $term indent_list_item)
        |                                      -> ^(DT $term)
        )
      |                                        -> ^(DT)
      )
    ;
finally {in_defined_term = false;}

// Any of the four tokens that can start a list line.
listmarker
    : HASH
    | ASTERISK
    | COLON
    | SEMICOLON
    ;
/////////////////////////// Space blocks ///////////////////
// A line that starts with a SPACE becomes a PRE node. literal_whitespace is
// on for the duration of the rule so printing_ws can keep spaces as-is.
// Both printing_ws and inline_text are optional on the left, so both carry
// '?' in the rewrite; an un-suffixed reference to an element that matched
// nothing raises RewriteEmptyStreamException when the tree is built.
spaceline
@init {literal_whitespace = true;}
    : SPACE printing_ws? inline_text?
      -> ^(PRE printing_ws? inline_text?)
    ;
finally {literal_whitespace = false;}
////////////////////////// Headers /////////////////////////////////
// Header levels are tried deepest first. Each headerN rule requires N EQUALS
// tokens on both sides; any surplus EQUALS on either side is collected in
// $a / $b and kept as children of the HN node.
headerline
    : /* {this.in_header = true;}*/
      ( (header6) => header6^
      | (header5) => header5^
      | (header4) => header4^
      | (header3) => header3^
      | (header2) => header2^
      | (header1) => header1^
      )
      /* {this.in_header = false;} */
    ;

header6
    : EQUALS EQUALS EQUALS EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS EQUALS EQUALS EQUALS
      -> ^(H6 $a* header_simple_text $b*)
    ;

header5
    : EQUALS EQUALS EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS EQUALS EQUALS
      -> ^(H5 $a* header_simple_text $b*)
    ;

header4
    : EQUALS EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS EQUALS
      -> ^(H4 $a* header_simple_text $b*)
    ;

header3
    : EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS
      -> ^(H3 $a* header_simple_text $b*)
    ;

header2
    : EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS
      -> ^(H2 $a* header_simple_text $b*)
    ;

header1
    : EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS
      -> ^(H1 $a* header_simple_text $b*)
    ;

// Four or more hyphens: horizontal rule.
hrline
    : HYPHEN HYPHEN HYPHEN HYPHEN HYPHEN*
      -> ^(HR)
    ;
///////////////////////////////////////////////////////////////////////
// [[pagename]] or [[pagename|caption]], optionally followed directly by
// letters (the link trail). Caption and trail are grouped under one TEXT node.
internal_link
    : link_start pagename ( PIPE link_caption )? link_end ( (letters) => link_trail )?
      -> ^(INTERNAL_LINK pagename ^(TEXT link_caption? link_trail?))
    ;

// Caption text; bumps caption_levels and raises in_internal_link_caption
// while it is being parsed.
link_caption
@init {this.caption_levels++; in_internal_link_caption = true;}
    : simple_text
    ;
finally {this.caption_levels--; in_internal_link_caption = false;}

link_trail
    : letters
    ;
////////////////////////////////////////////////////////////////////////
////////////////////////// Images //////////////////////////////////////////
// [[image:name.ext|option-or-caption|...]]
imageinline
    : (link_start image_namespace) =>
      link_start image_namespace COLON ws? imagename ( PIPE optionorcaption )* link_end
      -> ^(IMG ^(TEXT image_namespace COLON imagename) optionorcaption*)
    ;

imagename
    : pagename ws? DOT ws? imageextension
    ;

/* Future passes/actions etc can readily retrieve the extension text, so just validate for now? */
// Accepts a LETTERS token whose text is one of the listed extensions.
imageextension
    : { textis("jpeg")
     || textis("jpg")
     || textis("png")
     || textis("svg")
     || textis("gif")
     || textis("bmp") }? letters
    ;
// One '|'-separated field of an image link: either a recognised option or a
// caption. An option only counts as such when PIPE or link_end follows it.
optionorcaption
    : (imagemodeautothumb (PIPE | link_end)) => imagemodeautothumb /* move it up here as it's so common */
    | (SPACE | punctuation)                  => image_caption
    | (imageoption (PIPE | link_end))        => imageoption
    | image_caption
    ;
// Caption of an image link, wrapped in a TEXT node. caption_levels is raised
// while the caption is parsed. The caption may be empty, so inline_text is
// optional in the rewrite as well; an un-suffixed reference to an element
// that matched nothing raises RewriteEmptyStreamException when the tree is built.
image_caption
@init {this.caption_levels++;}
    : inline_text?
      -> ^(TEXT inline_text?)
    ;
finally {this.caption_levels-- ;}
// Every recognised image option. Each alternative is a thin wrapper around
// the matching mw_img_* rule, which carries the keyword predicate.
imageoption
    : imagemodemanualthumb
    | imagemodeautothumb
    | imagemodeframe
    | imagemodeframeless
    //| imagemodepage /* something weird about this one but I don't know what. */
    | imagemodeupright
    | imagemodeborder
    | imagesizeparameter
    | imagealignleft
    | imagealigncenter
    | imagealignright
    | imagealignnone
    | imagevalignbaseline
    | imagevalignsub
    | imagevalignsuper
    | imagevaligntop
    | imagevaligntexttop
    | imagevalignmiddle
    | imagevalignbottom
    | imagevaligntextbottom
    ;

// Display modes.
imagemodemanualthumb  : mw_img_manualthumb ;
imagemodeautothumb    : mw_img_thumbnail ;
imagemodeframe        : mw_img_frame ;
imagemodeframeless    : mw_img_frameless ;
imagemodepage         : mw_img_page ;
imagemodeupright      : mw_img_upright ;
imagemodeborder       : mw_img_border ;

// Size: a positive integer followed by the width keyword.
imagesizeparameter    : positive_int mw_img_width ;

// Horizontal alignment.
imagealignleft        : mw_img_left ;
imagealigncenter      : mw_img_center ;
imagealignright       : mw_img_right ;
imagealignnone        : mw_img_none ;

// Vertical alignment.
imagevalignbaseline   : mw_img_baseline ;
imagevalignsub        : mw_img_sub ;
imagevalignsuper      : mw_img_super ;
imagevaligntop        : mw_img_top ;
imagevaligntexttop    : mw_img_text_top ;
imagevalignmiddle     : mw_img_middle ;
imagevalignbottom     : mw_img_bottom ;
imagevaligntextbottom : mw_img_text_bottom ;
/* default settings: */
/* Hmm, user-definable grammar seems to be a bad idea. Assume that the img_manualthumb is always something followed by the name. */
// Each rule checks the text of the upcoming token(s) with a semantic
// predicate and then consumes the keyword through mwletters.
mw_img_manualthumb : {textis("thumbnail") || textis("thumb")}? mwletters EQUALS imagename -> ^(IMG_OPTION_THUMBNAIL imagename);
mw_img_thumbnail : {textis("thumbnail") || textis("thumb")}? mwletters -> ^(IMG_OPTION_THUMBNAIL);
mw_img_frame : {textis("framed") || textis("enframed") || textis("frame")}? mwletters; //'framed' | 'enframed' | 'frame';
mw_img_frameless : {textis("frameless")}? mwletters;
mw_img_page : {textis("page")}? mwletters (SPACE | EQUALS) mwletters; //'page=$1' | 'page $1' ; /*??? (where is this used?);*/
mw_img_upright : {textis("upright")}? mwletters EQUALS? positive_int?; //'upright' ( '='? POSITIVE_INT)?;
mw_img_border : {textis("border")}? mwletters;
mw_img_width : {textis("px")}? mwletters;
mw_img_baseline : {textis("baseline")}? mwletters;
mw_img_sub : {textis("sub")}? mwletters;
mw_img_super : {textis("super") || textis("sup")}? mwletters;
mw_img_top : {textis("top")}? mwletters;
// "text-top" and "text-bottom" are lexed as three tokens (LETTERS HYPHEN
// LETTERS), so comparing only the first token's text against the hyphenated
// word can never succeed. Check all three lookahead tokens instead; mwletters
// then consumes them (it accepts HYPHEN between letter runs).
mw_img_text_top : {textis("text") && input.LA(2)==HYPHEN && "top".equalsIgnoreCase(input.LT(3).getText())}? mwletters;
mw_img_middle : {textis("middle")}? mwletters;
mw_img_bottom : {textis("bottom")}? mwletters;
mw_img_text_bottom : {textis("text") && input.LA(2)==HYPHEN && "bottom".equalsIgnoreCase(input.LT(3).getText())}? mwletters;
mw_img_left : {textis("left")}? mwletters;
mw_img_center : {textis("center") || textis("centre")}? mwletters;
mw_img_right : {textis("right")}? mwletters;
mw_img_none : {textis("none")}? mwletters;
image_namespace : {textis(_mw_image_namespace)}? mwletters;
///////////////////////////// external links ///////////////////////////////
// Either a bare URL in running text or a bracketed [url caption] form.
external_link
    : bald_url
      -> ^(EXTERNAL_LINK ^(TEXT bald_url) ^(TEXT bald_url)) //attempt to use url as caption
    | explicit_url
      -> ^(EXTERNAL_LINK explicit_url)
    ;
// protocol://host.tld followed by optional further dotted parts, optional
// path segments and an optional trailing slash. The trailing-slash predicate
// is wrapped in its own subrule so that it sits at the left edge of an
// alternative, exactly like the two loops above it.
bald_url:
protocol COLON SLASH SLASH letters DOT letters // http://foo.com (minimum)
((DOT letters)=>DOT letters)* // .lom.wom ...
((SLASH letters)=>SLASH letters)* // /thing/other/docs ...
((SLASH)=>SLASH)? // /
;
// [url] or [url caption]; yields two TEXT nodes (url, then caption if any).
explicit_url
    : LEFT_BRACKET bald_url ( ws external_link_caption )? RIGHT_BRACKET
      -> ^(TEXT bald_url) ^(TEXT external_link_caption?)
    ;

// A LETTERS token whose text is "ftp" or "http" (case-insensitive).
protocol
    : { textis("ftp") || textis("http") }? letters
    ;

// Caption of a bracketed external link; in_external_link is raised while it
// is being parsed.
external_link_caption
@init {this.in_external_link=true;}
    : simple_text
    ;
finally {this.in_external_link=false;}
//////////////////////////// magic links ////////////////////////////////
// Magic links (ISBN / PMID / RFC). The *_link rules wrap the token in a
// dedicated node; the *_accidental rules wrap the same token in a TEXT node.
magic_link
    : isbn_link
    | pmid_link
    | rfc_link
    ;

accidental_magic_link
    : isbn_accidental
    | pmid_accidental
    | rfc_accidental
    ;

isbn_link       : ISBN_LINK -> ^(ISBN ISBN_LINK) ;
isbn_accidental : ISBN_LINK -> ^(TEXT ISBN_LINK) ; // the TEXT node is possibly superfluous?

rfc_link        : RFC_LINK  -> ^(RFC RFC_LINK) ;
rfc_accidental  : RFC_LINK  -> ^(TEXT RFC_LINK) ;

pmid_link       : PMID_LINK -> ^(PMID PMID_LINK) ;
pmid_accidental : PMID_LINK -> ^(TEXT PMID_LINK) ;
////////////////////////////////////////////////////////////////////////
// A paragraph: optional leading whitespace, then inline text, wrapped in a P node.
paragraph: ws? /* !!!! */ inline_text -> ^(P inline_text);
// One or more inline elements. After each element come any number of
// nbsp_before_punctuation sequences and an optional run of whitespace.
// text_levels is incremented on entry and decremented in the finally block,
// so it counts how deeply inline_text/simple_text invocations are nested.
// The alternatives below are tried in the order written.
inline_text
@init { text_levels++; }
:
(
/*(complex_inline_elem | simple_inline_elem) =>*/
/* (complex_inline_elem
|simple_inline_elem
)
ws?*/
// [[http://foo.com]] has to be treated as: [, [http://foo.com], ]
((LEFT_BRACKET LEFT_BRACKET LEFT_BRACKET) => literal_left_bracket // try and save it some time on [[[foo]]]?
|(literal_left_bracket explicit_url) => literal_left_bracket
|/*(imageinline) =>*/ imageinline
|(external_link) => external_link
|(internal_link) => internal_link
|(magic_link) => magic_link
|pre_block
|(simple_inline_elem) =>simple_inline_elem
)
((nbsp_before_punctuation) => nbsp_before_punctuation)*
((ws) =>printing_ws)?
)+;
finally { text_levels --;}
// One or more simple inline elements, each optionally followed by printing
// whitespace. Maintains text_levels the same way inline_text does.
simple_text
@init { text_levels++; }
    : (
        (simple_inline_elem) => simple_inline_elem
        // (nbsp_before_punctuation)?
        ( (printing_ws) => printing_ws )?
        //ws?
      )+
    ;
finally { text_levels --; }
// The inline elements that contain no links or images.
simple_inline_elem
    : ( (accidental_magic_link) => accidental_magic_link
      | punctuation_before_nbsp
      | (APOSTROPHES) => bold_and_italics
      | ( (nbsp_before_punctuation) => nbsp_before_punctuation )+
      | really_basic_elem
      )
    ;
// Content between PRE_OPEN and PRE_CLOSE, wrapped in a PRE node.
pre_block: PRE_OPEN pre_block_body PRE_CLOSE -> ^(PRE pre_block_body);
// Any sequence (possibly empty) of whitespace runs and basic elements.
// Written as a single loop over the two element kinds: the earlier
// "(pre_ws* really_basic_elem*)+" form looped over a body that can match
// nothing, which is ambiguous; this form accepts the same input.
pre_block_body: (pre_ws | really_basic_elem)*;
// The smallest text units: an HTML-sensitive character, one punctuation
// element, a run of letters, or a run of digits.
really_basic_elem
    : ( html_dangerous
      | punctuation /* if punctuation+, risk of swallowing too many characters: [[[foo.jpg]]] needs to swallow just one */
      | letters
      | digits
      )
    ;
/*textline: simple_text -> ^(P simple_text);*/
///////////////////////////////////////////////////////////////////////////
// Turns a run of apostrophes into bold/italic on/off nodes, using and
// updating text_bold / text_italics:
//   2 apostrophes -> toggle italics
//   3 apostrophes -> toggle bold
//   4 apostrophes -> one literal apostrophe, then toggle bold
//   5 apostrophes -> toggle both
// The literal apostrophe in the 4-apostrophe case is an imaginary node, so
// its text is given explicitly (\u0027 is the apostrophe character);
// without that, the node text would default to the token name.
bold_and_italics:
{textis("''") && text_italics}? => APOSTROPHES {text_italics=false;} -> ^(I_OFF)
|{textis("''") && !text_italics}? => APOSTROPHES {text_italics=true;} -> ^(I_ON)
|{textis("'''") && text_bold}? => APOSTROPHES {text_bold=false;} -> ^(B_OFF)
|{textis("'''") && !text_bold}? => APOSTROPHES {text_bold=true;} -> ^(B_ON)
|{textis("''''") && text_bold}? => APOSTROPHES {text_bold=false;} -> APOSTROPHE["\u0027"] ^(B_OFF)
|{textis("''''") && !text_bold}? => APOSTROPHES {text_bold=true;} -> APOSTROPHE["\u0027"] ^(B_ON)
|{textis("'''''") && text_bold && text_italics}? => APOSTROPHES {text_bold=false; text_italics=false; } -> ^(B_OFF) ^(I_OFF)
|{textis("'''''") && text_bold && !text_italics}? => APOSTROPHES {text_bold=false; text_italics=true; } -> ^(B_OFF) ^(I_ON)
|{textis("'''''") && !text_bold && text_italics}? => APOSTROPHES {text_bold=true; text_italics=false; } -> ^(B_ON) ^(I_OFF)
|{textis("'''''") && !text_bold && !text_italics}? =>APOSTROPHES {text_bold=true; text_italics=true; } -> ^(B_ON) ^(I_ON)
// Hopefully we never get more than 6 or less than 2. The lexer should take care of that.
;
////////////////////////Nbsp punctuation/////////////////////////////////
// A SPACE directly before one of these punctuation marks is replaced by an
// NBSP160 node; the punctuation mark itself is kept.
nbsp_before_punctuation
    : SPACE
      ( '»'       -> NBSP160 '»'
      | QUESTION  -> NBSP160 QUESTION
      | COLON     -> NBSP160 COLON
      | SEMICOLON -> NBSP160 SEMICOLON
      | '!'       -> NBSP160 '!'
      | '%'       -> NBSP160 '%'
      )
    ;
// SPACE x=('»' | QUESTION | COLON | SEMICOLON | '!' | '%') -> NBSP160 $x;
/*{input.LA(2) == '?'}? => */
// (SPACE ('»' | '?' | COLON | SEMICOLON | '!' | '%')) => SPACE -> NBSP160;

// Mirror case: a SPACE directly after an opening guillemet becomes NBSP160.
punctuation_before_nbsp
    : '«' SPACE
      -> '«' NBSP160
    ;
//«»
//////// //////////////////////////////////////////////////////////////////
// A page name starts with a name element and continues with further name
// elements or single spaces.
pagename
    : pagename_elem
      ( (pagename_elem) => pagename_elem
      | (SPACE)         => SPACE
      )*
    ;

pagename_elem
    : ( letters
      | accidental_magic_link /* | DIGITS | DOT | UNDERSCORE | HYPHEN | OPEN_PAREN | CLOSE_PAREN*/
      )
    ;
/////////////////////////////////// Very basic types ///////////////////////////////////////
/* Currently doesn't support equals during a header title...*/
// Header title text; in_header is raised while it is being parsed.
header_simple_text
@init {this.in_header=true;}
    : inline_text /* Pretty much everything seems to be tolerated in headings. (!) */
    ;
finally {this.in_header=false;}

// any need for accidental_magic_link?
// A keyword-like word: letters first, then any mix of letters, hyphens,
// underscores and integers.
mwletters
    : letters
      ( letters
      | HYPHEN
      | UNDERSCORE
      | (digits) => positive_int
      )*
    ;
/////////////////////////////////// Semi-literals, literal sets etc ///////////////////////////
// Everything treated as a single punctuation element. The literal_* entries
// are markup characters that are only accepted when their own predicates
// say they are not acting as markup.
punctuation
    : DOT | digits | COMMA | OPEN_PAREN | CLOSE_PAREN | HASH | HYPHEN | ASTERISK
    | UNDERSCORE | SLASH | SEMICOLON | APOSTROPHE | QUESTION
    | literal_left_bracket | literal_right_bracket | literal_pipe | literal_equals | literal_colon
    | '!' | '@' | '$' | '%' | '^' | '`' | '~' | '\\' | '«' | '»'
    ;
// |/*LINK_START |*/ /*LINK_END | */UNKNOWN ;

// '<', '>' and '&' are wrapped in ENTITY nodes.
html_dangerous
    : LT  -> ^(ENTITY LT)
    | GT  -> ^(ENTITY GT)
    | AMP -> ^(ENTITY AMP)
    ;

letters
    : (LETTERS)
    ;

positive_int
    : digits /* needs to be refined to remove 0s at start */
    ;
// Markup characters accepted as plain text, each gated on parser state.
literal_link_end
    : { caption_levels==0 }? => link_end
    ;

// Strangely enough, a literal pipe has to be allowed in an internal link caption: [[foo|bar|wa]]
// It would be good if this behaviour were proscribed to allow for future options
literal_pipe
    : { caption_levels==0 || in_internal_link_caption }? => PIPE
    ;

/* Three ways of getting a literal right bracket:
1) You're neither in an external nor internal link: foo]
2) You're in an internal link, and not followed by another right bracket: [[Boop|here] see?]]
3) You're in a nowiki block: [http://square.bracket.com The <nowiki>] foundation.]
*/
literal_right_bracket
    : { !in_external_link && (caption_levels == 0 || input.LA(2)!= RIGHT_BRACKET) }? =>
      RIGHT_BRACKET
    | NOWIKI_RIGHT_BRACKET
    ;
/* {!in_external_link}? => RIGHT_BRACKET
| NOWIKI_RIGHT_BRACKET;*/

literal_left_bracket
    : LEFT_BRACKET
    | NOWIKI_LEFT_BRACKET
    ; /* Dodgy - doesn't really know whether it's a literal left bracket or not */

literal_colon
    : { !in_defined_term || text_levels > 1 }? => COLON /* ;foo:blah is special. ;foo[blah|bl:ah] is not special. TODO: make sure this doesn't break namespaces in defs */
    | NOWIKI_COLON
    ;

// Only supports a single =. So no ==foo==blah==.
literal_equals
    : { !in_header || input.LA(2) != EQUALS }? => EQUALS
    ;

link_start
    : LEFT_BRACKET LEFT_BRACKET
    ;

link_end
    : RIGHT_BRACKET RIGHT_BRACKET
    ;
// TODO: apparently image captions always treat spaces literally...
// Whitespace that shows up in the output: kept as individual SPACE nodes when
// literal_whitespace is on and we are at the outermost text level, otherwise
// collapsed to a single NBSP node.
printing_ws
    : { literal_whitespace && text_levels <= 1 }? => (pre_ws) => pre_ws
    | ws -> NBSP
    ;

digits
    : digit+
    ;

pre_ws
    : pre_ws_elem+
    ;

// Keeps SPACE, drops the nowiki open/close tokens from the tree.
pre_ws_elem
    : SPACE      -> SPACE
    | NOWIKI     ->
    | NOWIKI_OFF ->
    ;

// A whitespace run; nowiki open/close tokens count as whitespace here but
// are excluded from the tree by the '!' suffix.
ws
    : ( SPACE
      | NOWIKI!
      | NOWIKI_OFF!
      )+
    ;
//-----------------------------------------------------
/* ISBN magic links. Care will be needed to make sure they're treated as literals wherever they aren't supported. */
// Broken example: [http://ISBN 1234567890] - current parser does correctly. But does it matter?
// Each magic-link token has the same shape: outside a no-parse region, try
// the full *_ACTUAL pattern via a syntactic predicate; if that fails, consume
// a single LETTER and relabel the token as LETTERS.
ISBN_LINK
    : { !this.in_noparse }? =>
      // Parser.php l081, ~DIGIT is actually regexp \b
      ( (ISBN_LINK_ACTUAL ~DIGIT) => ISBN_LINK_ACTUAL
      | LETTER { $type=LETTERS; }
      )
    ;

// 'ISBN', spaces, an optional 978/979 prefix, then ten characters (the last
// may be X or x), each optionally preceded by one space or hyphen.
fragment
ISBN_LINK_ACTUAL
    : 'ISBN'
      ' '+
      ( '97' ('8' | '9') )?
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? ('0'..'9' | 'X' | 'x') )
    ;

RFC_LINK
    : { !this.in_noparse }? =>
      ( (RFC_LINK_ACTUAL) => RFC_LINK_ACTUAL
      | LETTER { $type=LETTERS; }
      )
    ;

// 'RFC', spaces, digits.
fragment
RFC_LINK_ACTUAL
    : 'RFC'
      ' '+
      ('0'..'9')+
    ;

PMID_LINK
    : { !this.in_noparse }? =>
      ( (PMID_LINK_ACTUAL) => PMID_LINK_ACTUAL
      | LETTER { $type=LETTERS; }
      )
    ;

// 'PMID', spaces, digits.
fragment
PMID_LINK_ACTUAL
    : 'PMID'
      ' '+
      ('0'..'9')+
    ;
///////////// /
// Opening nowiki tag. Outside a no-parse region, try to match the whole tag;
// on success both in_nowiki and in_noparse are switched on. Otherwise a lone
// '<' is consumed and relabelled as LT.
NOWIKI: {!this.in_noparse}? =>
    ( (NOWIKI_ACTUAL) => NOWIKI_ACTUAL { this.in_nowiki=true; this.in_noparse=true; }
    | '<' { $type=LT; }
    );
// The tag text had been stripped to an empty literal; restored from the
// character-by-character lookahead spelled out in the commented-out NOWIKI
// variants later in this file.
fragment
NOWIKI_ACTUAL: '<nowiki>' ;
// Closing nowiki tag, only considered while in_nowiki is set. It clears
// in_noparse as well as in_nowiki: NOWIKI sets both, and leaving in_noparse
// on would keep every markup token disabled for the rest of the input.
NOWIKI_OFF: {this.in_nowiki}? =>
    ( (NOWIKI_OFF_ACTUAL) => NOWIKI_OFF_ACTUAL { this.in_nowiki=false; this.in_noparse=false; }
    | '<' { $type=LT; }
    );
fragment
NOWIKI_OFF_ACTUAL: '</nowiki>' ;
/////////// <pre> /
// Opening pre tag. Outside a no-parse region, try to match the whole tag; on
// success in_pre and in_noparse are switched on. Otherwise a lone '<' is
// consumed and relabelled as LT.
PRE_OPEN: {!this.in_noparse}? =>
    ( (PRE_OPEN_ACTUAL) => PRE_OPEN_ACTUAL { this.in_pre=true; this.in_noparse=true; }
    | '<' { $type=LT; }
    );
// The tag literals had been replaced by a raw line break inside the quotes;
// restored to the tag named in this section's heading.
fragment
PRE_OPEN_ACTUAL: '<pre>' ;
// Closing pre tag, only considered while in_pre is set; clears both flags.
PRE_CLOSE: {this.in_pre}? =>
    ( (PRE_CLOSE_ACTUAL) => PRE_CLOSE_ACTUAL { this.in_pre=false; this.in_noparse=false; }
    | '<' { $type=LT; }
    );
fragment
PRE_CLOSE_ACTUAL: '</pre>' ;

// HTML-sensitive single characters.
LT: '<';
GT: '>';
AMP: '&';
//NOWIKI : {!this.in_nowiki}? => '<' 'nowiki' >'{ this.in_nowiki=true;} /*->*/ ; /* doesn't render as anything in particular */
//NOWIKI_OFF: {this.in_nowiki}? => '</nowiki>'{ this.in_nowiki=false;} /*->*/ ;
/* NOWIKI: {
!this.in_nowiki &&
input.LA(1)=='<' &&
input.LA(2)=='n' &&
input.LA(3)=='o' &&
input.LA(4)=='w' &&
input.LA(5)=='i' &&
input.LA(6)=='k' &&
input.LA(7)=='i' &&
input.LA(8)=='>'
}? => '<nowiki>' { this.in_nowiki=true;};
*/
/*NOWIKI_OFF: {
this.in_nowiki &&
input.LA(1)=='<' &&
input.LA(2)=='/' &&
input.LA(3)=='n' &&
input.LA(4)=='o' &&
input.LA(5)=='w' &&
input.LA(6)=='i' &&
input.LA(7)=='k' &&
input.LA(8)=='i' &&
input.LA(9)=='>'
}? => '</nowiki>' { this.in_nowiki=false;};
*/
/* NOWIKI : {!this.in_nowiki}? => LT 'nowiki' GT { this.in_nowiki=true;} ; NOWIKI_OFF: {this.in_nowiki}? => LT '/nowiki' GT { this.in_nowiki=false;};*/
/* Can't make them tokens because of stupid [[1]] */
/*
LINK_START: {!this.in_nowiki}? => '[['; LINK_END: {!this.in_nowiki}? => ']]';
*/
// The comment above was left unterminated ("- /" instead of a proper
// terminator), which swallowed the PIPE rule; it is closed properly now.
// PIPE is only produced outside no-parse regions.
PIPE: {!this.in_noparse}? => '|';
/*PRESPACE: { !this.in_nowiki && this.getCharPositionInLine()==0 }? => ' '; SPACE: {!(!this.in_nowiki && this.getCharPositionInLine()==0)}? => ' '+;*/
SPACE: ' ';
// Unconditional single-character tokens.
DOT : '.';
EQUALS : '=';
UNDERSCORE : '_';
HYPHEN : '-';
COMMA : ',';
OPEN_PAREN : '(';
CLOSE_PAREN : ')';
SEMICOLON : ';';
QUESTION : '?';
// The apostrophe character literals in this section had been truncated to
// an unterminated '\ ; they are restored to the escaped form '\''.
fragment APOS : '\'';
/* It's a literal apostrophe if either the next character is *not* an apostrophe, or the next 5 characters *are* apostrophes. Yummy. */
APOSTROPHE : {
    input.LA(1)=='\'' && (
        this.in_noparse || (
            input.LA(2)!='\'' ||
            input.LA(3)=='\'' &&
            input.LA(4)=='\'' &&
            input.LA(5)=='\'' &&
            input.LA(6)=='\''
        )
    )
}? => APOS;
/* It's a swarm of apostrophes if it is not the case that this and the next five characters are apostrophes, and there are at least two, and we're not in a nowiki.*/
APOSTROPHES : {
    !this.in_noparse &&
    input.LA(1)=='\'' && !(
        input.LA(2)=='\'' &&
        input.LA(3)=='\'' &&
        input.LA(4)=='\'' &&
        input.LA(5)=='\'' &&
        input.LA(6)=='\''
    )
}? => APOS APOS+ ;
// Markup characters. Brackets and colon come in two token types for the same
// character: the plain one outside no-parse regions, the NOWIKI_* one inside.
// HASH, ASTERISK and SLASH are only produced outside no-parse regions.
LEFT_BRACKET         : { !this.in_noparse }? => '[' ;
NOWIKI_LEFT_BRACKET  : {  this.in_noparse }? => '[' ;

RIGHT_BRACKET        : { !this.in_noparse }? => ']' ;
NOWIKI_RIGHT_BRACKET : {  this.in_noparse }? => ']' ;

COLON                : { !this.in_noparse }? => ':' ;
NOWIKI_COLON         : {  this.in_noparse }? => ':' ;

HASH                 : { !this.in_noparse }? => '#' ;
ASTERISK             : { !this.in_noparse }? => '*' ;
SLASH                : { !this.in_noparse }? => '/' ;
// The DIGIT and digit rules had been collapsed onto one line behind a "//",
// which commented them out even though digits and ISBN_LINK reference them.
// They are back on their own lines; the retired variants stay commented out.
//fragment
//DIGIT: D0 | D1 | D2 | D3 | D4 | D5 | D6 | D7 | D8 | D9;
DIGIT: '0'..'9';
//DIGITS: DIGIT+;
digit: DIGIT;
// A single ASCII letter (fragment: never emitted as a token by itself).
fragment
LETTER
    : ( 'A'..'Z' | 'a'..'z' )
    ;
//HTTP: 'h' 't' 't' 'p';

// A run of ASCII letters.
LETTERS
    : LETTER+
    ; // {if ($text.equals("http") || $text.equals("ftp")) $type=PROTOCOL;};

//HTML : '<html>' .* '</html>' /*-> ^(HTML $x)*/ ;

// Line break (optional CR, then LF); the token text is replaced by the
// string given to setText.
N
    : '\r'? '\n' {setText("\\n\n");}
    ;
//UNKNOWN : .;