// User:Stevage/ANTLR

grammar headerline10;
// Bugs: (item missing in this copy -- TODO restore) at start of article



// The generated parser builds an AST.
options {
    output = AST;
}

// Imaginary tokens: they are only ever produced by rewrite rules and serve
// as node types in the resulting tree.
tokens {
    ARTICLE;
    START;
    INTERNAL_LINK;
    ENTITY;
    RD;
    H1; H2; H3; H4; H5; H6;
    HR;
    P;
    IMG;
    NBSP;
    PROTOCOL;
    TEXT;                   // all subnodes should be concatenated
    BALD_URL;
    EXTERNAL_LINK;
    IMG_OPTION;
    IMG_OPTION_THUMBNAIL;
    ITALICS;
    BOLD;
    B_ON;
    B_OFF;
    BOLD_ITALICS;
    I_ON;
    I_OFF;
    BR;
    UL; OL; DL;
    LI; DD; DT;
    ISBN;
    RFC;
    PMID;
    PRE;
    NBSP160;                // a real nbsp, like &#160;
}

// Parser-side state shared between rules.  The boolean flags and counters are
// read by the semantic predicates further down and toggled by rule actions.
@members {
    String _mw_image_namespace = "image";

    boolean in_external_link = false;
    boolean in_header = false;
    boolean in_internal_link_caption = false;
    boolean in_defined_term = false;
    boolean text_bold = false;
    boolean text_italics = false;
    boolean literal_whitespace = false;

    int caption_levels = 0;
    int text_levels = 0;

    // True when the text of the next token equals mw, ignoring case.
    // getText() is a method on the token, so it needs the call parentheses.
    boolean textis(String mw) {
        return input.LT(1).getText().equalsIgnoreCase(mw);
    }
}

// Lexer-side state flags; every one of them starts out false.  They are read
// by the gated predicates on the lexer rules.
@lexer::members {
    boolean in_nowiki     = false;
    boolean in_noparse    = false;
    boolean in_pre        = false;
    boolean in_html       = false;
    boolean in_listprefix = false;
}

// Entry rule: either a redirect or an article, wrapped in a START node.
start
    : ( redirect | article )
        -> ^(START redirect? article?)
    ;

//////////////////////////////////////////////////////////////////////

// REDIRECT keyword, one or more spaces, the target link, and optionally
// trailing article text.  The result is an RD node.
redirect
    : REDIRECT SPACE+ internal_link
      ( ws? ( (article) => article )? )
        -> ^(RD internal_link article?)
    ;

// Lexer token for the redirect keyword.  The gated predicate only lets it
// match at column 0 of line 1.  getCharPositionInLine() and getLine() are
// lexer methods and must be called with parentheses.
REDIRECT
    : {this.getCharPositionInLine()==0 && this.getLine()==1}? => '#REDIRECT'
    ;

//////////////////////////////////////////////////////////////////////

// Optional leading newlines, then any number of
// "line, newline, paragraph separator" groups.  The N tokens are not copied
// into the ARTICLE node.
article
    : ( N* )
      ( line N paragraph_separator )*
        -> ^(ARTICLE ( line paragraph_separator )*)
    ;

// Zero or more pn elements.
paragraph_separator
    : pn*
    ;

// A newline followed by close_bold_italics; rewritten to the
// close_bold_italics result followed by a BR node (the N is dropped).
pn
    : N close_bold_italics
        -> close_bold_italics BR
    ;

// Consumes no input.  Emits the OFF nodes for whichever of bold/italics is
// currently switched on, then (in @after) clears both flags.
// NOTE(review): no alternative covers the case where both flags are already
// false -- confirm whether an empty fourth alternative is wanted.
close_bold_italics
@after { text_bold=false; text_italics = false; }
    : {text_bold==true && text_italics==true}?  => -> B_OFF I_OFF
    | {text_bold==false && text_italics==true}? => -> I_OFF
    | {text_bold==true && text_italics==false}? => -> B_OFF
    ;

// One source line.  Syntactic predicates pick, in this order, a header, a
// list line, a horizontal rule or a space-prefixed line; anything else is a
// paragraph.
line
    : /* (simple_text) => paragraph^ | */
      (headerline) => headerline^
    | (listmarker) => listline^
    | (hrline)     => hrline^
    | (spaceline)  => spaceline^
    | paragraph^
    ;

////////////////////////// Lists ////////////////////////////////

// A list line is one of the four list item kinds.
listline
    : bullet_list_item
    | ordered_list_item
    | indent_list_item
    | definition_item
    ;

// One or more list marker characters.
listprefix
    : ( listmarker )+
    ;

// '*' item: wraps a nested list line, inline text, or nothing in a UL node.
bullet_list_item
    : ASTERISK
      (   (listmarker) => listline -> ^(UL listline)
      |   inline_text              -> ^(UL inline_text)
      |                            -> ^(UL)
      )
    ;

// '#' item: wraps a nested list line, inline text, or nothing in an OL node.
ordered_list_item
    : HASH
      (   (listmarker) => listline -> ^(OL listline)
      |   inline_text              -> ^(OL inline_text)
      |                            -> ^(OL)
      )
    ;

// ':' item: wraps a nested list line, inline text (optionally preceded by
// whitespace that is not kept), or nothing in a DD node.
indent_list_item
    : COLON
      (   (listmarker) => listline -> ^(DD listline)
      |   ws? inline_text          -> ^(DD inline_text)
      |                            -> ^(DD)
      )
    ;

// ';' item.  in_defined_term is raised while the term is parsed and lowered
// right after it (and again in the finally block).  The term may be followed
// by ':' plus a definition on the same line, by a newline plus an indent list
// item, or by nothing; with no term at all the result is a bare DT node.
definition_item
@init { in_defined_term = true; }
    : SEMICOLON ws?
      (   term=inline_text {in_defined_term=false;}
          (   (COLON) => COLON ws? def=inline_text -> ^(DT $term ^(DD $def))
          |   (N COLON) => N indent_list_item      -> ^(DT $term indent_list_item)
          |                                        -> ^(DT $term)
          )
      |                                            -> ^(DT)
      )
    ;
    finally { in_defined_term = false; }

// Any one of the four characters that can start a list line.
listmarker
    : HASH
    | ASTERISK
    | COLON
    | SEMICOLON
    ;

/////////////////////////// Space blocks ///////////////////

// A line starting with a space becomes a PRE node.  literal_whitespace is
// raised for the duration of the rule and lowered in the finally block.
// Both children are optional in the match, so both carry '?' in the rewrite;
// an optional element referenced without '?' fails at run time when absent.
spaceline
@init { literal_whitespace = true; }
    : SPACE printing_ws? inline_text?
        -> ^(PRE printing_ws? inline_text?)
    ;
    finally { literal_whitespace = false; }

////////////////////////// Headers /////////////////////////////////

// A header line.  The alternatives are tried from level 6 down to level 1,
// each guarded by a syntactic predicate on the corresponding rule.
headerline
    : /* {this.in_header = true;}*/
      (   (header6) => header6^
      |   (header5) => header5^
      |   (header4) => header4^
      |   (header3) => header3^
      |   (header2) => header2^
      |   (header1) => header1^
      )
      /* {this.in_header = false;} */
    ;

// Each headerN rule matches N EQUALS tokens on either side of the title.
// Extra EQUALS next to the title are collected in $a / $b and kept as
// children of the HN node.
header6
    : EQUALS EQUALS EQUALS EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS EQUALS EQUALS EQUALS
        -> ^(H6 $a* header_simple_text $b*)
    ;

header5
    : EQUALS EQUALS EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS EQUALS EQUALS
        -> ^(H5 $a* header_simple_text $b*)
    ;

header4
    : EQUALS EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS EQUALS
        -> ^(H4 $a* header_simple_text $b*)
    ;

header3
    : EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS
        -> ^(H3 $a* header_simple_text $b*)
    ;

header2
    : EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS
        -> ^(H2 $a* header_simple_text $b*)
    ;

header1
    : EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS
        -> ^(H1 $a* header_simple_text $b*)
    ;

// Four or more hyphens collapse into a single childless HR node.
hrline
    : HYPHEN HYPHEN HYPHEN HYPHEN HYPHEN*
        -> ^(HR)
    ;

///////////////////////////////////////////////////////////////////////

// link_start, page name, optional "|caption", link_end, and an optional
// trail of letters directly after the closing brackets.  Caption and trail
// end up together under one TEXT child of the INTERNAL_LINK node.
internal_link
    : link_start pagename
      ( PIPE link_caption )?
      link_end
      ( (letters) => link_trail )?
        -> ^(INTERNAL_LINK pagename ^(TEXT link_caption? link_trail?))
    ;

// Caption of an internal link.  caption_levels is incremented and
// in_internal_link_caption raised on entry; both are undone in finally.
link_caption
@init {
    this.caption_levels++;
    in_internal_link_caption = true;
}
    : simple_text
    ;
    finally {
        this.caption_levels--;
        in_internal_link_caption = false;
    }

// Letters that directly follow an internal link.
link_trail
    : letters
    ;

////////////////////////////////////////////////////////////////////////

////////////////////////// Images //////////////////////////////////////////

// An inline image: link_start, the image namespace, ':', the image name, any
// number of "|option-or-caption" parts, link_end.  Namespace, colon and name
// are grouped under a TEXT node inside the IMG node.
imageinline
    : (link_start image_namespace) =>
      link_start image_namespace COLON ws? imagename
      ( PIPE optionorcaption )*
      link_end
        -> ^(IMG ^(TEXT image_namespace COLON imagename) optionorcaption*)
    ;

// Page name, a dot (whitespace allowed on either side), and an extension.
imagename
    : pagename ws? DOT ws? imageextension
    ;

// Future passes/actions etc can readily retrieve the extension text, so this
// rule only validates it against the accepted extensions.
imageextension
    : {textis("jpeg") | textis("jpg") | textis("png") | textis("svg") | textis("gif") | textis("bmp")}?
      letters
    ;

// One "|..." part of an image link: either a recognised option or a caption.
optionorcaption
    : (imagemodeautothumb (PIPE | link_end)) => imagemodeautothumb // moved up here as it's so common
    | (SPACE | punctuation) => image_caption
    | (imageoption (PIPE | link_end)) => imageoption
    | image_caption
    ;

// Image caption wrapped in a TEXT node.  caption_levels is incremented for
// the duration of the rule.  The text is optional, so the rewrite refers to
// it with '?' as well.
// NOTE(review): the ':' introducing the alternative was garbled in the copy
// this was taken from -- confirm against the original grammar.
image_caption
@init { this.caption_levels++; }
    : inline_text?
        -> ^(TEXT inline_text?)
    ;
    finally { this.caption_levels--; }

// Every recognised image option.  imagemodepage stays disabled, as in the
// original ("something weird about this one but I don't know what").
imageoption
    : imagemodemanualthumb
    | imagemodeautothumb
    | imagemodeframe
    | imagemodeframeless
    //| imagemodepage
    | imagemodeupright
    | imagemodeborder
    | imagesizeparameter
    | imagealignleft
    | imagealigncenter
    | imagealignright
    | imagealignnone
    | imagevalignbaseline
    | imagevalignsub
    | imagevalignsuper
    | imagevaligntop
    | imagevaligntexttop
    | imagevalignmiddle
    | imagevalignbottom
    | imagevaligntextbottom
    ;

// Thin wrappers: each option rule delegates to the matching mw_img_* rule.

// Display mode
imagemodemanualthumb  : mw_img_manualthumb ;
imagemodeautothumb    : mw_img_thumbnail ;
imagemodeframe        : mw_img_frame ;
imagemodeframeless    : mw_img_frameless ;
imagemodepage         : mw_img_page ;
imagemodeupright      : mw_img_upright ;
imagemodeborder       : mw_img_border ;

// Size: a positive integer followed by the width keyword
imagesizeparameter    : positive_int mw_img_width ;

// Horizontal alignment
imagealignleft        : mw_img_left ;
imagealigncenter      : mw_img_center ;
imagealignright       : mw_img_right ;
imagealignnone        : mw_img_none ;

// Vertical alignment
imagevalignbaseline   : mw_img_baseline ;
imagevalignsub        : mw_img_sub ;
imagevalignsuper      : mw_img_super ;
imagevaligntop        : mw_img_top ;
imagevaligntexttop    : mw_img_text_top ;
imagevalignmiddle     : mw_img_middle ;
imagevalignbottom     : mw_img_bottom ;
imagevaligntextbottom : mw_img_text_bottom ;

/* default settings: */
/* Hmm, user-definable grammar seems to be a bad idea. Assume that the
   img_manualthumb is always something followed by the name. */

// Each rule checks the text of the upcoming token with textis() and then
// consumes it as mwletters.
mw_img_manualthumb
    : {textis("thumbnail") | textis("thumb")}? mwletters EQUALS imagename
        -> ^(IMG_OPTION_THUMBNAIL imagename)
    ;

mw_img_thumbnail
    : {textis("thumbnail") | textis("thumb")}? mwletters
        -> ^(IMG_OPTION_THUMBNAIL)
    ;

mw_img_frame
    : {textis("framed") | textis("enframed") | textis("frame")}? mwletters
    ; //'framed' | 'enframed' | 'frame';

mw_img_frameless
    : {textis("frameless")}? mwletters
    ;

mw_img_page
    : {textis("page")}? mwletters (SPACE | EQUALS) mwletters
    ; //'page=$1' | 'page $1' ; /*??? (where is this used?);*/

mw_img_upright
    : {textis("upright")}? mwletters EQUALS? positive_int?
    ; //'upright' ( '='? POSITIVE_INT)?;

mw_img_border
    : {textis("border")}? mwletters
    ;

mw_img_width
    : {textis("px")}? mwletters
    ;

// Vertical alignment keywords.  Each rule tests the upcoming token text with
// textis() before consuming it as mwletters.
mw_img_baseline
    : {textis("baseline")}? mwletters
    ;
mw_img_sub
    : {textis("sub")}? mwletters
    ;
mw_img_super
    : {textis("super") | textis("sup")}? mwletters
    ;
mw_img_top
    : {textis("top")}? mwletters
    ;
mw_img_text_top
    : {textis("text-top")}? mwletters
    ;
mw_img_middle
    : {textis("middle")}? mwletters
    ;
mw_img_bottom
    : {textis("bottom")}? mwletters
    ;
mw_img_text_bottom
    : {textis("text-bottom")}? mwletters
    ;

// Horizontal alignment keywords.
mw_img_left
    : {textis("left")}? mwletters
    ;
mw_img_center
    : {textis("center") | textis("centre")}? mwletters
    ;
mw_img_right
    : {textis("right")}? mwletters
    ;
mw_img_none
    : {textis("none")}? mwletters
    ;

// Matches when the upcoming token text equals _mw_image_namespace
// (compared case-insensitively by textis).
image_namespace
    : {textis(_mw_image_namespace)}? mwletters
    ;

///////////////////////////// external links ///////////////////////////////

// A bare URL is emitted twice, once as the target and once as the caption;
// a bracketed URL passes through whatever explicit_url produced.
external_link
    : bald_url     -> ^(EXTERNAL_LINK ^(TEXT bald_url) ^(TEXT bald_url)) // attempt to use url as caption
    | explicit_url -> ^(EXTERNAL_LINK explicit_url)
    ;

// A URL written without brackets.
// NOTE(review): the final "(SLASH)=>SLASH?" is kept exactly as written;
// confirm whether "((SLASH)=>SLASH)?" was intended.
bald_url
    : protocol COLON SLASH SLASH letters DOT letters    // http://foo.com (minimum)
      ( (DOT letters) => DOT letters )*                 // .lom.wom ...
      ( (SLASH letters) => SLASH letters )*             // /thing/other/docs ...
      (SLASH)=>SLASH?                                   // /
    ;

// Bracketed URL with an optional caption after whitespace.  Produces two
// sibling TEXT nodes: the URL and the (possibly empty) caption.
explicit_url
    : LEFT_BRACKET bald_url
      ( ws external_link_caption )?
      RIGHT_BRACKET
        -> ^(TEXT bald_url) ^(TEXT external_link_caption?)
    ;

// Accepts letters only when the upcoming token text is "ftp" or "http".
protocol
    : {textis("ftp") | textis("http")}? letters
    ;

// Caption of a bracketed external link.  in_external_link is raised while
// the caption is parsed and lowered again in the finally block.
external_link_caption
@init { this.in_external_link=true; }
    : simple_text
    ;
    finally { this.in_external_link=false; }

//////////////////////////// magic links ////////////////////////////////

// ISBN / PMID / RFC references that become links.
magic_link
    : isbn_link
    | pmid_link
    | rfc_link
    ;

// The same tokens, where they end up as plain TEXT instead.
accidental_magic_link
    : isbn_accidental
    | pmid_accidental
    | rfc_accidental
    ;

// ISBN_LINK token as a link node, or as plain text.
isbn_link
    : ISBN_LINK -> ^(ISBN ISBN_LINK)
    ;
isbn_accidental
    : ISBN_LINK -> ^(TEXT ISBN_LINK)
    ; // the TEXT node is possibly superfluous?

// RFC_LINK token as a link node, or as plain text.
rfc_link
    : RFC_LINK -> ^(RFC RFC_LINK)
    ;
rfc_accidental
    : RFC_LINK -> ^(TEXT RFC_LINK)
    ;

// PMID_LINK token as a link node, or as plain text.
pmid_link
    : PMID_LINK -> ^(PMID PMID_LINK)
    ;
pmid_accidental
    : PMID_LINK -> ^(TEXT PMID_LINK)
    ;

////////////////////////////////////////////////////////////////////////

// A paragraph: optional leading whitespace (not kept) and inline text,
// wrapped in a P node.
paragraph
    : ws? /* !!!! */ inline_text
        -> ^(P inline_text)
    ;

// General inline content: one or more elements, each optionally followed by
// nbsp-before-punctuation sequences and by printing whitespace.
// text_levels is incremented for the duration of the rule.
inline_text
@init { text_levels++; }
    : (   /*(complex_inline_elem | simple_inline_elem) =>*/
          /* (complex_inline_elem | simple_inline_elem) ws? */
          // [] has to be treated as: [, [http;//foo.com], ]
          (   (LEFT_BRACKET LEFT_BRACKET LEFT_BRACKET) => literal_left_bracket // try and save it some time on [foo]?
          |   (literal_left_bracket explicit_url) => literal_left_bracket
          |   /*(imageinline) =>*/ imageinline
          |   (external_link) => external_link
          |   (internal_link) => internal_link
          |   (magic_link) => magic_link
          |   pre_block
          |   (simple_inline_elem) => simple_inline_elem
          )
          ( (nbsp_before_punctuation) => nbsp_before_punctuation )*
          ( (ws) => printing_ws )?
      )+
    ;
    finally { text_levels--; }

// Restricted inline content (no links or images): one or more simple inline
// elements, each optionally followed by printing whitespace.
// text_levels is incremented for the duration of the rule.
// NOTE(review): line breaks were lost in the copy this was taken from; the
// nbsp_before_punctuation and "ws?" parts are assumed to be the commented-out
// ones -- confirm against the original grammar.
simple_text
@init { text_levels++; }
    : (   (simple_inline_elem) => simple_inline_elem
          // (nbsp_before_punctuation)?
          ( (printing_ws) => printing_ws )?
          // ws?
      )+
    ;
    finally { text_levels--; }

// One simple inline element: an accidental magic link, punctuation that
// takes a following nbsp, a bold/italics toggle, a run of
// nbsp-before-punctuation sequences, or a basic element.
simple_inline_elem
    : (   (accidental_magic_link) => accidental_magic_link
      |   punctuation_before_nbsp
      |   (APOSTROPHES) => bold_and_italics
      |   ( (nbsp_before_punctuation) => nbsp_before_punctuation )+
      |   really_basic_elem
      )
    ;

// Content between PRE_OPEN and PRE_CLOSE; only the body is kept, under PRE.
pre_block
    : PRE_OPEN pre_block_body PRE_CLOSE
        -> ^(PRE pre_block_body)
    ;

// Body of a pre block: repeated runs of whitespace and basic elements.
pre_block_body
    : ( pre_ws* really_basic_elem* )+
    ;

// The lowest-level inline element.
really_basic_elem
    : (   html_dangerous
      |   punctuation   /* if punctuation+, risk of swallowing too many characters: [foo.jpg] needs to swallow just one */
      |   letters
      |   digits
      )
    ;

/*textline: simple_text -> ^(P simple_text);*/

///////////////////////////////////////////////////////////////////////////

// Turns an APOSTROPHES token into bold/italics toggle nodes, updating
// text_bold / text_italics as it goes:
//   2 apostrophes -> italics toggle
//   3 apostrophes -> bold toggle
//   4 apostrophes -> a literal APOSTROPHE followed by a bold toggle
//   5 apostrophes -> bold and italics toggled together
// NOTE(review): the 4- and 5-apostrophe string literals were damaged in the
// copy this was taken from and are reconstructed from the rewrites on each
// alternative -- confirm against the original grammar.
bold_and_italics
    : {textis("''") && text_italics}?   => APOSTROPHES {text_italics=false;} -> ^(I_OFF)
    | {textis("''") && !text_italics}?  => APOSTROPHES {text_italics=true;}  -> ^(I_ON)
    | {textis("'''") && text_bold}?     => APOSTROPHES {text_bold=false;}    -> ^(B_OFF)
    | {textis("'''") && !text_bold}?    => APOSTROPHES {text_bold=true;}     -> ^(B_ON)
    | {textis("''''") && text_bold}?    => APOSTROPHES {text_bold=false;}    -> APOSTROPHE ^(B_OFF)
    | {textis("''''") && !text_bold}?   => APOSTROPHES {text_bold=true;}     -> APOSTROPHE ^(B_ON)
    | {textis("'''''") && text_bold && text_italics}?   => APOSTROPHES {text_bold=false; text_italics=false; } -> ^(B_OFF) ^(I_OFF)
    | {textis("'''''") && text_bold && !text_italics}?  => APOSTROPHES {text_bold=false; text_italics=true; }  -> ^(B_OFF) ^(I_ON)
    | {textis("'''''") && !text_bold && text_italics}?  => APOSTROPHES {text_bold=true; text_italics=false; }  -> ^(B_ON)  ^(I_OFF)
    | {textis("'''''") && !text_bold && !text_italics}? => APOSTROPHES {text_bold=true; text_italics=true; }   -> ^(B_ON)  ^(I_ON)
    // Hopefully we never get more than 6 or less than 2. The lexer should take care of that.
    ;

////////////////////////Nbsp punctuation/////////////////////////////////

// A space directly before one of the listed punctuation marks is replaced
// by an NBSP160 node; the punctuation itself is kept.
nbsp_before_punctuation
    : SPACE
      (   '»'       -> NBSP160 '»'
      |   QUESTION  -> NBSP160 QUESTION
      |   COLON     -> NBSP160 COLON
      |   SEMICOLON -> NBSP160 SEMICOLON
      |   '!'       -> NBSP160 '!'
      |   '%'       -> NBSP160 '%'
      )
    ;

//   SPACE x=('»' | QUESTION | COLON | SEMICOLON | '!' | '%') -> NBSP160 $x;

/*{input.LA(2) == '?'}? => */ //   (SPACE ('»' | '?' | COLON | SEMICOLON | '!' | '%')) => SPACE -> NBSP160;

// A space directly after '«' is replaced by an NBSP160 node.
punctuation_before_nbsp
    : '«' SPACE
        -> '«' NBSP160
    ;

//«»

//////// //////////////////////////////////////////////////////////////////

// A page name: one element, followed by any mix of further elements and
// single spaces.
pagename
    : pagename_elem
      (   (pagename_elem) => pagename_elem
      |   (SPACE) => SPACE
      )*
    ;

// One piece of a page name: letters or an accidental magic link.  The other
// character classes are still commented out.
pagename_elem
    : (   letters
      |   accidental_magic_link
          /* | DIGITS | DOT | UNDERSCORE | HYPHEN | OPEN_PAREN | CLOSE_PAREN*/
      )
    ;

/////////////////////////////////// Very basic types ///////////////////////////////////////

/* Currently doesn't support equals during a header title...*/
// Header title.  in_header is raised while the title is parsed and lowered
// again in the finally block.
header_simple_text
@init { this.in_header=true; }
    : inline_text   /* Pretty much everything seems to be tolerated in headings. (!) */
    ;
    finally { this.in_header=false; }

// any need for accidental_magic_link?
// A keyword-like word: letters first, then any mix of letters, hyphens,
// underscores and integers.
mwletters
    : letters ( letters | HYPHEN | UNDERSCORE | (digits)=>positive_int )*
    ;

/////////////////////////////////// Semi-literals, literal sets etc ///////////////////////////

// Everything treated as punctuation (note that digits is in the list too).
punctuation
    : DOT
    | digits
    | COMMA
    | OPEN_PAREN
    | CLOSE_PAREN
    | HASH
    | HYPHEN
    | ASTERISK
    | UNDERSCORE
    | SLASH
    | SEMICOLON
    | APOSTROPHE
    | QUESTION
    | literal_left_bracket
    | literal_right_bracket
    | literal_pipe
    | literal_equals
    | literal_colon
    | '!' | '@' | '$' | '%' | '^' | '`' | '~' | '\\' | '«' | '»'
    ;       // |/*LINK_START |*/ /*LINK_END | */UNKNOWN ;

// '<', '>' and '&' tokens are each wrapped in an ENTITY node.
html_dangerous
    : LT  -> ^(ENTITY LT)
    | GT  -> ^(ENTITY GT)
    | AMP -> ^(ENTITY AMP)
    ;

// Parser-level wrapper around the LETTERS token.
letters
    : ( LETTERS )
    ;

positive_int
    : digits
    ; /* needs to be refined to remove 0s at start */

// link_end counts as literal text only when no caption is open.
literal_link_end
    : {caption_levels==0}? => link_end
    ;

// Strangely enough, a literal pipe has to be allowed in an internal link caption: bar|wa
// It would be good if this behaviour were proscribed to allow for future options
literal_pipe
    : {caption_levels==0 || in_internal_link_caption}? => PIPE
    ;

// Three ways of getting a literal right bracket:
// 1) You're neither in an external nor internal link: foo]
// 2) You're in an internal link, and not followed by another right bracket: here] see?
// 3) You're in a nowiki block: The foundation.]
literal_right_bracket
    : {!in_external_link && (caption_levels == 0 || input.LA(2)!= RIGHT_BRACKET)}? => RIGHT_BRACKET
    | NOWIKI_RIGHT_BRACKET
    ;
// Earlier version of the rule body:
//       {!in_external_link}? => RIGHT_BRACKET | NOWIKI_RIGHT_BRACKET;

// Dodgy - doesn't really know whether it's a literal left bracket or not
literal_left_bracket
    : LEFT_BRACKET
    | NOWIKI_LEFT_BRACKET
    ;

// A colon is literal unless a defined term is being parsed at the outermost
// text level; NOWIKI_COLON is always literal.
literal_colon
    : {!in_defined_term || text_levels > 1}? => COLON
      /* ;foo:blah is special. ;foo[blah|bl:ah] is not special. TODO: make sure this doesn't break namespaces in defs */
    | NOWIKI_COLON
    ;

// Only supports a single =. So no ==foo==blah==.
// Outside a header any EQUALS is literal; inside one it is literal only when
// the token after it is not another EQUALS.
literal_equals
    : {!in_header || input.LA(2) != EQUALS}? => EQUALS
    ;

// Two consecutive left brackets open an internal link.
link_start
    : LEFT_BRACKET LEFT_BRACKET
    ;

// Two consecutive right brackets close it.
link_end
    : RIGHT_BRACKET RIGHT_BRACKET
    ;

// TODO: apparently image captions always treat spaces literally...
// With literal_whitespace set and at the outermost text level, whitespace is
// kept as pre_ws; otherwise a whitespace run is replaced by an NBSP node.
printing_ws
    : {literal_whitespace && text_levels <= 1}? => (pre_ws) => pre_ws
    | ws -> NBSP
    ;

// One or more digit elements.
digits
    : digit+
    ;

// Whitespace whose spaces are preserved.
pre_ws
    : pre_ws_elem+
    ;

// SPACE is kept; NOWIKI and NOWIKI_OFF are rewritten to nothing.
pre_ws_elem
    : SPACE      -> SPACE
    | NOWIKI     ->
    | NOWIKI_OFF ->
    ;

// A whitespace run.  NOWIKI and NOWIKI_OFF are matched but left out of the
// tree (the '!' suffix).
ws
    : (   SPACE
      |   NOWIKI!
      |   NOWIKI_OFF!
      )+
    ;

//-
// ISBN magic links. Care will be needed to make sure they're treated as
// literals wherever they aren't supported.
// Broken example: 1234567890 - current parser does correctly. But does it matter?
//
// If a full ISBN (followed by a non-digit) cannot be matched, the rule falls
// back to a single LETTER and retypes the token as LETTERS.
ISBN_LINK
    : {!this.in_noparse}? => // Parser.php l081, ~DIGIT is actually regexp \b
      (   (ISBN_LINK_ACTUAL ~DIGIT) => ISBN_LINK_ACTUAL
      |   LETTER { $type=LETTERS; }
      )
    ;

// 'ISBN', one or more spaces, an optional 978/979 prefix, then ten
// characters, each optionally preceded by a space or hyphen.  The first nine
// are digits; the last may also be 'X' or 'x'.
fragment ISBN_LINK_ACTUAL
    : 'ISBN' ' '+
      ( '97' ('8' | '9') )?
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? '0'..'9' )
      ( (' ' | '-')? ('0'..'9' | 'X' | 'x') )
    ;

// RFC magic link, with the same single-LETTER fallback as ISBN_LINK.
RFC_LINK
    : {!this.in_noparse}? =>
      (   (RFC_LINK_ACTUAL) => RFC_LINK_ACTUAL
      |   LETTER { $type=LETTERS; }
      )
    ;

// 'RFC', one or more spaces, one or more digits.
fragment RFC_LINK_ACTUAL
    : 'RFC' ' '+ ('0'..'9')+
    ;

// PMID magic link; falls back to a single LETTER retyped as LETTERS.
PMID_LINK
    : {!this.in_noparse}? =>
      (   (PMID_LINK_ACTUAL) => PMID_LINK_ACTUAL
      |   LETTER { $type=LETTERS; }
      )
    ;

// 'PMID', one or more spaces, one or more digits.
fragment PMID_LINK_ACTUAL
    : 'PMID' ' '+ ('0'..'9')+
    ;

///////////// /

// Opening nowiki tag.  On a match both in_nowiki and in_noparse are raised;
// a '<' that does not start the tag is emitted as LT instead.
NOWIKI
    : {!this.in_noparse}? =>
      (   (NOWIKI_ACTUAL) => NOWIKI_ACTUAL { this.in_nowiki=true; this.in_noparse=true;}
      |   '<' { $type=LT; }
      )
    ;

// NOTE(review): the tag literal had been reduced to ' ' in the copy this was
// taken from; '<nowiki>' is reconstructed from the '<' fallback and the rule
// name -- confirm.
fragment NOWIKI_ACTUAL
    : '<nowiki>'
    ;

// Closing nowiki tag, only considered while in_nowiki is set.  On a match
// both in_nowiki and in_noparse are lowered, mirroring what PRE_CLOSE does;
// previously in_noparse stayed set after the first nowiki block.
// A '<' that does not start the tag is emitted as LT instead.
NOWIKI_OFF
    : {this.in_nowiki}? =>
      (   (NOWIKI_OFF_ACTUAL) => NOWIKI_OFF_ACTUAL { this.in_nowiki=false; this.in_noparse=false; }
      |   '<' { $type=LT; }
      )
    ;

// NOTE(review): the tag literal had been reduced to ' ' in the copy this was
// taken from; '</nowiki>' is reconstructed -- confirm.
fragment NOWIKI_OFF_ACTUAL
    : '</nowiki>'
    ;

/////////// /

// Opening pre tag.  On a match both in_pre and in_noparse are raised; a '<'
// that does not start the tag is emitted as LT instead.
PRE_OPEN
    : {!this.in_noparse}? =>
      (   (PRE_OPEN_ACTUAL) => PRE_OPEN_ACTUAL { this.in_pre=true; this.in_noparse=true;}
      |   '<' { $type=LT; }
      )
    ;

// NOTE(review): the tag literal had been reduced to ' ' in the copy this was
// taken from; '<pre>' is reconstructed -- confirm.
fragment PRE_OPEN_ACTUAL
    : '<pre>'
    ;

// Closing pre tag, only considered while in_pre is set.  On a match both
// in_pre and in_noparse are lowered; a '<' that does not start the tag is
// emitted as LT instead.
PRE_CLOSE
    : {this.in_pre}? =>
      (   (PRE_CLOSE_ACTUAL) => PRE_CLOSE_ACTUAL { this.in_pre=false; this.in_noparse=false; }
      |   '<' { $type=LT; }
      )
    ;

// NOTE(review): the tag literal had been reduced to ' ' in the copy this was
// taken from; '</pre>' is reconstructed -- confirm.
fragment PRE_CLOSE_ACTUAL
    : '</pre>'
    ;

// The three characters that html_dangerous wraps in ENTITY nodes.
LT  : '<' ;
GT  : '>' ;
AMP : '&' ;

//NOWIKI   :    {!this.in_nowiki}? => '<' 'nowiki' >'{ this.in_nowiki=true;} /*->*/ ; /* doesn't render as anything in particular */ //NOWIKI_OFF:   {this.in_nowiki}? => ' '{ this.in_nowiki=false;} /*->*/ ; /* NOWIKI: { !this.in_nowiki && input.LA(1)=='<' && input.LA(2)=='n' && input.LA(3)=='o' && input.LA(4)=='w' && input.LA(5)=='i' && input.LA(6)=='k' && input.LA(7)=='i' && input.LA(8)=='>' }? => ' ' { this.in_nowiki=true;}; /*NOWIKI_OFF: { this.in_nowiki && input.LA(1)=='<' && input.LA(2)=='/' && input.LA(3)=='n' && input.LA(4)=='o' && input.LA(5)=='w' && input.LA(6)=='i' && input.LA(7)=='k' && input.LA(8)=='i' && input.LA(9)=='>' }? => ' ' { this.in_nowiki=false;};

/* NOWIKI   :    {!this.in_nowiki}? => LT 'nowiki' GT { this.in_nowiki=true;} ; NOWIKI_OFF:   {this.in_nowiki}? => LT '/nowiki' GT { this.in_nowiki=false;};*/

/* Can't make them tokens because of stupid [] */

/* LINK_START: {!this.in_nowiki}? => '[['; LINK_END:   {!this.in_nowiki}? => ']]'; */

// '|' is only tokenised as PIPE while in_noparse is false.
PIPE
    : {!this.in_noparse}? => '|'
    ;

/*PRESPACE:    {  !this.in_nowiki && this.getCharPositionInLine==0 }? => ' ';
SPACE:       {!(!this.in_nowiki && this.getCharPositionInLine==0)}? => ' '+;*/
// Exactly one space character per token.
SPACE
    : ' '
    ;

// Single-character punctuation tokens without predicates.
DOT         : '.' ;
EQUALS      : '=' ;
UNDERSCORE  : '_' ;
HYPHEN      : '-' ;
COMMA       : ',' ;
OPEN_PAREN  : '(' ;
CLOSE_PAREN : ')' ;
SEMICOLON   : ';' ;
QUESTION    : '?' ;

/* It's a literal apostrophe if either the next character is *not* an
   apostrophe, or the next 5 characters *are* apostrophes. Yummy.
   Inside a no-parse region every apostrophe is literal. */
fragment APOS
    : '\''
    ;

APOSTROPHE
    : { input.LA(1)=='\'' && (
            this.in_noparse
            || ( input.LA(2)!='\''
                 || input.LA(3)=='\'' && input.LA(4)=='\'' && input.LA(5)=='\'' && input.LA(6)=='\'' )
        ) }? => APOS
    ;

/* It's a swarm of apostrophes if it is not the case that this and the next
   five characters are apostrophes, and there are at least two, and we're not
   in a nowiki. */
// The character literals in the predicate must be '\'' (an escaped
// apostrophe); they had lost their closing quotes.
APOSTROPHES
    : { !this.in_noparse && input.LA(1)=='\'' && !(
            input.LA(2)=='\'' &&
            input.LA(3)=='\'' &&
            input.LA(4)=='\'' &&
            input.LA(5)=='\'' &&
            input.LA(6)=='\''
        ) }? => APOS APOS+
    ;

// Brackets and colon each come as a pair: the plain token while in_noparse
// is false, the NOWIKI_ variant while it is true.
LEFT_BRACKET
    : {!this.in_noparse}? => '['
    ;
NOWIKI_LEFT_BRACKET
    : {this.in_noparse}? => '['
    ;
RIGHT_BRACKET
    : {!this.in_noparse}? => ']'
    ;
NOWIKI_RIGHT_BRACKET
    : {this.in_noparse}? => ']'
    ;
COLON
    : {!this.in_noparse}? => ':'
    ;
NOWIKI_COLON
    : {this.in_noparse}? => ':'
    ;

// These three have no NOWIKI_ counterpart here; they are only tokenised
// while in_noparse is false.
HASH
    : {!this.in_noparse}? => '#'
    ;
ASTERISK
    : {!this.in_noparse}? => '*'
    ;
SLASH
    : {!this.in_noparse}? => '/'
    ;

//fragment
//DIGIT: D0 | D1 | D2 | D3 | D4 | D5 | D6 | D7 | D8 | D9;
// A single decimal digit.
DIGIT
    : '0'..'9'
    ;
//DIGITS: DIGIT+;

// Parser-level wrapper around the DIGIT token.
digit
    : DIGIT
    ;

// One ASCII letter; fragment only, never emitted as a token by itself.
fragment LETTER
    : ( 'A'..'Z' | 'a'..'z' )
    ;
//HTTP: 'h' 't' 't' 'p';

// A run of one or more letters.
LETTERS
    : LETTER+
    ;
// {if ($text.equals("http") || $text.equals("ftp")) $type=PROTOCOL;};

//HTML   :    ' ' .* ' ' /*-> ^(HTML $x)*/    ;

// Line break (LF or CRLF).  The token text is replaced by a backslash, the
// letter n, and a real newline.
N
    : '\r'? '\n' { setText("\\n\n"); }
    ;
//UNKNOWN   :    .;