// User:Stevage/ANTLR
//
// From mediawiki.org
grammar headerline10;
/* Bugs:
 at start of article


*/


options {output=AST;}
// Imaginary token types. No lexer rule visible in this grammar produces them;
// they are used as node labels by the rewrite rules (-> ^(...)) below.
// IMG used to be listed twice; the redundant second declaration is removed.
tokens {
   ARTICLE;
   START;
   INTERNAL_LINK;
   ENTITY;
   RD;
   H1;
   H2;
   H3;
   H4;
   H5;
   H6;
   HR;
   P;
   IMG;
   NBSP;
   PROTOCOL;
   TEXT; // all subnodes should be concatenated
   BALD_URL;
   EXTERNAL_LINK;
   IMG_OPTION;
   IMG_OPTION_THUMBNAIL;
   ITALICS;
   BOLD;
   B_ON;
   B_OFF;
   BOLD_ITALICS;
   I_ON;
   I_OFF;
   BR;
   UL;
   OL;
   DL;
   LI;
   DD;
   DT;
   ISBN;
   RFC;
   PMID;
   PRE;
   NBSP160; // a real nbsp, i.e. character 160 (U+00A0)
}

@members {
  // Keyword compared against by image_namespace.
  String _mw_image_namespace = "image";
  // True while external_link_caption is being parsed.
  boolean in_external_link=false;
  // True while header_simple_text is being parsed.
  boolean in_header=false;
  // True while link_caption is being parsed.
  boolean in_internal_link_caption=false;
  // True from the start of definition_item until its term has been read.
  boolean in_defined_term=false;
  // Current bold / italics state, toggled by bold_and_italics.
  boolean text_bold=false;
  boolean text_italics=false;
  // True while spaceline is being parsed; consulted by printing_ws.
  boolean literal_whitespace=false;
  // Nesting depth of link_caption / image_caption.
  int caption_levels = 0;
  // Nesting depth of inline_text / simple_text.
  int text_levels =0;

  /**
   * Case-insensitive test of the next token's text against a keyword.
   * Returns false, rather than throwing, when there is no next token or
   * the token carries no text (which may be the case for the EOF token).
   */
  boolean textis(String mw) {
    Token next = input.LT(1);
    if (next == null) {
      return false;
    }
    String text = next.getText();
    return text != null && text.equalsIgnoreCase(mw);
  }


}


@lexer::members {
  // Lexer state flags consulted by the gated predicates on the token rules.
  // Set by NOWIKI; NOWIKI_OFF is only attempted while it is true.
  boolean in_nowiki = false;
  // Set when a nowiki or pre section opens; while true the markup tokens
  // (PIPE, LEFT_BRACKET, RIGHT_BRACKET, COLON, HASH, ...) are not produced.
  boolean in_noparse = false;
  // Set by PRE_OPEN; PRE_CLOSE is only attempted while it is true.
  boolean in_pre = false;
  // The next two flags are not referenced by any rule visible in this file.
  boolean in_html = false;
  boolean in_listprefix = false;
}

// Entry point: the input is either a redirect or an article; whichever one
// matched becomes the child of the START node.
start    : (redirect | article) -> ^(START redirect? article?);

//////////////////////////////////////////////////////////////////////

// A redirect: REDIRECT token, one or more spaces, the target link, then
// optional trailing article text which is kept as a second child of RD.
redirect:    REDIRECT SPACE+ internal_link (ws? ((article)=>article)?)
-> ^(RD internal_link article?);

// Only produced at the very first character of the input (line 1, column 0).
REDIRECT:     {this.getCharPositionInLine()==0 && this.getLine()==1}? => '#REDIRECT';

//////////////////////////////////////////////////////////////////////

// An article: optional leading newlines, then any number of lines, each ended
// by a newline and followed by a (possibly empty) paragraph separator.
article: (N*) (line N paragraph_separator)*
-> ^(ARTICLE (line paragraph_separator)* );

// Zero or more extra blank lines between paragraphs.
paragraph_separator: pn*;
// One blank line: emits whatever close_bold_italics produced, then a BR node.
pn:
        N close_bold_italics -> close_bold_italics BR ;

// Matches nothing (all alternatives are commented out) but resets the
// bold/italics state in its @after action.
close_bold_italics
@after {text_bold=false; text_italics = false;}
: /*
        {text_bold==true && text_italics==true}? =>  -> B_OFF I_OFF
       |{text_bold==false && text_italics==true}? => -> I_OFF
       |{text_bold==true && text_italics==false}? => -> B_OFF
*/       
       ;


// One source line. Alternatives are tried in order via syntactic predicates:
// header, list item, horizontal rule, space-indented line; anything else is
// a paragraph. The chosen sub-rule's tree becomes the root (^).
line:/* (simple_text) => paragraph^
    | */
      (headerline) => headerline^
    | (listmarker) => listline^
    | (hrline)     => hrline^
    | (spaceline)  => spaceline^
    | paragraph^ ;

////////////////////////// Lists ////////////////////////////////

// A list line is exactly one of the four list item kinds, selected by its
// leading marker token.
listline:
        bullet_list_item
       |ordered_list_item
       |indent_list_item
       |definition_item
       ;

// One or more list markers. Not referenced by any rule visible in this file.
listprefix: (listmarker)+;

// '*' item: a nested list line when another marker follows, otherwise inline
// text, otherwise empty. The result is wrapped in a UL node.
bullet_list_item:
        ASTERISK (
        (listmarker) => listline    -> ^(UL listline)
       |                inline_text -> ^(UL inline_text)
       |                            -> ^(UL) );

// '#' item: same shape as bullet_list_item, wrapped in an OL node.
ordered_list_item:
        HASH (
        (listmarker) => listline    -> ^(OL listline)
       |                inline_text -> ^(OL inline_text)
       |                            -> ^(OL) );

// ':' item: same shape again, wrapped in a DD node; leading whitespace before
// the text is matched but not kept in the tree.
indent_list_item:
        COLON (
        (listmarker) => listline    -> ^(DD listline)
       |                ws? inline_text -> ^(DD inline_text)
       |                            -> ^(DD) );

// ';' item: a defined term, optionally followed by its definition either on
// the same line (after a COLON) or on the next line (N followed by a ':' item).
// in_defined_term is true while the term is read so that literal_colon does
// not consume the separating COLON; it is cleared after the term and again
// in the finally clause.
definition_item
@init {in_defined_term = true;}:
        SEMICOLON  ws? (
       term=inline_text {in_defined_term=false;} ( (COLON) => COLON ws? def=inline_text -> ^(DT $term ^(DD $def))
                                                 | (N COLON) => N indent_list_item      -> ^(DT $term indent_list_item)
                                                 |                                      -> ^(DT $term) )
       |                                      -> ^(DT) );
finally {in_defined_term = false;}

// Any token that can start a list line.
listmarker:    HASH | ASTERISK | COLON | SEMICOLON;

/////////////////////////// Space blocks ///////////////////
// A line starting with a space is rendered as preformatted text (PRE node).
// literal_whitespace is set for the duration so that printing_ws keeps
// spaces verbatim. inline_text is optional on the left-hand side, so it must
// also be marked optional in the rewrite; otherwise a line consisting only
// of whitespace references an empty rewrite stream when the tree is built.
spaceline
@init {literal_whitespace = true;}
:
    SPACE printing_ws? inline_text? -> ^(PRE printing_ws? inline_text?);
finally {literal_whitespace = false;}
////////////////////////// Headers /////////////////////////////////

// A heading line. The deepest level is tried first so that a run of EQUALS
// tokens is matched by the highest-numbered header rule that fits.
headerline:/* {this.in_header = true;}*/
(      (header6) => header6^
    | (header5) => header5^
    | (header4) => header4^
    | (header3) => header3^
    | (header2) => header2^
    | (header1) => header1^)
/* {this.in_header = false;}  */
    ;
   
// Heading rules, one per level. Each matches N opening EQUALS tokens, any
// surplus EQUALS (collected in $a), the heading text, any surplus EQUALS
// (collected in $b) and N closing EQUALS tokens. Surplus tokens are kept as
// children of the HN node on either side of the text.
header6
    : EQUALS EQUALS EQUALS EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS EQUALS EQUALS EQUALS
      -> ^(H6 $a* header_simple_text $b*)
    ;

header5
    : EQUALS EQUALS EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS EQUALS EQUALS
      -> ^(H5 $a* header_simple_text $b*)
    ;

header4
    : EQUALS EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS EQUALS
      -> ^(H4 $a* header_simple_text $b*)
    ;

header3
    : EQUALS EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS EQUALS
      -> ^(H3 $a* header_simple_text $b*)
    ;

header2
    : EQUALS EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS EQUALS
      -> ^(H2 $a* header_simple_text $b*)
    ;

header1
    : EQUALS
      a+=EQUALS* header_simple_text b+=EQUALS*
      EQUALS
      -> ^(H1 $a* header_simple_text $b*)
    ;

// Horizontal rule: four or more hyphens, reduced to a bare HR node.
hrline
    : HYPHEN HYPHEN HYPHEN HYPHEN HYPHEN*
      -> ^(HR)
    ;


///////////////////////////////////////////////////////////////////////

// [[pagename|caption]]trail. The caption and any letters directly following
// the closing brackets are gathered under a single TEXT child.
internal_link:    link_start pagename (PIPE link_caption)? link_end ((letters)=>link_trail)?
-> ^(INTERNAL_LINK pagename ^(TEXT link_caption? link_trail?));


// Caption text of an internal link. caption_levels and
// in_internal_link_caption are updated on entry and restored in finally;
// literal_pipe and literal_right_bracket consult them.
link_caption
@init {this.caption_levels++; in_internal_link_caption = true;}:
        simple_text;
finally {this.caption_levels--; in_internal_link_caption = false;}

// Letters immediately after the closing ]] of an internal link.
link_trail: letters;

////////////////////////////////////////////////////////////////////////

////////////////////////// Images //////////////////////////////////////////
// [[image:name.ext|option-or-caption|...]]. The namespace, colon and file
// name are grouped under a TEXT node; each option or caption follows it.
imageinline: (link_start image_namespace) =>
link_start image_namespace COLON ws? imagename (  PIPE optionorcaption )* link_end
-> ^(IMG ^(TEXT image_namespace COLON imagename) optionorcaption*);

// File name: a page name, a dot and a recognised extension; whitespace is
// allowed on either side of the dot.
 imagename: pagename ws? DOT ws? imageextension;

/* Validation only: the extension text itself can be read back from the
   matched token by later passes. Accepts jpeg, jpg, png, svg, gif and bmp,
   compared case-insensitively by textis. */
 imageextension
     : { textis("jpeg") || textis("jpg") || textis("png")
      || textis("svg")  || textis("gif") || textis("bmp") }?
       letters
     ;


// One pipe-separated field of an image link. A field counts as an option only
// if the option is directly followed by another PIPE or the link end;
// a field starting with a space or punctuation, or anything else, is a caption.
optionorcaption
     :    (imagemodeautothumb (PIPE | link_end)) => imagemodeautothumb /* move it up here as it's so common */
     |    (SPACE | punctuation) => image_caption
     |    (imageoption (PIPE | link_end)) => imageoption
     |     image_caption;
   
// Caption field of an image link, wrapped in a TEXT node. caption_levels is
// raised while the caption is parsed and restored in finally. The caption may
// be empty (e.g. "[[image:x.png|]]"), so inline_text is optional on both
// sides of the rewrite; referencing it unconditionally would hit an empty
// rewrite stream when the tree is built.
image_caption
@init {this.caption_levels++;}
: inline_text?
-> ^(TEXT inline_text?);
finally {this.caption_levels-- ;}

// Any recognised image option: display mode, size, horizontal alignment or
// vertical alignment. imagemodepage is deliberately left out (see below).
imageoption:  
     imagemodemanualthumb
    | imagemodeautothumb
    | imagemodeframe
    | imagemodeframeless
    //| imagemodepage /* something weird about this one but I don't know what. */
    | imagemodeupright
    | imagemodeborder
    | imagesizeparameter
    | imagealignleft
    | imagealigncenter
    | imagealignright
    | imagealignnone
    | imagevalignbaseline
    | imagevalignsub
    | imagevalignsuper
    | imagevaligntop
    | imagevaligntexttop
    | imagevalignmiddle
    | imagevalignbottom
    | imagevaligntextbottom;

// Thin aliases mapping each semantic image option onto the keyword rule
// (mw_img_*) that recognises its spelling.

// Display modes.
imagemodemanualthumb  : mw_img_manualthumb ;
imagemodeautothumb    : mw_img_thumbnail ;
imagemodeframe        : mw_img_frame ;
imagemodeframeless    : mw_img_frameless ;
imagemodepage         : mw_img_page ;
imagemodeupright      : mw_img_upright ;
imagemodeborder       : mw_img_border ;

// Size: a positive integer followed by the width unit keyword.
imagesizeparameter    : positive_int mw_img_width ;

// Horizontal alignment.
imagealignleft        : mw_img_left ;
imagealigncenter      : mw_img_center ;
imagealignright       : mw_img_right ;
imagealignnone        : mw_img_none ;

// Vertical alignment.
imagevalignbaseline   : mw_img_baseline ;
imagevalignsub        : mw_img_sub ;
imagevalignsuper      : mw_img_super ;
imagevaligntop        : mw_img_top ;
imagevaligntexttop    : mw_img_text_top ;
imagevalignmiddle     : mw_img_middle ;
imagevalignbottom     : mw_img_bottom ;
imagevaligntextbottom : mw_img_text_bottom ;

/* Default keyword spellings. Each rule checks the text of the next token
   with textis (case-insensitive) and then consumes it through mwletters. */
/* A user-definable grammar seemed like a bad idea, so the manual thumbnail
   form is assumed to always be the keyword, an equals sign and a file name. */
 mw_img_manualthumb
     : {textis("thumbnail") || textis("thumb")}? mwletters EQUALS imagename
       -> ^(IMG_OPTION_THUMBNAIL imagename)
     ;
 mw_img_thumbnail
     : {textis("thumbnail") || textis("thumb")}? mwletters
       -> ^(IMG_OPTION_THUMBNAIL)
     ;
 // 'framed' | 'enframed' | 'frame'
 mw_img_frame
     : {textis("framed") || textis("enframed") || textis("frame")}? mwletters
     ;
 mw_img_frameless
     : {textis("frameless")}? mwletters
     ;
 // 'page=$1' | 'page $1' ; /*??? (where is this used?);*/
 mw_img_page
     : {textis("page")}? mwletters (SPACE | EQUALS) mwletters
     ;
 // 'upright' (  '='? POSITIVE_INT)?
 mw_img_upright
     : {textis("upright")}? mwletters EQUALS? positive_int?
     ;
 mw_img_border
     : {textis("border")}? mwletters
     ;
 mw_img_width
     : {textis("px")}? mwletters
     ;

 // Vertical-alignment keywords. Each predicate inspects upcoming tokens and
 // mwletters then consumes the keyword.
 mw_img_baseline       : {textis("baseline")}? mwletters;
 mw_img_sub            : {textis("sub")}? mwletters;
 mw_img_super          : {textis("super") | textis("sup")}? mwletters;
 mw_img_top            : {textis("top")}? mwletters;
 // "text-top" and "text-bottom" arrive as three tokens (LETTERS HYPHEN
 // LETTERS), because a LETTERS token holds letters only. Comparing the first
 // token against the hyphenated word could therefore never succeed, so the
 // predicate checks all three tokens. The constant-first equalsIgnoreCase
 // keeps the test safe if the third token carries no text.
 mw_img_text_top       : {textis("text") && input.LA(2)==HYPHEN && "top".equalsIgnoreCase(input.LT(3).getText())}? mwletters;
 mw_img_middle         : {textis("middle")}? mwletters;
 mw_img_bottom         : {textis("bottom")}? mwletters;
 mw_img_text_bottom    : {textis("text") && input.LA(2)==HYPHEN && "bottom".equalsIgnoreCase(input.LT(3).getText())}? mwletters;

// Horizontal-alignment keywords; "centre" is accepted as well as "center".
mw_img_left
    : {textis("left")}? mwletters
    ;
mw_img_center
    : {textis("center") || textis("centre")}? mwletters
    ;
mw_img_right
    : {textis("right")}? mwletters
    ;
mw_img_none
    : {textis("none")}? mwletters
    ;

// The namespace word that turns a link into an image link; its spelling is
// taken from the _mw_image_namespace member.
image_namespace
    : {textis(_mw_image_namespace)}? mwletters
    ;



///////////////////////////// external links ///////////////////////////////
// An external link is either a bare URL in running text, which is emitted
// twice so that it also serves as its own caption, or a bracketed URL.
external_link:  
        bald_url -> ^(EXTERNAL_LINK ^(TEXT bald_url) ^(TEXT bald_url)) //attempt to use url as caption
        | explicit_url -> ^(EXTERNAL_LINK explicit_url);

// A bare URL: protocol, "://", at least two dot-separated words, then any
// number of further ".word" and "/word" parts and an optional trailing slash.
// Every optional part is guarded by a syntactic predicate placed at the left
// edge of its own sub-rule. The trailing-slash predicate used to sit outside
// the optional it was meant to guard, unlike the two lines above it.
bald_url:
    protocol COLON SLASH SLASH letters DOT letters  //    http://foo.com (minimum)
    ((DOT letters)=>DOT letters)*                   //    .lom.wom ...
    ((SLASH letters)=>SLASH letters)*               //    /thing/other/docs ...
    ((SLASH)=>SLASH)?                               //    /
    ;

// Bracketed external link: [url] or [url caption]. Produces two TEXT nodes,
// the URL and the (possibly empty) caption.
explicit_url
    : LEFT_BRACKET bald_url (ws external_link_caption)? RIGHT_BRACKET
      -> ^(TEXT bald_url) ^(TEXT external_link_caption?)
    ;

// Only "ftp" and "http" (any letter case) are accepted as protocols.
protocol
    : {textis("ftp") || textis("http")}? letters
    ;



// Caption of a bracketed external link. in_external_link is set while it is
// parsed so that literal_right_bracket leaves the closing bracket alone.
external_link_caption
@init {this.in_external_link=true;}
:
simple_text;
finally {this.in_external_link=false;}

//////////////////////////// magic links ////////////////////////////////
// Magic links (ISBN / PMID / RFC references). The *_link rules wrap the token
// in a dedicated node; the *_accidental rules match the same tokens but emit
// them as plain TEXT, for contexts where the reference must not become a link.
magic_link: isbn_link | pmid_link | rfc_link;
accidental_magic_link: isbn_accidental | pmid_accidental | rfc_accidental;

isbn_link: ISBN_LINK -> ^(ISBN ISBN_LINK);
isbn_accidental: ISBN_LINK -> ^(TEXT ISBN_LINK); // the TEXT node is possibly superfluous?

rfc_link: RFC_LINK -> ^(RFC RFC_LINK);
rfc_accidental: RFC_LINK -> ^(TEXT RFC_LINK);

pmid_link: PMID_LINK -> ^(PMID PMID_LINK);
pmid_accidental: PMID_LINK -> ^(TEXT PMID_LINK);
        
////////////////////////////////////////////////////////////////////////
// A paragraph line: optional leading whitespace (dropped) and inline text,
// wrapped in a P node.
paragraph: ws? /* !!!! */ inline_text -> ^(P inline_text);

// Rich inline content: one or more elements, each being a literal left
// bracket, an image, an external / internal / magic link, a pre block or a
// simple element, optionally followed by space-before-punctuation sequences
// and whitespace. text_levels counts the nesting depth of text rules; it is
// incremented on entry and decremented in the finally clause.
inline_text
@init { text_levels++; }
:
(
    /*(complex_inline_elem | simple_inline_elem) =>*/

/*    (complex_inline_elem
    |simple_inline_elem
    )
    ws?*/
    // [[http://foo.com]] has to be treated as: [, [http;//foo.com], ]
    ((LEFT_BRACKET LEFT_BRACKET LEFT_BRACKET) => literal_left_bracket // try and save it some time on [[[foo]]]?
    |(literal_left_bracket explicit_url) => literal_left_bracket
    |/*(imageinline)        =>*/ imageinline
    |(external_link)      => external_link
    |(internal_link)      => internal_link
    |(magic_link)         => magic_link
    |pre_block
    |(simple_inline_elem) =>simple_inline_elem
    )
     ((nbsp_before_punctuation) => nbsp_before_punctuation)*
    ((ws) =>printing_ws)?
   
   
)+;
finally { text_levels --;}

// Restricted inline content used for link captions: one or more simple
// elements, each optionally followed by whitespace. Links and images are not
// allowed here. text_levels is maintained the same way as in inline_text.
simple_text
@init { text_levels++; } :
(
    (simple_inline_elem) => simple_inline_elem
    
//    (nbsp_before_punctuation)?
    
    ((printing_ws) => printing_ws)?

    //ws?
)+;
finally { text_levels --; }


// A single non-link inline element: a magic-link token taken as plain text,
// an opening guillemet with its space, a bold/italics marker, a run of
// space-before-punctuation sequences, or a basic element.
simple_inline_elem:
    (
      (accidental_magic_link) => accidental_magic_link
    | punctuation_before_nbsp
    |(APOSTROPHES) => bold_and_italics
    | ((nbsp_before_punctuation) => nbsp_before_punctuation)+
    | really_basic_elem
    );

// Content between PRE_OPEN and PRE_CLOSE tokens, wrapped in a PRE node.
pre_block: PRE_OPEN pre_block_body PRE_CLOSE -> ^(PRE pre_block_body);
// Body of a pre block: whitespace (kept literally by pre_ws) and basic elements.
pre_block_body: (pre_ws* really_basic_elem*)+;
    
// The lowest-level text elements: an escaped HTML character, one punctuation
// element, a word or a number.
really_basic_elem:
    (html_dangerous
    |punctuation /* if punctuation+, risk of swallowing too many characters: [[[foo.jpg]]] needs to swallow just one */
    |letters
    |digits
    );


/*textline: simple_text -> ^(P simple_text);*/

///////////////////////////////////////////////////////////////////////////
// Interprets one APOSTROPHES token according to its length and the current
// text_bold / text_italics state:
//   2 apostrophes toggle italics, 3 toggle bold, 4 emit an APOSTROPHE node
//   followed by a bold toggle, 5 toggle both bold and italics.
// Each alternative updates the state flags and emits the matching
// B_ON / B_OFF / I_ON / I_OFF marker nodes.
bold_and_italics:
     {textis("''") && text_italics}? => APOSTROPHES  {text_italics=false;} ->         ^(I_OFF)
    |{textis("''") && !text_italics}? => APOSTROPHES {text_italics=true;}  ->         ^(I_ON)
    |{textis("'''") && text_bold}? => APOSTROPHES    {text_bold=false;} ->            ^(B_OFF)
    |{textis("'''") && !text_bold}? => APOSTROPHES   {text_bold=true;}  ->            ^(B_ON)
    |{textis("''''") && text_bold}? => APOSTROPHES   {text_bold=false;} -> APOSTROPHE ^(B_OFF)
    |{textis("''''") && !text_bold}? => APOSTROPHES  {text_bold=true;}  -> APOSTROPHE ^(B_ON)
    |{textis("'''''") && text_bold && text_italics}? =>  APOSTROPHES {text_bold=false; text_italics=false; } -> ^(B_OFF) ^(I_OFF)
    |{textis("'''''") && text_bold && !text_italics}? => APOSTROPHES {text_bold=false; text_italics=true; }  -> ^(B_OFF) ^(I_ON)
    |{textis("'''''") && !text_bold && text_italics}? => APOSTROPHES {text_bold=true; text_italics=false; }  -> ^(B_ON)  ^(I_OFF)
    |{textis("'''''") && !text_bold && !text_italics}? =>APOSTROPHES {text_bold=true; text_italics=true; }   -> ^(B_ON)  ^(I_ON)
    // Hopefully we never get more than 6 or less than 2. The lexer should take care of that.
    ;
////////////////////////Nbsp punctuation/////////////////////////////////
// A space directly before one of the listed punctuation marks is replaced by
// an NBSP160 node; the punctuation mark itself is kept.
nbsp_before_punctuation:
    
    SPACE ('»'         -> NBSP160 '»'
          | QUESTION   -> NBSP160 QUESTION
          | COLON      -> NBSP160 COLON
          | SEMICOLON  -> NBSP160 SEMICOLON
          | '!'        -> NBSP160 '!'
          | '%'        -> NBSP160 '%'
          ) ;        

//    SPACE x=('»' | QUESTION | COLON | SEMICOLON | '!' | '%') -> NBSP160 $x;


    /*{input.LA(2) == '?'}? => */
//    (SPACE ('»' | '?' | COLON | SEMICOLON | '!' | '%')) => SPACE -> NBSP160;

// Mirror case: a space directly after an opening guillemet becomes NBSP160.
punctuation_before_nbsp:
    '«' SPACE -> '«' NBSP160;

//«»


//////// //////////////////////////////////////////////////////////////////
// A page name: one name element followed by any mix of further elements and
// single spaces.
pagename:
        pagename_elem
              ((pagename_elem) => pagename_elem
               |(SPACE) => SPACE
              )*;

// Currently only words and magic-link tokens (taken as text) are accepted;
// the other candidate token kinds are commented out.
pagename_elem: (letters | accidental_magic_link/* | DIGITS | DOT | UNDERSCORE | HYPHEN | OPEN_PAREN | CLOSE_PAREN*/);




/////////////////////////////////// Very basic types ///////////////////////////////////////

/* Currently doesn't support equals during a header title...*/
// Heading text. in_header is set while it is parsed (and cleared in finally)
// so that literal_equals can refuse an EQUALS that is followed by another one.
header_simple_text
@init {this.in_header=true;}:
        inline_text; /* Pretty much everything seems to be tolerated in headings. (!) */
finally {this.in_header=false;}
        
// any need for accidental_magic_link?        
// A keyword-like word: starts with letters and may continue with letters,
// hyphens, underscores and integers.
mwletters:    letters (letters | HYPHEN | UNDERSCORE | (digits)=>positive_int)*;

/////////////////////////////////// Semi-literals, literal sets etc ///////////////////////////


// Everything that may appear as a plain punctuation element in text. Note
// that digits are included here, and that brackets, pipe, equals and colon
// go through their context-sensitive literal_* rules.
punctuation :   
    DOT |digits|COMMA|OPEN_PAREN | CLOSE_PAREN | HASH | HYPHEN | ASTERISK
    | UNDERSCORE | SLASH | SEMICOLON  | APOSTROPHE | QUESTION |
    literal_left_bracket | literal_right_bracket | literal_pipe | literal_equals | literal_colon |
    '!' | '@' | '$' | '%' | '^' | '`' | '~' | '\\' | '«' | '»';
       
        // |/*LINK_START |*/ /*LINK_END | */UNKNOWN ;

// Characters that are unsafe in HTML output; each is wrapped in an ENTITY node.
html_dangerous:   
        LT -> ^(ENTITY LT)
      | GT -> ^(ENTITY GT)
      | AMP -> ^(ENTITY AMP);

// Parser-level wrappers around the LETTERS token and the digits rule.
letters: (LETTERS);
positive_int: digits; /* needs to be refined to remove 0s at start */
// A closing ]] taken literally; only allowed outside any caption.
literal_link_end:          {caption_levels==0}? => link_end;

// Strangely enough, a literal pipe has to be allowed in an internal link caption: [[foo|bar|wa]]
// It would be good if this behaviour were proscribed to allow for future options
// The gate admits PIPE outside any caption, or inside an internal link caption.
literal_pipe:              {caption_levels==0 || in_internal_link_caption}? => PIPE;

/* Three ways of getting a literal right bracket:
1) You're neither in an external nor internal link: foo]
2) You're in an internal link, and not followed by another right bracket:  [[Boop|here] see?]]
3) You're in a nowiki block: [http://square.bracket.com The <nowiki>] foundation.]
*/
// The gate below implements cases 1 and 2; case 3 is the NOWIKI_RIGHT_BRACKET
// alternative, which the lexer only produces while parsing is suspended.
literal_right_bracket:     {!in_external_link && (caption_levels == 0 || input.LA(2)!= RIGHT_BRACKET)}? =>
         RIGHT_BRACKET
       | NOWIKI_RIGHT_BRACKET;

/*        {!in_external_link}? => RIGHT_BRACKET
        | NOWIKI_RIGHT_BRACKET;*/
literal_left_bracket:      LEFT_BRACKET | NOWIKI_LEFT_BRACKET; /* Dodgy - doesn't really know whether it's a literal left bracket or not */

// A COLON is literal unless it would end a defined term at the outermost
// text level (in_defined_term set and text_levels not above 1).
literal_colon:             {!in_defined_term || text_levels > 1}? => COLON /* ;foo:blah is special. ;foo[blah|bl:ah] is not special. TODO: make sure this doesn't break namespaces in defs */
                           | NOWIKI_COLON;

// Only supports a single =. So no ==foo==blah==.
// Inside a heading an EQUALS is literal only if the next token is not EQUALS.
literal_equals:            
    {!in_header || input.LA(2) != EQUALS}? => EQUALS
;

// Double brackets opening and closing an internal or image link. They are
// matched as two separate bracket tokens rather than one lexer token.
link_start: LEFT_BRACKET LEFT_BRACKET;
link_end: RIGHT_BRACKET RIGHT_BRACKET;

// TODO: apparently image captions always treat spaces literally...
// Whitespace that shows up in the output. On a space-indented line at the
// outermost text level the spaces are kept as they are (pre_ws); everywhere
// else a whitespace run collapses into a single NBSP node.
printing_ws:
    {literal_whitespace && text_levels <= 1}? => (pre_ws) => pre_ws
    | ws -> NBSP;

// One or more digit tokens.
digits:    digit+;

// Literal whitespace: SPACE tokens are kept, nowiki open/close tokens are
// matched but dropped from the tree.
pre_ws: pre_ws_elem+;
pre_ws_elem:
      SPACE      -> SPACE
    | NOWIKI     ->
    | NOWIKI_OFF ->;

// Ordinary whitespace: SPACE tokens are kept, nowiki open/close tokens are
// matched but excluded from the tree (!).
ws: (SPACE
    | NOWIKI!
    | NOWIKI_OFF!)+ ;




//-----------------------------------------------------
/* ISBN magic links. Care will be needed to make sure they're treated as literals wherever they aren't supported. */
// Broken example: [http://ISBN 1234567890] - current parser does correctly. But does it matter?
// Produced only while parsing is active. If the full ISBN pattern followed by
// a non-digit is ahead, the whole pattern becomes one ISBN_LINK token;
// otherwise a single letter is consumed and the token is retyped as LETTERS.
ISBN_LINK: {!this.in_noparse}? =>
  // Parser.php l081, ~DIGIT is actually regexp \b
  ((ISBN_LINK_ACTUAL ~DIGIT) => ISBN_LINK_ACTUAL
  | LETTER { $type=LETTERS; }
  );

// "ISBN", one or more spaces, an optional 978/979 prefix, then ten digits
// (the last may be X or x), each optionally preceded by a space or hyphen.
fragment
ISBN_LINK_ACTUAL:
    'ISBN'
    ' '+
    ('97' ('8' | '9'))?
    ((' ' | '-')? '0'..'9')
    ((' ' | '-')? '0'..'9')
    ((' ' | '-')? '0'..'9')
    ((' ' | '-')? '0'..'9')
    ((' ' | '-')? '0'..'9')
    ((' ' | '-')? '0'..'9')
    ((' ' | '-')? '0'..'9')
    ((' ' | '-')? '0'..'9')
    ((' ' | '-')? '0'..'9')
    ((' ' | '-')? ('0'..'9' | 'X' | 'x'));
    
    
// RFC reference: produced only while parsing is active. Falls back to
// consuming one letter, retyped as LETTERS, when the full pattern is absent.
RFC_LINK: {!this.in_noparse}? =>
  ((RFC_LINK_ACTUAL) => RFC_LINK_ACTUAL
  | LETTER { $type=LETTERS; }
  );
 
// "RFC", one or more spaces, one or more digits.
fragment
RFC_LINK_ACTUAL:
    'RFC'
    ' '+
    ('0'..'9')+;
    
    

// PMID reference: same structure as RFC_LINK.
PMID_LINK : {!this.in_noparse}? =>
  ((PMID_LINK_ACTUAL) => PMID_LINK_ACTUAL
  | LETTER { $type=LETTERS; }
  );
 
// "PMID", one or more spaces, one or more digits.
fragment
PMID_LINK_ACTUAL:
    'PMID'
    ' '+
    ('0'..'9')+;

///////////// <nowiki> / </nowiki>

// Opening nowiki tag. Only recognised while parsing is active; on a match
// both in_nowiki and in_noparse are raised. When the full tag is not ahead,
// the single '<' is consumed and the token is retyped as LT.
NOWIKI: {!this.in_noparse}? =>
    ((NOWIKI_ACTUAL) => NOWIKI_ACTUAL { this.in_nowiki=true; this.in_noparse=true;}
    | '<' { $type=LT; }
    );

// The tag text had been stripped from the literal, leaving an empty string,
// which is not a valid lexer literal.
fragment
NOWIKI_ACTUAL: '<nowiki>' ;

// Closing nowiki tag. Only recognised inside a nowiki section. On a match
// both in_nowiki and in_noparse are cleared: NOWIKI raised both, and leaving
// in_noparse set would keep every markup token (brackets, pipe, colon, ...)
// disabled for the rest of the input. A nowiki section cannot open inside a
// pre section (NOWIKI requires !in_noparse), so clearing the flag here cannot
// cut a pre section short.
NOWIKI_OFF: {this.in_nowiki}? =>
    ((NOWIKI_OFF_ACTUAL) => NOWIKI_OFF_ACTUAL { this.in_nowiki=false; this.in_noparse=false; }
    | '<' { $type=LT; }
    );

// The tag text had been stripped from the literal, leaving an empty string.
fragment
NOWIKI_OFF_ACTUAL: '</nowiki>' ;

/////////// <pre> / </pre>

// Opening pre tag. Only recognised while parsing is active; on a match both
// in_pre and in_noparse are raised. When the full tag is not ahead, the
// single '<' is consumed and the token is retyped as LT.
PRE_OPEN: {!this.in_noparse}? =>
   ((PRE_OPEN_ACTUAL) => PRE_OPEN_ACTUAL { this.in_pre=true; this.in_noparse=true;}
   | '<' { $type=LT; }
   );

// The tag text had been stripped from the literal, leaving a quoted string
// that spanned several lines.
fragment
PRE_OPEN_ACTUAL: '<pre>' ;

// Closing pre tag. Only recognised inside a pre section; on a match both
// in_pre and in_noparse are cleared so that normal tokenising resumes.
PRE_CLOSE: {this.in_pre}? =>
    ((PRE_CLOSE_ACTUAL) => PRE_CLOSE_ACTUAL { this.in_pre=false; this.in_noparse=false; }
    | '<' { $type=LT; }
    );

// The tag text had been stripped from the literal, leaving a quoted string
// that spanned several lines.
fragment
PRE_CLOSE_ACTUAL: '</pre>' ;



// Characters that html_dangerous wraps in ENTITY nodes.
LT
    : '<'
    ;
GT
    : '>'
    ;
AMP
    : '&'
    ;

//NOWIKI  : {!this.in_nowiki}? => '<' 'nowiki' >'{ this.in_nowiki=true;} /*->*/ ; /* doesn't render as anything in particular */
//NOWIKI_OFF: {this.in_nowiki}? => '</nowiki>'{ this.in_nowiki=false;} /*->*/ ;
/*
NOWIKI: {
     !this.in_nowiki &&
     input.LA(1)=='<' &&
     input.LA(2)=='n' &&
     input.LA(3)=='o' &&
     input.LA(4)=='w' &&
     input.LA(5)=='i' &&
     input.LA(6)=='k' &&
     input.LA(7)=='i' &&
     input.LA(8)=='>'
    }? => '<nowiki>' { this.in_nowiki=true;};
*/
/*NOWIKI_OFF: {
      this.in_nowiki &&
      input.LA(1)=='<' &&
      input.LA(2)=='/' &&
      input.LA(3)=='n' &&
      input.LA(4)=='o' &&
      input.LA(5)=='w' &&
      input.LA(6)=='i' &&
      input.LA(7)=='k' &&
      input.LA(8)=='i' &&
      input.LA(9)=='>'
     }? => '</nowiki>' { this.in_nowiki=false;};
*/

/* NOWIKI  : {!this.in_nowiki}? => LT 'nowiki' GT { this.in_nowiki=true;} ; NOWIKI_OFF: {this.in_nowiki}? => LT '/nowiki' GT { this.in_nowiki=false;};*/

/* Can't make them tokens because of stupid [[1]] */

/*

LINK_START: {!this.in_nowiki}? => '[[';
LINK_END: {!this.in_nowiki}? => ']]';

*/

// Pipe is only a token while parsing is active.
PIPE
    : {!this.in_noparse}? => '|'
    ;


/*PRESPACE: { !this.in_nowiki && this.getCharPositionInLine()==0 }? => ' '; SPACE: {!(!this.in_nowiki && this.getCharPositionInLine()==0)}? => ' '+;*/
// Exactly one space per token.
SPACE
    : ' '
    ;

// Single-character punctuation tokens with no context dependence.
DOT
    : '.'
    ;
EQUALS
    : '='
    ;
UNDERSCORE
    : '_'
    ;
HYPHEN
    : '-'
    ;
COMMA
    : ','
    ;
OPEN_PAREN
    : '('
    ;
CLOSE_PAREN
    : ')'
    ;
SEMICOLON
    : ';'
    ;
QUESTION
    : '?'
    ;

/* It's a literal apostrophe if either the next character is *not* an apostrophe, or the next 5 characters *are* apostrophes. Yummy. */
// The escaped apostrophe literals had been mangled into an unterminated
// quote-backslash sequence; they are restored here.
fragment APOS  : '\'';

// A single literal apostrophe. Inside a no-parse section every apostrophe is
// literal; otherwise it is literal when it stands alone or when it starts a
// run of six or more.
APOSTROPHE  : {
     input.LA(1)=='\'' && (
         this.in_noparse || (
             input.LA(2)!='\'' ||
             input.LA(3)=='\'' &&
             input.LA(4)=='\'' &&
             input.LA(5)=='\'' &&
             input.LA(6)=='\''
         )
     )
}? => APOS;

/* It's a swarm of apostrophes if it is not the case that this and the next five characters are apostrophes, and there are at least two, and we're not in a nowiki.*/
// A run of two or more apostrophes (bold / italics markup), produced only
// while parsing is active and only when the run is shorter than six. The
// escaped apostrophe literals had been mangled and are restored here.
APOSTROPHES  : {
       !this.in_noparse &&
       input.LA(1)=='\'' && !(
            input.LA(2)=='\'' &&
            input.LA(3)=='\'' &&
            input.LA(4)=='\'' &&
            input.LA(5)=='\'' &&
            input.LA(6)=='\''
       )
}? => APOS APOS+ ;

// Markup characters whose token type depends on whether parsing is active.
// While in_noparse is set, brackets and colons are emitted as NOWIKI_*
// tokens instead, and HASH, ASTERISK and SLASH are not produced at all.
LEFT_BRACKET
    : {!this.in_noparse}? => '['
    ;
NOWIKI_LEFT_BRACKET
    : {this.in_noparse}? => '['
    ;

RIGHT_BRACKET
    : {!this.in_noparse}? => ']'
    ;
NOWIKI_RIGHT_BRACKET
    : {this.in_noparse}? => ']'
    ;

COLON
    : {!this.in_noparse}? => ':'
    ;
NOWIKI_COLON
    : {this.in_noparse}? => ':'
    ;
HASH
    : {!this.in_noparse}? => '#'
    ;
ASTERISK
    : {!this.in_noparse}? => '*'
    ;
SLASH
    : {!this.in_noparse}? => '/'
    ;

// These definitions had been collapsed onto one line behind a leading "//",
// which commented out DIGIT (used by ISBN_LINK) and digit (used by digits).
// They are split back onto separate lines so the two live rules exist again.
//fragment
//DIGIT: D0 | D1 | D2 | D3 | D4 | D5 | D6 | D7 | D8 | D9;
// A single decimal digit token.
DIGIT: '0'..'9';
//DIGITS: DIGIT+;
// Parser-level wrapper around one DIGIT token.
digit: DIGIT;

// One ASCII letter; a fragment, so it never becomes a token by itself.
fragment LETTER
    : ('A'..'Z' | 'a'..'z')
    ;
//HTTP: 'h' 't' 't' 'p';

// A word: one or more ASCII letters.
LETTERS
    : LETTER+
    ; // {if ($text.equals("http") || $text.equals("ftp")) $type=PROTOCOL;};

//HTML  : '<html>' .* '</html>' /*-> ^(HTML $x)*/  ;


// Line break, with an optional carriage return. The token text is replaced
// by a backslash, the letter n and a real newline.
N
    : '\r'? '\n' {setText("\\n\n");}
    ; //UNKNOWN  : .;