/* Markup spec/flex */

/* Scanner for Wikipedia language. Built with flex. */

CARRIAGERETURN                      \r CARRIAGERETURN_DOUBLE               \r\n\r VALIDURLCHARS                       [a-z0-9\%\/\?\:\@\=\&\$\_\-\+\!\*\'\(\)\,\.] NEWPARAGRAPH                        \n\n MATH                                &lt;math&gt; MATH_END                            &lt;\/math&gt; NOWIKI                              &lt;nowiki&gt; NOWIKI_END                          &lt;\/nowiki&gt; GENERICLINK                         [a-z]+:\/\/{VALIDURLCHARS}+ TITLEDLINK                          \133{GENERICLINK}\ [^\133]*\135 WIKILINK                            \133{2}[^\135]+\135{2} CURRENTDAY                          \{\{CURRENTDAY\}\} CURRENTMONTH                        \{\{CURRENTMONTH\}\} CURRENTTIME                         \{\{CURRENTTIME\}\} CURRENTYEAR                         \{\{CURRENTYEAR\}\} LIST                                \n[\:\#\;\*] PRE                                 ^\040 PRE_END                             \n[^\040] HR                                  ^ H1                                  ={1} H2                                  ={2} H3                                  ={3} H4                                  ={4} H5                                  ={5} H6                                  ={6} EMPHASIZE                           '{2} SEMPHASIZE                          '{3} VSEMPHASIZE                         '{5} LESSERTHAN                          &lt; GREATERTHAN                         &gt;

%option caseless stack %s list pre %x math nowiki

%{
 * 1) include &lt;time.h&gt;
 * 2) include &lt;sys/types.h&gt;

%}
 * 1) define MAXLIST 32

%%

%{

/* State variable positions (int state[10]): 0 = pre 1 = h1 2 = h2 3 = h3 4 = h4 5 = h5 6 = h6 7 = emphasis 8 = strong emphasis 9 = very strong emphasis */ int state[10];

/* Temporary variables. */ int i; char j;

/* A string used for holding the current content of a list (like *#*) */ char listtext[MAXLIST] = "\0";

/* The variables needed for CURRENTTIME-like substitutions. */ time_t time_since_epoch; struct tm cur_time;

/* Set all state variables to 0. */ for (i=0; i&lt;10; i++) { state[i] = 0; }

/* Get the time once at execution of program, instead of every call. */ time(&time_since_epoch); gmtime_r(&time_since_epoch, &cur_time);

%}

{CARRIAGERETURN_DOUBLE}             { unput('\n'); } {CARRIAGERETURN}

{WIKILINK}                          { /* Copy [[...]] wiki links to the output unchanged. */ ECHO; }

{NOWIKI}                            { /* Opening tag: enter the exclusive nowiki state. */ yy_push_state(nowiki); }
<nowiki>{NOWIKI_END}                { /* Closing tag: return to the previous state (must be a call). */ yy_pop_state(); }
<nowiki>{LESSERTHAN}                { /* Emit '<' as an HTML entity so it is shown literally. */ printf("&lt;"); }
<nowiki>{GREATERTHAN}               { /* Emit '>' as an HTML entity so it is shown literally. */ printf("&gt;"); }

{MATH}                              { /* Opening tag: enter the exclusive math state. */ yy_push_state(math); }
<math>{MATH_END}                    { /* Closing tag: return to the previous state (must be a call). */ yy_pop_state(); }

{PRE}                               {
                                      /* A line starting with a space opens a <pre> block, unless one is already open. */
                                      if (state[0] == 0) {
                                        printf("\n<pre>");
                                        state[0]++;
                                        yy_push_state(pre);
                                      }
                                    }
<pre>{PRE_END}                      {
                                      /* A newline followed by a non-space character ends the block. */
                                      printf("</pre>");
                                      state[0]--;
                                      /* Give the matched text back so it is rescanned in the restored state. */
                                      yyless(0);
                                      yy_pop_state();
                                    }

{HR}                                { /* Horizontal rule. */ printf("\n<hr>"); }
{NEWPARAGRAPH}                      { /* Two newlines start a paragraph; one newline is pushed back for rescanning. */ printf("\n<p>"); unput('\n'); }

{VSEMPHASIZE}                       {
                                      /* Five quotes toggle very strong emphasis (state[9]). */
                                      if (state[9] == 0) {
                                        printf("<strong><em>");
                                        state[9]++;
                                      } else {
                                        /* Close in reverse order of opening so the tags nest properly. */
                                        printf("</em></strong>");
                                        state[9]--;
                                      }
                                    }
{SEMPHASIZE}                        {
                                      /* Three quotes toggle strong emphasis (state[8]). */
                                      if (state[8] == 0) {
                                        printf("<strong>");
                                        state[8]++;
                                      } else {
                                        printf("</strong>");
                                        state[8]--;
                                      }
                                    }
{EMPHASIZE}                         {
                                      /* Two quotes toggle emphasis (state[7]). */
                                      if (state[7] == 0) {
                                        printf("<em>");
                                        state[7]++;
                                      } else {
                                        printf("</em>");
                                        state[7]--;
                                      }
                                    }

{H6}                                {
                                      /*
                                       * Headings: a run of N '=' toggles <hN>. The first occurrence
                                       * opens the tag and the next one closes it, tracked in state[N].
                                       */
                                      if (state[6] == 0) { printf("<h6>");  state[6]++; }
                                      else               { printf("</h6>"); state[6]--; }
                                    }
{H5}                                {
                                      if (state[5] == 0) { printf("<h5>");  state[5]++; }
                                      else               { printf("</h5>"); state[5]--; }
                                    }
{H4}                                {
                                      if (state[4] == 0) { printf("<h4>");  state[4]++; }
                                      else               { printf("</h4>"); state[4]--; }
                                    }
{H3}                                {
                                      if (state[3] == 0) { printf("<h3>");  state[3]++; }
                                      else               { printf("</h3>"); state[3]--; }
                                    }
{H2}                                {
                                      if (state[2] == 0) { printf("<h2>");  state[2]++; }
                                      else               { printf("</h2>"); state[2]--; }
                                    }
{H1}                                {
                                      if (state[1] == 0) { printf("<h1>");  state[1]++; }
                                      else               { printf("</h1>"); state[1]--; }
                                    }

{TITLEDLINK}                        {
                                      /*
                                       * "[url text]": the URL runs up to the first space, the
                                       * link text up to the first ']'. A private cursor is used
                                       * so that yytext itself is left untouched.
                                       */
                                      const char *cursor = yytext;   /* starts on the opening '[' */

                                      printf("<a href=\"");
                                      while (*++cursor != ' ') { putchar(*cursor); }
                                      printf("\">");
                                      while (*++cursor != ']') { putchar(*cursor); }
                                      printf("</a>");
                                    }
{GENERICLINK}                       {
                                      /* Bare URL: the URL is both the target and the link text. */
                                      int url_len = (int) yyleng;
                                      char last = yytext[url_len - 1];

                                      if ((last == '.') || (last == ',')) {
                                        /*
                                         * A trailing '.' or ',' is assumed to be punctuation:
                                         * keep it out of the link and print it afterwards.
                                         * "%.*s" limits the output without writing into yytext.
                                         */
                                        url_len--;
                                        printf("<a href=\"%.*s\">%.*s</a>%c",
                                               url_len, yytext, url_len, yytext, last);
                                      } else {
                                        printf("<a href=\"%s\">%s</a>", yytext, yytext);
                                      }
                                    }
{CURRENTTIME}                       { /* HH:MM, zero-padded so that e.g. 09:05 is not printed as 9:5. */ printf("%02d:%02d", cur_time.tm_hour, cur_time.tm_min); }
{CURRENTDAY}                        { /* Day of month, not padded. */ printf("%d", cur_time.tm_mday); }
{CURRENTMONTH}                      { /* tm_mon is zero-based; printed as two digits. */ printf("%.2d", (cur_time.tm_mon + 1)); }
{CURRENTYEAR}                       { /* tm_year counts from 1900. */ printf("%d", (cur_time.tm_year + 1900)); }
{LIST}                              {
                                      /*
                                       * Remember the matched newline plus list marker in listtext.
                                       * Nothing is printed for the match here.
                                       * TODO: generate the list markup from listtext.
                                       */
                                      if (strlen(yytext) < MAXLIST) {
                                        strcpy(listtext, yytext);
                                      }
                                    }

%%

/*
 * Entry point. Scans the file named by the first command-line argument,
 * or standard input when no argument is given, writing the generated HTML
 * to standard output.
 *
 * Returns 0 on success and 1 if the input file cannot be opened.
 */
int main (int argc, char **argv)
{
    ++argv, --argc;  /* Don't care about name of program. */

    if (argc > 0) {
        yyin = fopen(argv[0], "r");
        if (yyin == NULL) {
            perror(argv[0]);
            return 1;
        }
    }
    /* Otherwise yyin is left unset and flex reads from stdin by default. */

    yylex();  /* Must be a call; a bare `yylex;` does nothing. */

    if (argc > 0) {
        fclose(yyin);
    }
    return 0;
}