Markup spec/ANTLR/Images

The following code does a pretty good job of handling images. Obviously all the actual inline text processing and definitions of text are primitive.

Notes:
 * It can handle nested images
 * It can handle links in captions, which don't trouble it at all: [[image:foo.jpg|[[somelink]]]]
 * It recognises all the defined options. Unfortunately it's not quite as flexible as the current parser in allowing ad hoc definitions like "thumb=$" etc.
 * Its treatment of options is basically: if the text doesn't match *exactly* a specified format (e.g., "...|thumb=xxx|"), then it's a caption. The following are treated as captions:
 * (possibly too harsh...)
 * It's case-insensitive for all "magic word" matches, and case-sensitive otherwise.

Test cases
These test cases are parsed correctly:













grammar image4;

@header {
}

@members {
    // Text that the namespace token in front of the colon must equal
    // (compared case-insensitively by textis()).
    String _mw_image_namespace = "image";

    // This little helper is crucial: it lets a semantic predicate compare the
    // text of the next token against an expected piece of text, ignoring case.
    // It would be easy to adapt it to fetch the actual magic words from an
    // external library, text file, etc.
    boolean textis(String mw) {
        String text = input.LT(1).getText();
        // Guard against a token that carries no text, so the predicate
        // simply fails instead of throwing.
        return text != null && text.equalsIgnoreCase(mw);
    }
}

/* Entry rule: one inline image, optional spaces, then any number of newlines.
   The repetition operator after the group is commented out in the original
   and is kept that way here. */
start
    :   ( imageinline ws N* ) /* * */
    ;

/* An inline image: link opener, the image namespace, a colon, the image name,
   then zero or more pipe-separated segments (options or captions), and the
   link closer. */
imageinline
    :   LINK_START image_namespace COLON ws imagename
        ( PIPE optionorcaption )*
        LINK_END
    ;

/* very abbreviated ... */
linkinline
    :   LINK_START (.*) LINK_END
    ;

/* An image name is a page name, a dot, and a recognised image extension;
   optional spaces are accepted on either side of the dot. */
imagename
    :   pagename ws DOT ws imageextension
    ;

/* Only validates the extension for now; later passes/actions can readily
   retrieve the extension text itself. The predicate accepts the next token
   when its text is one of the listed extensions (case-insensitive). */
imageextension
    :   {textis("jpeg") || textis("jpg") || textis("png") || textis("svg") || textis("gif") || textis("bmp")}?
        letters
    ;

/* One pipe-separated segment of an image link. A segment counts as an option
   only when a complete option is immediately followed by PIPE or LINK_END;
   anything else (including a segment starting with a space) is a caption. */
optionorcaption
    :   (imagemodeautothumb (PIPE | LINK_END)) => imagemodeautothumb   /* move it up here as it's so common */
    |   (SPACE) => caption
    |   (imageoption (PIPE | LINK_END)) => imageoption
    |   caption
    ;

/* Every recognised image option. Each alternative must sit on its own line:
   the disabled imagemodepage alternative uses a line comment, which would
   otherwise swallow the alternatives after it. */
imageoption
    :   imagemodemanualthumb
    |   imagemodeautothumb
    |   imagemodeframe
    |   imagemodeframeless
    // |   imagemodepage   /* something weird about this one but I don't know what. */
    |   imagemodeupright
    |   imagemodeborder
    |   imagesizeparameter
    |   imagealignleft
    |   imagealigncenter
    |   imagealignright
    |   imagealignnone
    |   imagevalignbaseline
    |   imagevalignsub
    |   imagevalignsuper
    |   imagevaligntop
    |   imagevaligntexttop
    |   imagevalignmiddle
    |   imagevalignbottom
    |   imagevaligntextbottom
    ;

/* Thin named wrappers: each option rule delegates to the magic-word rule
   that performs the actual text match. */

/* Display modes. */
imagemodemanualthumb  : mw_img_manualthumb ;
imagemodeautothumb    : mw_img_thumbnail ;
imagemodeframe        : mw_img_frame ;
imagemodeframeless    : mw_img_frameless ;
imagemodepage         : mw_img_page ;
imagemodeupright      : mw_img_upright ;
imagemodeborder       : mw_img_border ;

/* Size: a positive integer followed by the width unit word. */
imagesizeparameter    : POSITIVE_INT mw_img_width ;

/* Horizontal alignment. */
imagealignleft        : mw_img_left ;
imagealigncenter      : mw_img_center ;
imagealignright       : mw_img_right ;
imagealignnone        : mw_img_none ;

/* Vertical alignment. */
imagevalignbaseline   : mw_img_baseline ;
imagevalignsub        : mw_img_sub ;
imagevalignsuper      : mw_img_super ;
imagevaligntop        : mw_img_top ;
imagevaligntexttop    : mw_img_text_top ;
imagevalignmiddle     : mw_img_middle ;
imagevalignbottom     : mw_img_bottom ;
imagevaligntextbottom : mw_img_text_bottom ;

/* default settings: */
/* Hmm, user-definable grammar seems to be a bad idea. Assume that the
   img_manualthumb is always something followed by the name. */

/* Each rule below is guarded by a semantic predicate that compares the text
   of the next token (case-insensitively) with the accepted magic word(s).
   Trailing line comments require every rule to stay on its own line. */

// don't forget thumb=
mw_img_manualthumb : {textis("thumbnail") || textis("thumb")}? mwletters EQUALS imagename ;

// 'thumbnail' | 'thumb'
mw_img_thumbnail   : {textis("thumbnail") || textis("thumb")}? mwletters ;

mw_img_frame       : {textis("framed") || textis("enframed") || textis("frame")}? mwletters ;

mw_img_frameless   : {textis("frameless")}? mwletters ;

// 'page=$1' | 'page $1'   ??? (where is this used?)
mw_img_page        : {textis("page")}? mwletters (SPACE | EQUALS) mwletters ;

// 'upright' ( '='? POSITIVE_INT )?
mw_img_upright     : {textis("upright")}? mwletters EQUALS? POSITIVE_INT? ;

mw_img_border      : {textis("border")}? mwletters ;

mw_img_width       : {textis("px")}? mwletters ;

/* Vertical-alignment magic words. Each predicate checks the text of the next
   token, case-insensitively, before the word is consumed. */
mw_img_baseline    : {textis("baseline")}? mwletters ;
mw_img_sub         : {textis("sub")}? mwletters ;
mw_img_super       : {textis("super") || textis("sup")}? mwletters ;
mw_img_top         : {textis("top")}? mwletters ;
mw_img_middle      : {textis("middle")}? mwletters ;
mw_img_bottom      : {textis("bottom")}? mwletters ;

/* "text-top" and "text-bottom" are not single tokens: LETTERS contains only
   letters and '-' is its own HYPHEN token, so the input arrives as
   LETTERS HYPHEN LETTERS. textis() looks at one token only, so these two
   predicates inspect all three tokens explicitly. The constant-first
   equalsIgnoreCase call yields false if the third token has no text. */
mw_img_text_top
    :   {textis("text") && input.LA(2) == HYPHEN && "top".equalsIgnoreCase(input.LT(3).getText())}?
        mwletters
    ;
mw_img_text_bottom
    :   {textis("text") && input.LA(2) == HYPHEN && "bottom".equalsIgnoreCase(input.LT(3).getText())}?
        mwletters
    ;

/* Horizontal-alignment magic words. Unlike the other magic-word rules these
   consume a plain `letters` token rather than `mwletters`. */
mw_img_left
    :   {textis("left")}? letters
    ;
mw_img_center
    :   {textis("center") || textis("centre")}? letters
    ;
mw_img_right
    :   {textis("right")}? letters
    ;
mw_img_none
    :   {textis("none")}? letters
    ;

/* The namespace word in front of the colon; its text must equal the
   _mw_image_namespace member, ignoring case. */
image_namespace
    :   {textis(_mw_image_namespace)}?
        mwletters
    ;

/* Link delimiters. The rule bodies were empty (the bracket literals appear to
   have been consumed as wiki markup when the grammar was published); an empty
   lexer rule matches nothing useful, so the double-bracket literals used by
   the image syntax, e.g. [[image:foo.jpg|caption]], are restored. */
LINK_START : '[[' ;
LINK_END   : ']]' ;

/* Separator tokens: the colon after the namespace and the pipe between
   option/caption segments. */
COLON : ':' ;
PIPE  : '|' ;

/* A caption is simply a run of inline text. */
caption
    :   inline_text
    ;

/* A page name starts with letters or a number and may continue with letters,
   numbers, dots, spaces, underscores, hyphens and parentheses. */
pagename
    :   ( letters | POSITIVE_INT )
        (   letters
        |   POSITIVE_INT
        |   DOT
        |   SPACE
        |   UNDERSCORE
        |   HYPHEN
        |   OPEN_PAREN
        |   CLOSE_PAREN
        )*
    ;

/* Primitive inline text: any sequence (possibly empty) of words, spaces,
   dots, numbers, commas, parentheses, nested images and nested links. */
inline_text
    :   (   letters
        |   SPACE
        |   DOT
        |   POSITIVE_INT
        |   COMMA
        |   imageinline
        |   linkinline
        |   OPEN_PAREN
        |   CLOSE_PAREN
        )*
    ;

/* One or more decimal digits. */
POSITIVE_INT
    :   ('0'..'9')+
    ;

/* Single-character punctuation tokens. */
DOT         : '.' ;
SPACE       : ' ' ;
EQUALS      : '=' ;
UNDERSCORE  : '_' ;
HYPHEN      : '-' ;
COMMA       : ',' ;
OPEN_PAREN  : '(' ;
CLOSE_PAREN : ')' ;

/* A word token: one or more ASCII letters. */
LETTERS
    :   LETTER+
    ;

/* Helper for LETTERS; as a fragment it never becomes a token by itself. */
fragment LETTER
    :   ( 'A'..'Z' | 'a'..'z' )
    ;

/* Parser-level alias for a single LETTERS token. */
letters
    :   LETTERS
    ;

/* A magic-word style identifier: it must begin with a LETTERS token and may
   continue with further letters, hyphens, underscores or numbers. */
mwletters
    :   LETTERS
        (   LETTERS
        |   HYPHEN
        |   UNDERSCORE
        |   POSITIVE_INT
        )*
    ;

/* A line break: a newline, optionally preceded by a carriage return. */
N
    :   '\r'? '\n'
    ;

/* Optional whitespace: zero or more SPACE tokens. */
ws
    :   SPACE*
    ;