%{ /* parser.leg -> parser.c -- Parse (Multi)Markdown plain text for conversion into other formats (c) 2013-2015 Fletcher T. Penney (http://fletcherpenney.net/). Derived from peg-multimarkdown, which was forked from peg-markdown, which is (c) 2008 John MacFarlane (jgm at berkeley dot edu), and licensed under GNU GPL or MIT. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License or the MIT license. See LICENSE for details. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. */ #include "parser.h" #include "writer.h" /* Define shortcuts to adding nodes, etc. */ #define node(x) mk_pos_node(x, NULL, thunk->begin, thunk->end) #define str(x) mk_pos_str(x, thunk->begin, thunk->end) #define list(x,y) mk_pos_list(x, y, thunk->begin, thunk->end) #define ext(x) extension(x,((parser_data *)G->data)->extensions) #define YY_INPUT(buf, result, max_size, D) yy_input_func(buf, &result, max_size, (parser_data *)G->data) /* redefine input buffer so that we draw from the specified source string to make it thread/reentrant safe */ void yy_input_func(char *buf, int *result, int max_size, parser_data *data) { \ int yyc; \ if (data->charbuf && (*(data->charbuf) != '\0')) { \ yyc = *(data->charbuf)++; \ } else { \ yyc= EOF; \ } \ (*result) = (EOF == yyc) ? 0 : (*(buf)= yyc, 1); \ } %} Doc = BOM? a:StartList b:StartList ( &{ !ext(EXT_COMPATIBILITY) && !ext(EXT_NO_METADATA) } &( (YAMLStart)? MetaDataKey Sp ':' Sp (!Newline)) MetaData { a = cons($$, a); b = node(FOOTER); } )? ( Block { a = cons($$, a); } )* BlankLine* { if (b!= NULL) a = cons(b,a); ((parser_data *)G->data)->result = reverse_list(a); } MetaData = a:StartList (( (YAMLStart) | !([A-Za-z]+ "://") !SetextHeading ) (MetaDataKeyValue { a = cons($$, a); })+ (YAMLStop)?) { $$ = list(METADATA, a); } MetaDataKeyValue = a:MetaDataKey Sp ':' Sp b:MetaDataValue { $$ = a; $$->children = b; } MetaDataKey = < !([A-Za-z]+ "://") AlphanumericAscii ( Sp ( AlphanumericAscii | '_' | ' ' | '-' | '.' )+)* > { $$ = str(yytext); $$->key = METAKEY; } SingleLineMetaKeyValue = MetaDataKey Sp ':' Sp (!Newline .)* MetaDataValue = a:StartList ((< (!Newline .)* > { a = cons(str(yytext), a); }) ((Newline &(!BlankLine !SingleLineMetaKeyValue Sp RawLine)) { a = cons(str("\n"), a); } | Newline) (!BlankLine !SingleLineMetaKeyValue !YAMLStop Sp RawLine { a = cons(str(yytext), a); } )* ) { $$ = mk_str_from_list(a, false); $$->key = METAVALUE; } YAMLStart = "---" BlankLine YAMLStop = ("---"|"...") BlankLine DocForMetaDataOnly = BOM? a:StartList ( &( YAMLStart? MetaDataKey Sp ':' Sp (!Newline)) MetaData { a = cons($$, a); } )? ( SkipBlock )* BlankLine* { ((parser_data *)G->data)->result = reverse_list(a); } SkipBlock = HtmlBlock { free_node_tree($$); } | ( !'#' !SetextBottom1 !SetextBottom2 !BlankLine RawLine )+ BlankLine* | BlankLine+ | RawLine Block = BlankLine* ( BlockQuote | &{ !ext(EXT_COMPATIBILITY) } Fenced | Verbatim | &{ !ext(EXT_COMPATIBILITY) } DefinitionList | &{ !ext(EXT_COMPATIBILITY) } Glossary | Note | LinkReference | &{ !ext(EXT_COMPATIBILITY) } Abbreviation | HorizontalRule | &{ ext(EXT_HEADINGSECTION) } HeadingSection | Heading | OrderedList | BulletList | HtmlBlock | MarkdownHtmlBlock | StyleBlock | &{ !ext(EXT_COMPATIBILITY) } Table | &{ !ext(EXT_COMPATIBILITY) } ImageBlock | &{ !ext(EXT_COMPATIBILITY) } TOC | !(Sp HtmlBlockInTags) Para | Plain ) HeadingSectionBlock = BlankLine* !Heading ( BlockQuote | &{ !ext(EXT_COMPATIBILITY) } Fenced | Verbatim | &{ !ext(EXT_COMPATIBILITY) } DefinitionList | &{ !ext(EXT_COMPATIBILITY) } Glossary | Note | LinkReference | &{ !ext(EXT_COMPATIBILITY) } Abbreviation | HorizontalRule | OrderedList | BulletList | HtmlBlock | MarkdownHtmlBlock | StyleBlock | &{ !ext(EXT_COMPATIBILITY) } Table | &{ !ext(EXT_COMPATIBILITY) } ImageBlock | &{ !ext(EXT_COMPATIBILITY) } TOC | !(Sp HtmlBlockInTags) Para | Plain ) TOC = "{{TOC}}" Sp Newline { $$ = mk_node(TOC); $$->children = mk_node(RAW); $$->children->str = markdown_to_string(((parser_data *)G->data)->original, 0, TOC_FORMAT); } Heading = SetextHeading | AtxHeading HeadingSection = a:StartList Heading { a = cons($$, a); } (HeadingSectionBlock {a = cons($$, a); })* { $$ = list(HEADINGSECTION, a); } AtxInline = !Newline !( &{ !ext(EXT_COMPATIBILITY) } Sp AutoLabel Sp '#'* Sp Newline) !(Sp '#'* Sp Newline) Inline AtxStart = NonindentSpace < ( "######" | "#####" | "####" | "###" | "##" | "#" ) > { $$ = node(H1 + ((int)strlen(yytext) - 1)); } AtxHeading = s:AtxStart Sp a:StartList ( AtxInline { a = cons($$, a); } )+ ( Sp b:AutoLabel { append_list(b,a);})? (Sp '#'* Sp)? Sp < Newline > # ensures that we count characters all the way to the end { $$ = list(s->key,a); free(s); } SetextHeading = NonindentSpace (SetextHeading1 | SetextHeading2) SetextBottom1 = NonindentSpace '='+ Sp Newline SetextBottom2 = NonindentSpace '-'+ Sp Newline SetextHeading1 = &(RawLine SetextBottom1) a:StartList ( !Endline !( &{ !ext(EXT_COMPATIBILITY) } Sp AutoLabel ) Inline { a = cons($$, a); } )+ ( Sp b:AutoLabel { append_list(b,a);} Sp )? Sp Newline { $$ = list(H1, a); } SetextHeading2 = &(RawLine SetextBottom2) a:StartList ( !Endline !( &{ !ext(EXT_COMPATIBILITY) } Sp AutoLabel ) Inline { a = cons($$, a); } )+ ( Sp b:AutoLabel { append_list(b,a)} Sp )? Sp Newline { $$ = list(H2, a); } BlockQuote = a:BlockQuoteRaw { $$ = list(BLOCKQUOTE,a); } BlockQuoteRaw = a:StartList x:StartList (( NonindentSpace b:BlockQuoteMarker Line { a = cons($$, a); x = cons(b, x); } ) ( !(NonindentSpace '>') !BlankLine Line { a = cons($$, a); } )* ( BlankLine { a = cons(mk_str("\n"), a); } )* )+ { $$ = x; free_node_tree(x->next); $$->next = NULL; node *raw; raw = mk_str_from_list(a, true); raw->key = RAW; $$->children = raw; } BlockQuoteMarker = < ">" ' '? > { $$ = str(yytext); $$->key = BLOCKQUOTEMARKER; } BOM = "\357\273\277" Eof = !. Newline = '\n' | '\r' '\n'? Line = RawLine { $$ = str(yytext); } RawLine = ( < (!'\r' !'\n' .)* Newline > | < .+ > Eof ) NonMatchingRawLine = ( (!'\r' !'\n' .)* Newline | .+ Eof ) BlankLine = Sp Newline Sp = Spacechar* Spnl = Sp (Newline Sp)? Spacechar = ' ' | '\t' Nonspacechar = !Spacechar !Newline . NormalChar = !( SpecialChar | Spacechar | Newline ) . SpecialChar = '*' | '_' | '`' | '&' | '[' | ']' | '(' | ')' | '<' | '!' | '#' | '\\' | '\'' | '"' | '?' | ',' | ';' | '/' | '.' | '。' | '、'| ExtendedSpecialChar ExtendedSpecialChar = &{ ext(EXT_SMART) } ('.' | '-' | '\'' | '"') | &{ ext(EXT_NOTES) } ( '^' ) | &{ ext(EXT_CRITIC) } ( '{' ) | &{ !ext(EXT_COMPATIBILITY) } ( '~' ) | &{ !ext(EXT_COMPATIBILITY) } ( '|' ) Punctuation = '.' | ',' | '?' | '!' | ';' | ':' | '。' | '、' NonPunctuation = !(Punctuation | SpecialChar | Spacechar | Newline) . Quoted = '"' (!'"' .)* '"' | '\'' (!'\'' .)* '\'' HtmlAttribute = (AlphanumericAscii | '-')+ Spnl ('=' Spnl (Quoted | (!'>' Nonspacechar)+))? Spnl HtmlComment = "" .)* "-->" HtmlTag = '<' Spnl '/'? AlphanumericAscii+ Spnl HtmlAttribute* '/'? Spnl '>' RawHtml = < (HtmlComment | HtmlBlockScript | HtmlTag) > { if (ext(EXT_FILTER_HTML)) { $$ = list(LIST, NULL); } else { $$ = str(yytext); $$->key = HTML; } } Alphanumeric = [0-9A-Za-z] | '\200' | '\201' | '\202' | '\203' | '\204' | '\205' | '\206' | '\207' | '\210' | '\211' | '\212' | '\213' | '\214' | '\215' | '\216' | '\217' | '\220' | '\221' | '\222' | '\223' | '\224' | '\225' | '\226' | '\227' | '\230' | '\231' | '\232' | '\233' | '\234' | '\235' | '\236' | '\237' | '\240' | '\241' | '\242' | '\243' | '\244' | '\245' | '\246' | '\247' | '\250' | '\251' | '\252' | '\253' | '\254' | '\255' | '\256' | '\257' | '\260' | '\261' | '\262' | '\263' | '\264' | '\265' | '\266' | '\267' | '\270' | '\271' | '\272' | '\273' | '\274' | '\275' | '\276' | '\277' | '\300' | '\301' | '\302' | '\303' | '\304' | '\305' | '\306' | '\307' | '\310' | '\311' | '\312' | '\313' | '\314' | '\315' | '\316' | '\317' | '\320' | '\321' | '\322' | '\323' | '\324' | '\325' | '\326' | '\327' | '\330' | '\331' | '\332' | '\333' | '\334' | '\335' | '\336' | '\337' | '\340' | '\341' | '\342' | '\343' | '\344' | '\345' | '\346' | '\347' | '\350' | '\351' | '\352' | '\353' | '\354' | '\355' | '\356' | '\357' | '\360' | '\361' | '\362' | '\363' | '\364' | '\365' | '\366' | '\367' | '\370' | '\371' | '\372' | '\373' | '\374' | '\375' | '\376' | '\377' AlphanumericAscii = [A-Za-z0-9] Digit = [0-9] HexEntity = < '&' '#' [Xx] [0-9a-fA-F]+ ';' > DecEntity = < '&' '#' [0-9]+ > ';' > CharEntity = < '&' [A-Za-z0-9]+ ';' > NonindentSpace = " " | " " | " " | "" Indent = "\t" | " " IndentedLine = Indent Line OptionallyIndentedLine = Indent? Line Symbol = < SpecialChar > { $$ = str(yytext); } EscapedChar = '\\' < [-\\`|*_{}[\]()#+.!$><'""\n~^] > { $$ = str(yytext); } Entity = ( HexEntity | DecEntity | CharEntity ) { $$ = str(yytext); $$->key = HTML; } Para = NonindentSpace a:Inlines BlankLine+ { $$ = a; $$->key = PARA; } Plain = a:Inlines { $$ = a; $$->key = PLAIN; } Inlines = a:StartList ( !Endline Inline { a = cons($$, a); } | c:Endline &Inline { a = cons(c, a); } )+ voidEndline? { $$ = list(LIST, a); } Inline = #&{ check_timeout((parser_data *)G->data) } # TODO: the check_timeout function still slows us down -- do we still need it?? &{ ext(EXT_CRITIC) } CriticMarkup | &{ !ext(EXT_COMPATIBILITY) } DollarMath | Str | &{ !ext(EXT_COMPATIBILITY) } MathSpan | Endline | UlOrStarLine | Space | StrongAndEmph | Strong | Emph | &{ !ext(EXT_COMPATIBILITY) } CitationReference | &{ !ext(EXT_COMPATIBILITY) } Variable | Image | Link | NoteReference | Code | MarkdownHtmlTagOpen | RawHtml | Entity | EscapedChar | Smart | Symbol InlineNoEmph = #&{ check_timeout((parser_data *)G->data) } # TODO: the check_timeout function still slows us down -- do we still need it?? &{ ext(EXT_CRITIC) } CriticMarkup | &{ !ext(EXT_COMPATIBILITY) } DollarMath | Str | &{ !ext(EXT_COMPATIBILITY) } MathSpan | Endline | UlOrStarLine | Space | Strong | &{ !ext(EXT_COMPATIBILITY) } CitationReference | &{ !ext(EXT_COMPATIBILITY) } Variable | Image | Link | NoteReference | Code | MarkdownHtmlTagOpen | RawHtml | Entity | EscapedChar | Smart | Symbol Space = Spacechar+ { $$ = str(" "); $$->key = SPACE; } Endline = LineBreak | TerminalEndline | NormalEndline NormalEndline = Sp Newline !BlankLine !'>' !AtxStart !(RawLine ('='+ | '-'+) Newline) { $$ = str("\n"); $$->key = SPACE; } TerminalEndline = Sp Newline Eof { $$ = NULL; } LineBreak = (" " | &{ !ext(EXT_COMPATIBILITY) && ext(EXT_ESCAPED_LINE_BREAKS) } Sp "\\") a:NormalEndline { $$ = a; $$->key = LINEBREAK; } voidEndline = ((" " | &{ !ext(EXT_COMPATIBILITY) && ext(EXT_ESCAPED_LINE_BREAKS) } Sp "\\") voidNormalEndline) | (Sp Newline Eof) | voidNormalEndline voidNormalEndline = Sp Newline !BlankLine !'>' !AtxStart !(RawLine ('='+ | '-'+) Newline) Str = a:StartList < NormalChar+ > { a = cons(str(yytext), a); } ( &{ !ext(EXT_COMPATIBILITY) } StrChunk { a = cons($$, a); } )* ( &{ !ext(EXT_COMPATIBILITY) } ( Superscript | Subscript) {a = cons($$, a); } )* { if (a->next == NULL) { $$ = a; } else { $$ = list(LIST, a); } } StrChunk = < (NormalChar | '_'+ !Punctuation &Alphanumeric)+ > { $$ = str(yytext); } | AposChunk AposChunk = &{ ext(EXT_SMART) } '\'' &Alphanumeric { $$ = node(APOSTROPHE); } Superscript = '^' < (((!'^' Nonspacechar)+ &'^') | (!'^' NonPunctuation)+ ) > '^'? { $$ = str(yytext); $$->key = SUPERSCRIPT; } Subscript = '~' < (((!'~' Nonspacechar)+ &'~') | (!'~' NonPunctuation)+ ) > '~'? { $$ = str(yytext); $$->key = SUBSCRIPT; } StartList = &. { $$ = NULL; } MathSpan = '\\' < ( ('\\[' (!'\\\\]' .)* '\\\\]') | ('\\(' (!'\\\\)' .)* '\\\\)') ) > { /* Basically, these delimiters indicate math in LaTeX syntax, and the delimiters are compatible with MathJax and LaTeX ASCIIMathML is *not* supported */ $$ = str(yytext); $$->key = MATHSPAN; } SingleDollarMathStart = '$' !(Spacechar | Newline | '$' ) SingleDollarMathEnd = (!'\\' !'$' Nonspacechar) '$' !NonPunctuation SingleDollarMath = < SingleDollarMathStart (!SingleDollarMathEnd !(BlankLine BlankLine) .)* SingleDollarMathEnd > { $$ = str(yytext); $$->key = MATHSPAN; } DoubleDollarMathStart = '$' '$' !(Spacechar | Newline) DoubleDollarMathEnd = (!'\\' Nonspacechar) '$' '$' !NonPunctuation DoubleDollarMath = < DoubleDollarMathStart (!DoubleDollarMathEnd !(BlankLine BlankLine) .)* DoubleDollarMathEnd > { $$ = str(yytext); $$->key = MATHSPAN; } DollarMath = SingleDollarMath | DoubleDollarMath # This keeps the parser from getting bogged down on long strings of '*' or '_', # or strings of '*' or '_' with space on each side: UlOrStarLine = (UlLine | StarLine) { $$ = str(yytext); } StarLine = < "****" '*'* > | < Spacechar '*'+ &Spacechar > UlLine = < "____" '_'* > | < Spacechar '_'+ &Spacechar > Emph = EmphStar | EmphUl Whitespace = Spacechar | Newline EmphStar = '*' !Whitespace a:StartList ( !'*' b:Inline { a = cons(b, a); } | b:StrongStar { a = cons(b, a); } )+ <'*'> { $$ = list(EMPH, a); } EmphUl = '_' !Whitespace a:StartList ( !'_' b:Inline { a = cons(b, a); } | b:StrongUl { a = cons(b, a); } )+ <'_'> { $$ = list(EMPH, a); } Strong = StrongStar | StrongUl StrongStar = "**" !Whitespace a:StartList ( !"**" b:Inline { a = cons(b, a); })+ <"**"> { $$ = list(STRONG, a); } StrongUl = "__" !Whitespace a:StartList ( !"__" b:Inline { a = cons(b, a); })+ <"__"> { $$ = list(STRONG, a); } StrongAndEmph = StrongAndEmphStar | StrongAndEmphUl | EmphAndStrongStar | EmphAndStrongUl StrongAndEmphStar = "***" !Whitespace a:StartList ( !"***" !"**" !"*" b:Inline { a = cons(b, a); })+ <"***"> { $$ = list(EMPH, a); $$ = list(STRONG, $$); } EmphAndStrongStar = "*" &(PossibleEmphStrongStar) a:StartList ( !('*' !'*') b:InlineNoEmph { a = cons(b, a); })+ <"*"> { $$ = list(EMPH, a); } PossibleEmphStrongStar = "**" (!'\n' !'*' .)+ "**" (!'\n' !'*' .)+ '*' StrongAndEmphUl = "___" !Whitespace a:StartList ( !"___" b:Inline { a = cons(b, a); })+ <"___"> { $$ = list(EMPH, a); $$ = list(STRONG, $$); } EmphAndStrongUl = "_" &(PossibleEmphStrongUl) a:StartList ( !('_' !'_') b:InlineNoEmph { a = cons(b, a); })+ <"_"> { $$ = list(EMPH, a); } PossibleEmphStrongUl = "__" (!'\n' !'_' .)+ "__" (!'\n' !'_' .)+ '_' Link = ExplicitLink | ReferenceLink | AutoLink ReferenceLink = ReferenceLinkDouble | ReferenceLinkSingle ReferenceLinkDouble = ( a:Label < Spnl > !"[]" b:Label ) { GString *text = g_string_new(""); print_raw_node_tree(text, b->children); $$ = mk_link(a, text->str, NULL, NULL, NULL); g_string_free(text, TRUE); free_node_tree(b); } ReferenceLinkSingle = ( a:Label < (Spnl "[]")? > ) { $$ = mk_link(a, NULL, NULL, NULL, NULL); /* stash copy of raw source in case we need if */ $$->str = strdup(yytext); } CitationReference = CitationReferenceDouble | CitationReferenceSingle CitationReferenceDouble = !"[]" a:Label < Spnl > !"[]" b:RawCitationReference { char *label; label = label_from_node_tree(a); $$ = mk_link(a, b->str, NULL, NULL, NULL); $$->key = CITATION; if (strcmp(label,"notcited") == 0) $$->key = NOCITATION; free(label); free_node_tree(b); } CitationReferenceSingle = < (( "[]" Spnl b:RawCitationReference ) | ( b:RawCitationReference (Spnl "[]")? )) > { $$ = mk_link(NULL, b->str, NULL, NULL, NULL); $$->key = CITATION; free_node_tree(b); } RawCitationReference = "[#" < ( !Newline !']' . )+ > ']' { $$ = str(yytext); } Variable = "[%" < (!Newline !']' .)+ > ']' { $$ = str(yytext); $$->key = VARIABLE; } ExplicitLink = l:Label Spnl '(' Sp s:Source Spnl t:Title Sp <')'> { $$ = mk_link(l, NULL, s->str, t->str, NULL); free_node_tree(s); free_node_tree(t); } Source = ( '<' < SourceContents > '>' | < SourceContents > ) { $$ = str(yytext); $$->key = SOURCE; } SourceContents = ( ( !'(' !')' !'>' Nonspacechar )+ | '(' SourceContents ')')* Title = ( TitleSingle | TitleDouble | < "" > ) { $$ = str(yytext); $$->key = TITLE; } TitleSingle = '\'' < ( !( '\'' Sp ( ')' | Newline ) ) . )* > '\'' TitleDouble = '"' < ( !( '"' Sp ( ')' | Newline ) ) . )* > '"' AutoLink = AutoLinkUrl | AutoLinkEmail AutoLinkUrl = '<' < [A-Za-z]+ "://" ( !Newline !'>' . )+ > '>' { $$ = mk_link(str(yytext), NULL, yytext, NULL, NULL); } AutoLinkEmail = '<' ( "mailto:" )? < [-A-Za-z0-9+_./!%~$]+ '@' ( !Newline !'>' . )+ > '>' { char *mailto = malloc(strlen(yytext) + 8); sprintf(mailto, "mailto:%s", yytext); $$ = mk_link(str(yytext), NULL, mailto, NULL, NULL); free(mailto); } LinkReference = a:StartList NonindentSpace !"[]" l:Label ':' Spnl s:RefSrc t:RefTitle ( &{ !ext(EXT_COMPATIBILITY) } BlankLine+ { /* Get label for referencing */ GString *text = g_string_new(""); char *clean; print_raw_node_tree(text, l->children); clean = clean_string(text->str); if (a == NULL) { /* no attributes */ $$ = mk_link(NULL, clean, s->str, t->str, NULL); } else { $$ = mk_link(NULL, clean, s->str, t->str, a); } g_string_free(text, TRUE); free_node(s); free_node(t); free_node_tree(l); free(clean); $$->key = LINKREFERENCE; } Attributes = a:StartList (Attribute { a = cons($$,a); })+ { $$ = a; } Attribute = Spnl a:AttrKey '=' b:AttrValue { $$ = a; $$->children = b; } AttrKey = < AlphanumericAscii+ > { char *lab; lab = label_from_string(yytext); $$ = mk_str(lab); $$->key = ATTRKEY; free(lab); } AttrValue = (QuotedValue | UnQuotedValue) { $$ = str(yytext); $$->key = ATTRVALUE; } RefSrc = ( '<' < (!'>' Nonspacechar)+ > '>' | < Nonspacechar+ > ) { char *tmp = strdup(yytext); if ((tmp[0] == '<') && (tmp[strlen(tmp) - 1] == '>')) { tmp[strlen(tmp) - 1] = '\0'; $$ = str(&tmp[1]); } else { $$ = str(yytext); } $$->key = HTML; free(tmp); } RefTitle = ( RefTitleSingle | RefTitleDouble | RefTitleParens | EmptyTitle ) { $$ = str(yytext); $$->key = RAW; } EmptyTitle = < "" > RefTitleSingle = Spnl '\'' < ( !( '\'' Sp Newline | Newline | &{ !ext(EXT_COMPATIBILITY) } '\'' Sp AlphanumericAscii+ '=' ) . )* > '\'' RefTitleDouble = Spnl '"' < ( !('"' Sp Newline | Newline | &{ !ext(EXT_COMPATIBILITY) } '"' Sp AlphanumericAscii+ '=' ) . )* > '"' RefTitleParens = Spnl '(' < ( !(')' Sp Newline | Newline | &{ !ext(EXT_COMPATIBILITY) } ')' Sp AlphanumericAscii+ '=' ) . )* > ')' QuotedValue = '"' < (!'"' .)* > '"' UnQuotedValue = < (AlphanumericAscii | '.')+ > Abbreviation = '*' l:Label Sp ':' Sp RawLine { $$ = str(yytext); $$->key = ABBREVIATION; $$->children = l; } ImageBlock = Image Sp Newline BlankLine+ { if ($$->key == IMAGE) $$->key = IMAGEBLOCK; } Image = '!' ( !AutoLink Link ) { $$->key = IMAGE; } Label = < "[" !'[' ( !'^' !'#' &{ ext(EXT_NOTES) } | &. &{ !ext(EXT_NOTES) } ) a:StartList ( !']' Inline { a = cons($$, a); } )* ']'> { $$ = list(LIST, a); } AutoLabel = '[' < (!Newline !'^' !'#' !'%' . )( !Newline !']' . )+ > ']' &(!(Sp ('(' | '['))) { node *ref; $$ = str(yytext); $$->key = AUTOLABEL; /* And create a LINKREFERENCE so we can use it */ char *label = label_from_string(yytext); GString *anchor = g_string_new(label); g_string_prepend(anchor, "#"); ref = mk_link(NULL, label, anchor->str, NULL, NULL); g_string_free(anchor, true); free(label); ((parser_data *)G->data)->autolabels = cons(ref,((parser_data *)G->data)->autolabels); ref->key = LINKREFERENCE; } RawInline = ( '[' (!']' .)* ']' ) | . NoteReference = &{ ext(EXT_NOTES) } ( "[^" ) < ( !(Newline BlankLine) !']' RawInline )+ > ']' { /* Copy entire label, and ensure we are treated as a paragraph */ GString *original = g_string_new(yytext); g_string_append_c(original,'\n'); g_string_append_c(original,'\n'); /* Create a note reference */ $$ = str(yytext); $$->key = NOTEREFERENCE; /* Include RAW version for parsing in case this is an inline footnote */ node *raw = str(original->str); raw->key = RAW; node *source = list(NOTESOURCE, raw); source->str = strdup(""); $$->children = source; g_string_free(original, true); } Glossary = &{ ext(EXT_NOTES) } a:StartList NonindentSpace ref:RawNoteReference ':' Sp "glossary:" Sp (GlossaryTerm { a = cons($$, a); }) (GlossarySortKey { a = cons($$, a); })? Newline ( RawNoteBlock { a = cons($$, a); } ) ( &Indent RawNoteBlock { a = cons($$, a); } )* { node *label; label = str(ref->str); label->key = GLOSSARYLABEL; a = cons(label,a); $$ = list(GLOSSARYSOURCE, a); $$->str = strdup(ref->str); free_node(ref); } GlossaryTerm = < (!Newline !'(' .)+ > { $$ = mk_list(LIST, NULL); $$->str = 0; $$->children = mk_str(yytext); $$->key = GLOSSARYTERM; } GlossarySortKey = '(' < (!')' !Newline .)* > ')' { $$ = mk_str(yytext); $$->key = GLOSSARYSORTKEY; } Ticks1 = "`" !'`' Ticks2 = "``" !'`' Ticks3 = "```" !'`' Ticks4 = "````" !'`' Ticks5 = "`````" !'`' Code = ( Ticks1 Sp < ( ( !'`' Nonspacechar )+ | !Ticks1 '`'+ | !( Sp Ticks1 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks1 | Ticks2 Sp < ( ( !'`' Nonspacechar )+ | !Ticks2 '`'+ | !( Sp Ticks2 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks2 | Ticks3 Sp < ( ( !'`' Nonspacechar )+ | !Ticks3 '`'+ | !( Sp Ticks3 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks3 | Ticks4 Sp < ( ( !'`' Nonspacechar )+ | !Ticks4 '`'+ | !( Sp Ticks4 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks4 | Ticks5 Sp < ( ( !'`' Nonspacechar )+ | !Ticks5 '`'+ | !( Sp Ticks5 ) ( Spacechar | Newline !BlankLine ) )+ > Sp Ticks5 ) { $$ = str(yytext); $$->key = CODE; } FenceType = Sp RawLine { $$ = str(yytext); $$->key = VERBATIMTYPE; } Fenced = NonindentSpace (( Ticks3 a:FenceType < ( (!(NonindentSpace Ticks3) NonMatchingRawLine)* ) > NonindentSpace Ticks3 Sp Newline ) | ( Ticks4 a:FenceType < ( (!(NonindentSpace Ticks4) NonMatchingRawLine)* ) > NonindentSpace Ticks4 Sp Newline ) | ( Ticks5 a:FenceType < ( (!(NonindentSpace Ticks5) NonMatchingRawLine)* ) > NonindentSpace Ticks5 Sp Newline ) ) { $$ = str(yytext); $$->key = VERBATIMFENCE; $$->children = a; } Smart = &{ ext(EXT_SMART) } ( Ellipsis | Dash | SingleQuoted | DoubleQuoted | BackTickQuoted | Apostrophe ) Apostrophe = <'\''> { $$ = str(yytext); $$->key = APOSTROPHE; } Ellipsis = < ("..." | ". . .") > { $$ = str(yytext); $$->key = ELLIPSIS; } Dash = EmDash | EnDash EnDash = < ( "--" | '-' &Digit) > { $$ = str(yytext); $$->key = ENDASH; } EmDash = ( <"---"> ) { $$ = str(yytext); $$->key = EMDASH; } SingleQuoteStart = '\'' !(Spacechar | Newline) SingleQuoteEnd = '\'' !Alphanumeric SingleQuoted = SingleQuoteStart a:StartList ( !SingleQuoteEnd b:Inline { a = cons(b, a); } )* SingleQuoteEnd { $$ = mk_list(SINGLEQUOTED, a); } DoubleQuoteStart = '"' DoubleQuoteEnd = '"' DoubleQuoted = DoubleQuoteStart a:StartList ( !DoubleQuoteEnd !BackTickEnd !BackTickStart b:Inline { a = cons(b, a); } )* DoubleQuoteEnd { $$ = mk_list(DOUBLEQUOTED, a); } BackTickStart = '``' BackTickEnd = '\'\'' BackTickQuoted = BackTickStart a:StartList ( !DoubleQuoteEnd !BackTickEnd b:Inline { a = cons(b, a); } )* BackTickEnd { $$ = mk_list(DOUBLEQUOTED, a); } NonblankIndentedLine = !BlankLine IndentedLine VerbatimChunk = a:StartList ( BlankLine { a = cons(mk_str("\n"), a); } )* ( NonblankIndentedLine { a = cons($$, a); } )+ { $$ = mk_str_from_list(a, false); } Verbatim = BlankLine* a:StartList ( VerbatimChunk { a = cons($$, a); } )+ BlankLine* { $$ = mk_str_from_list(a, false); $$->key = VERBATIM; } HorizontalRule = NonindentSpace ( '*' Sp '*' Sp '*' (Sp '*')* | '-' Sp '-' Sp '-' (Sp '-')* | '_' Sp '_' Sp '_' (Sp '_')*) Sp Newline BlankLine* { $$ = mk_node(HRULE); } DefinitionList = a:StartList &(TermLine+ Newline? NonindentSpace ':') ( (Term { a = cons($$, a); } )+ BlankLine? (Definition { a = cons($$, a);})+ BlankLine* )+ { $$ = mk_list(LIST, a); $$->key = DEFLIST; } TermLine = !':' !BlankLine (!Newline .)* Newline Term = a:StartList !BlankLine !':' (!Newline !Endline Inline {a = cons($$, a);} )+ Sp Newline { $$ = mk_list(TERM,a); } Definition = (a:StartList b:StartList (BlankLine { b = cons(mk_str("\n"),b); } )? ( NonindentSpace ':' Sp RawLine { a = cons(mk_str(yytext), a);}) ( !':' !BlankLine RawLine { a = cons(mk_str(yytext), a);})* ( BlankLine {a = cons(mk_str("\n"),a);} (IndentedLine { a = cons($$,a);})+ { a = cons(mk_str("\n"),a);} )* ) { if (b != NULL) { a = cons(b,a);} node *raw = mk_str_from_list(a, false); raw->key = RAW; $$ = list(DEFINITION,raw); } Bullet = !HorizontalRule NonindentSpace ('+' | '*' | '-') Spacechar+ BulletNoSpace = !HorizontalRule NonindentSpace ('+' | '*' | '-') BulletList = &Bullet (ListTight | ListLoose) { $$->key = BULLETLIST; } ListTight = a:StartList ( ListItemTight { a = cons($$, a); } )+ BlankLine* !(Bullet | Enumerator | BulletNoSpace &EmptyList | EnumeratorNoSpace &EmptyList ) { $$ = list(LIST, a); } ListLoose = a:StartList ( b:ListItem BlankLine* { node *li; li = b->children; size_t size = strlen(li->str); li->str = realloc(li->str, size + 3); strncat(li->str + size, "\n\n", 2); /* In loose list, \n\n added to end of each element */ a = cons(b, a); } )+ { $$ = list(LIST, a); } ListItem = < ( Bullet | Enumerator | BulletNoSpace &EmptyList | EnumeratorNoSpace &EmptyList )> a:StartList ( EmptyList { a = cons($$, a); } | ( ListBlock { a = cons($$, a); } ( ListContinuationBlock { a = cons($$, a); } )* )) { node *raw; raw = mk_str_from_list(a, false); raw->key = RAW; $$ = node(LISTITEM); $$->children = raw; } ListItemTight = ( Bullet | Enumerator | BulletNoSpace &EmptyList | EnumeratorNoSpace &EmptyList ) a:StartList ( EmptyList { a = cons($$, a); } | ( ListBlock { a = cons($$, a); } ( !BlankLine ListContinuationBlock { a = cons($$, a); } )* !ListContinuationBlock)) { node *raw; raw = mk_str_from_list(a, false); raw->key = RAW; $$ = node(LISTITEM); $$->children = raw; } EmptyList = BlankLine { $$ = mk_str(""); } ListBlock = a:StartList !BlankLine !Heading Line { a = cons($$, a); } ( ListBlockLine { a = cons($$, a); } )* { $$ = mk_str_from_list(a, false); } ListContinuationBlock = a:StartList ( < BlankLine* > { if (strlen(yytext) == 0) a = cons(str("\001"), a); /* block separator */ else a = cons(str(yytext), a); } ) ( Indent ListBlock { a = cons($$, a); } )+ { $$ = mk_str_from_list(a, false); } Enumerator = NonindentSpace [0-9]+ '.' Spacechar+ EnumeratorNoSpace = NonindentSpace [0-9]+ '.' OrderedList = &Enumerator (ListTight | ListLoose) { $$->key = ORDEREDLIST; } ListBlockLine = !BlankLine !Heading !( Indent? (Bullet | Enumerator | BulletNoSpace &EmptyList | EnumeratorNoSpace &EmptyList ) ) !HorizontalRule OptionallyIndentedLine # Parsers for different kinds of block-level HTML content. # This is repetitive due to constraints of PEG grammar. HtmlBlockOpenAddress = '<' Spnl ("address" | "ADDRESS") Spnl HtmlAttribute* '>' HtmlBlockCloseAddress = '<' Spnl '/' ("address" | "ADDRESS") Spnl '>' HtmlBlockAddress = HtmlBlockOpenAddress (HtmlBlockAddress | !HtmlBlockCloseAddress .)* HtmlBlockCloseAddress HtmlBlockOpenArticle = '<' Spnl ("article" | "ARTICLE") Spnl HtmlAttribute* '>' HtmlBlockCloseArticle = '<' Spnl '/' ("article" | "ARTICLE") Spnl '>' HtmlBlockArticle = HtmlBlockOpenArticle (HtmlBlockArticle | !HtmlBlockCloseArticle .)* HtmlBlockCloseArticle HtmlBlockOpenAside = '<' Spnl ("aside" | "ASIDE") Spnl HtmlAttribute* '>' HtmlBlockCloseAside = '<' Spnl '/' ("aside" | "ASIDE") Spnl '>' HtmlBlockAside = HtmlBlockOpenAside (HtmlBlockAside | !HtmlBlockCloseAside .)* HtmlBlockCloseAside HtmlBlockOpenBlockquote = '<' Spnl ("blockquote" | "BLOCKQUOTE") Spnl HtmlAttribute* '>' HtmlBlockCloseBlockquote = '<' Spnl '/' ("blockquote" | "BLOCKQUOTE") Spnl '>' HtmlBlockBlockquote = HtmlBlockOpenBlockquote (HtmlBlockBlockquote | !HtmlBlockCloseBlockquote .)* HtmlBlockCloseBlockquote HtmlBlockOpenCanvas = '<' Spnl ("canvas" | "CANVAS") Spnl HtmlAttribute* '>' HtmlBlockCloseCanvas = '<' Spnl '/' ("canvas" | "CANVAS") Spnl '>' HtmlBlockCanvas = HtmlBlockOpenCanvas (HtmlBlockCanvas | !HtmlBlockCloseCanvas .)* HtmlBlockCloseCanvas HtmlBlockOpenCenter = '<' Spnl ("center" | "CENTER") Spnl HtmlAttribute* '>' HtmlBlockCloseCenter = '<' Spnl '/' ("center" | "CENTER") Spnl '>' HtmlBlockCenter = HtmlBlockOpenCenter (HtmlBlockCenter | !HtmlBlockCloseCenter .)* HtmlBlockCloseCenter HtmlBlockOpenDir = '<' Spnl ("dir" | "DIR") Spnl HtmlAttribute* '>' HtmlBlockCloseDir = '<' Spnl '/' ("dir" | "DIR") Spnl '>' HtmlBlockDir = HtmlBlockOpenDir (HtmlBlockDir | !HtmlBlockCloseDir .)* HtmlBlockCloseDir HtmlBlockOpenDiv = '<' Spnl ("div" | "DIV") Spnl HtmlAttribute* '>' HtmlBlockCloseDiv = '<' Spnl '/' ("div" | "DIV") Spnl '>' HtmlBlockDiv = HtmlBlockOpenDiv (HtmlBlockDiv | !HtmlBlockCloseDiv .)* HtmlBlockCloseDiv HtmlBlockOpenDl = '<' Spnl ("dl" | "DL") Spnl HtmlAttribute* '>' HtmlBlockCloseDl = '<' Spnl '/' ("dl" | "DL") Spnl '>' HtmlBlockDl = HtmlBlockOpenDl (HtmlBlockDl | !HtmlBlockCloseDl .)* HtmlBlockCloseDl HtmlBlockOpenFieldset = '<' Spnl ("fieldset" | "FIELDSET") Spnl HtmlAttribute* '>' HtmlBlockCloseFieldset = '<' Spnl '/' ("fieldset" | "FIELDSET") Spnl '>' HtmlBlockFieldset = HtmlBlockOpenFieldset (HtmlBlockFieldset | !HtmlBlockCloseFieldset .)* HtmlBlockCloseFieldset HtmlBlockOpenFigure = '<' Spnl ("figure" | "FIGURE") Spnl HtmlAttribute* '>' HtmlBlockCloseFigure = '<' Spnl '/' ("figure" | "FIGURE") Spnl '>' HtmlBlockFigure = HtmlBlockOpenFigure (HtmlBlockFigure | !HtmlBlockCloseFigure .)* HtmlBlockCloseFigure HtmlBlockOpenFooter = '<' Spnl ("footer" | "FOOTER") Spnl HtmlAttribute* '>' HtmlBlockCloseFooter = '<' Spnl '/' ("footer" | "FOOTER") Spnl '>' HtmlBlockFooter = HtmlBlockOpenFooter (HtmlBlockFooter | !HtmlBlockCloseFooter .)* HtmlBlockCloseFooter HtmlBlockOpenForm = '<' Spnl ("form" | "FORM") Spnl HtmlAttribute* '>' HtmlBlockCloseForm = '<' Spnl '/' ("form" | "FORM") Spnl '>' HtmlBlockForm = HtmlBlockOpenForm (HtmlBlockForm | !HtmlBlockCloseForm .)* HtmlBlockCloseForm HtmlBlockOpenHeader = '<' Spnl ("header" | "HEADER") Spnl HtmlAttribute* '>' HtmlBlockCloseHeader = '<' Spnl '/' ("header" | "HEADER") Spnl '>' HtmlBlockHeader = HtmlBlockOpenHeader (HtmlBlockHeader | !HtmlBlockCloseHeader .)* HtmlBlockCloseHeader HtmlBlockOpenHgroup = '<' Spnl ("hgroup" | "HGROUP") Spnl HtmlAttribute* '>' HtmlBlockCloseHgroup = '<' Spnl '/' ("hgroup" | "HGROUP") Spnl '>' HtmlBlockHgroup = HtmlBlockOpenHgroup (HtmlBlockHgroup | !HtmlBlockCloseHgroup .)* HtmlBlockCloseHgroup HtmlBlockOpenH1 = '<' Spnl ("h1" | "H1") Spnl HtmlAttribute* '>' HtmlBlockCloseH1 = '<' Spnl '/' ("h1" | "H1") Spnl '>' HtmlBlockH1 = HtmlBlockOpenH1 (HtmlBlockH1 | !HtmlBlockCloseH1 .)* HtmlBlockCloseH1 HtmlBlockOpenH2 = '<' Spnl ("h2" | "H2") Spnl HtmlAttribute* '>' HtmlBlockCloseH2 = '<' Spnl '/' ("h2" | "H2") Spnl '>' HtmlBlockH2 = HtmlBlockOpenH2 (HtmlBlockH2 | !HtmlBlockCloseH2 .)* HtmlBlockCloseH2 HtmlBlockOpenH3 = '<' Spnl ("h3" | "H3") Spnl HtmlAttribute* '>' HtmlBlockCloseH3 = '<' Spnl '/' ("h3" | "H3") Spnl '>' HtmlBlockH3 = HtmlBlockOpenH3 (HtmlBlockH3 | !HtmlBlockCloseH3 .)* HtmlBlockCloseH3 HtmlBlockOpenH4 = '<' Spnl ("h4" | "H4") Spnl HtmlAttribute* '>' HtmlBlockCloseH4 = '<' Spnl '/' ("h4" | "H4") Spnl '>' HtmlBlockH4 = HtmlBlockOpenH4 (HtmlBlockH4 | !HtmlBlockCloseH4 .)* HtmlBlockCloseH4 HtmlBlockOpenH5 = '<' Spnl ("h5" | "H5") Spnl HtmlAttribute* '>' HtmlBlockCloseH5 = '<' Spnl '/' ("h5" | "H5") Spnl '>' HtmlBlockH5 = HtmlBlockOpenH5 (HtmlBlockH5 | !HtmlBlockCloseH5 .)* HtmlBlockCloseH5 HtmlBlockOpenH6 = '<' Spnl ("h6" | "H6") Spnl HtmlAttribute* '>' HtmlBlockCloseH6 = '<' Spnl '/' ("h6" | "H6") Spnl '>' HtmlBlockH6 = HtmlBlockOpenH6 (HtmlBlockH6 | !HtmlBlockCloseH6 .)* HtmlBlockCloseH6 HtmlBlockOpenMenu = '<' Spnl ("menu" | "MENU") Spnl HtmlAttribute* '>' HtmlBlockCloseMenu = '<' Spnl '/' ("menu" | "MENU") Spnl '>' HtmlBlockMenu = HtmlBlockOpenMenu (HtmlBlockMenu | !HtmlBlockCloseMenu .)* HtmlBlockCloseMenu HtmlBlockOpenNoframes = '<' Spnl ("noframes" | "NOFRAMES") Spnl HtmlAttribute* '>' HtmlBlockCloseNoframes = '<' Spnl '/' ("noframes" | "NOFRAMES") Spnl '>' HtmlBlockNoframes = HtmlBlockOpenNoframes (HtmlBlockNoframes | !HtmlBlockCloseNoframes .)* HtmlBlockCloseNoframes HtmlBlockOpenNoscript = '<' Spnl ("noscript" | "NOSCRIPT") Spnl HtmlAttribute* '>' HtmlBlockCloseNoscript = '<' Spnl '/' ("noscript" | "NOSCRIPT") Spnl '>' HtmlBlockNoscript = HtmlBlockOpenNoscript (HtmlBlockNoscript | !HtmlBlockCloseNoscript .)* HtmlBlockCloseNoscript HtmlBlockOpenOl = '<' Spnl ("ol" | "OL") Spnl HtmlAttribute* '>' HtmlBlockCloseOl = '<' Spnl '/' ("ol" | "OL") Spnl '>' HtmlBlockOl = HtmlBlockOpenOl (HtmlBlockOl | !HtmlBlockCloseOl .)* HtmlBlockCloseOl HtmlBlockOpenP = '<' Spnl ("p" | "P") Spnl HtmlAttribute* '>' HtmlBlockCloseP = '<' Spnl '/' ("p" | "P") Spnl '>' HtmlBlockP = HtmlBlockOpenP (HtmlBlockP | !HtmlBlockCloseP .)* HtmlBlockCloseP HtmlBlockOpenPre = '<' Spnl ("pre" | "PRE") Spnl HtmlAttribute* '>' HtmlBlockClosePre = '<' Spnl '/' ("pre" | "PRE") Spnl '>' HtmlBlockPre = HtmlBlockOpenPre (HtmlBlockPre | !HtmlBlockClosePre .)* HtmlBlockClosePre HtmlBlockOpenProgress = '<' Spnl ("progress" | "PROGRESS") Spnl HtmlAttribute* '>' HtmlBlockCloseProgress = '<' Spnl '/' ("progress" | "PROGRESS") Spnl '>' HtmlBlockProgress = HtmlBlockOpenProgress (HtmlBlockProgress | !HtmlBlockCloseProgress .)* HtmlBlockCloseProgress HtmlBlockOpenSection = '<' Spnl ("section" | "SECTION") Spnl HtmlAttribute* '>' HtmlBlockCloseSection = '<' Spnl '/' ("section" | "SECTION") Spnl '>' HtmlBlockSection = HtmlBlockOpenSection (HtmlBlockSection | !HtmlBlockCloseSection .)* HtmlBlockCloseSection HtmlBlockOpenTable = '<' Spnl ("table" | "TABLE") Spnl HtmlAttribute* '>' HtmlBlockCloseTable = '<' Spnl '/' ("table" | "TABLE") Spnl '>' HtmlBlockTable = HtmlBlockOpenTable (HtmlBlockTable | !HtmlBlockCloseTable .)* HtmlBlockCloseTable HtmlBlockOpenUl = '<' Spnl ("ul" | "UL") Spnl HtmlAttribute* '>' HtmlBlockCloseUl = '<' Spnl '/' ("ul" | "UL") Spnl '>' HtmlBlockUl = HtmlBlockOpenUl (HtmlBlockUl | !HtmlBlockCloseUl .)* HtmlBlockCloseUl HtmlBlockOpenVideo = '<' Spnl ("video" | "VIDEO") Spnl HtmlAttribute* '>' HtmlBlockCloseVideo = '<' Spnl '/' ("video" | "VIDEO") Spnl '>' HtmlBlockVideo = HtmlBlockOpenVideo (HtmlBlockVideo | !HtmlBlockCloseVideo .)* HtmlBlockCloseVideo HtmlBlockOpenDd = '<' Spnl ("dd" | "DD") Spnl HtmlAttribute* '>' HtmlBlockCloseDd = '<' Spnl '/' ("dd" | "DD") Spnl '>' HtmlBlockDd = HtmlBlockOpenDd (HtmlBlockDd | !HtmlBlockCloseDd .)* HtmlBlockCloseDd HtmlBlockOpenDt = '<' Spnl ("dt" | "DT") Spnl HtmlAttribute* '>' HtmlBlockCloseDt = '<' Spnl '/' ("dt" | "DT") Spnl '>' HtmlBlockDt = HtmlBlockOpenDt (HtmlBlockDt | !HtmlBlockCloseDt .)* HtmlBlockCloseDt HtmlBlockOpenFrameset = '<' Spnl ("frameset" | "FRAMESET") Spnl HtmlAttribute* '>' HtmlBlockCloseFrameset = '<' Spnl '/' ("frameset" | "FRAMESET") Spnl '>' HtmlBlockFrameset = HtmlBlockOpenFrameset (HtmlBlockFrameset | !HtmlBlockCloseFrameset .)* HtmlBlockCloseFrameset HtmlBlockOpenLi = '<' Spnl ("li" | "LI") Spnl HtmlAttribute* '>' HtmlBlockCloseLi = '<' Spnl '/' ("li" | "LI") Spnl '>' HtmlBlockLi = HtmlBlockOpenLi (HtmlBlockLi | !HtmlBlockCloseLi .)* HtmlBlockCloseLi HtmlBlockOpenTbody = '<' Spnl ("tbody" | "TBODY") Spnl HtmlAttribute* '>' HtmlBlockCloseTbody = '<' Spnl '/' ("tbody" | "TBODY") Spnl '>' HtmlBlockTbody = HtmlBlockOpenTbody (HtmlBlockTbody | !HtmlBlockCloseTbody .)* HtmlBlockCloseTbody HtmlBlockOpenTd = '<' Spnl ("td" | "TD") Spnl HtmlAttribute* '>' HtmlBlockCloseTd = '<' Spnl '/' ("td" | "TD") Spnl '>' HtmlBlockTd = HtmlBlockOpenTd (HtmlBlockTd | !HtmlBlockCloseTd .)* HtmlBlockCloseTd HtmlBlockOpenTfoot = '<' Spnl ("tfoot" | "TFOOT") Spnl HtmlAttribute* '>' HtmlBlockCloseTfoot = '<' Spnl '/' ("tfoot" | "TFOOT") Spnl '>' HtmlBlockTfoot = HtmlBlockOpenTfoot (HtmlBlockTfoot | !HtmlBlockCloseTfoot .)* HtmlBlockCloseTfoot HtmlBlockOpenTh = '<' Spnl ("th" | "TH") Spnl HtmlAttribute* '>' HtmlBlockCloseTh = '<' Spnl '/' ("th" | "TH") Spnl '>' HtmlBlockTh = HtmlBlockOpenTh (HtmlBlockTh | !HtmlBlockCloseTh .)* HtmlBlockCloseTh HtmlBlockOpenThead = '<' Spnl ("thead" | "THEAD") Spnl HtmlAttribute* '>' HtmlBlockCloseThead = '<' Spnl '/' ("thead" | "THEAD") Spnl '>' HtmlBlockThead = HtmlBlockOpenThead (HtmlBlockThead | !HtmlBlockCloseThead .)* HtmlBlockCloseThead HtmlBlockOpenTr = '<' Spnl ("tr" | "TR") Spnl HtmlAttribute* '>' HtmlBlockCloseTr = '<' Spnl '/' ("tr" | "TR") Spnl '>' HtmlBlockTr = HtmlBlockOpenTr (HtmlBlockTr | !HtmlBlockCloseTr .)* HtmlBlockCloseTr HtmlBlockOpenScript = '<' Spnl ("script" | "SCRIPT") Spnl HtmlAttribute* '>' HtmlBlockCloseScript = '<' Spnl '/' ("script" | "SCRIPT") Spnl '>' HtmlBlockScript = HtmlBlockOpenScript (!HtmlBlockCloseScript .)* HtmlBlockCloseScript HtmlBlockOpenHead = '<' Spnl ("head" | "HEAD") Spnl HtmlAttribute* '>' HtmlBlockCloseHead = '<' Spnl '/' ("head" | "HEAD") Spnl '>' HtmlBlockHead = HtmlBlockOpenHead (!HtmlBlockCloseHead .)* HtmlBlockCloseHead HtmlBlockOpenMain = '<' Spnl ("main" | "MAIN") Spnl HtmlAttribute* '>' HtmlBlockCloseMain = '<' Spnl '/' ("main" | "MAIN") Spnl '>' HtmlBlockMain = HtmlBlockOpenMain (!HtmlBlockCloseMain .)* HtmlBlockCloseMain HtmlBlockOpenNav = '<' Spnl ("nav" | "NAV") Spnl HtmlAttribute* '>' HtmlBlockCloseNav = '<' Spnl '/' ("nav" | "NAV") Spnl '>' HtmlBlockNav = HtmlBlockOpenNav (!HtmlBlockCloseNav .)* HtmlBlockCloseNav HtmlBlockOpenDel = '<' Spnl ("del" | "DEL") Spnl HtmlAttribute* '>' HtmlBlockCloseDel = '<' Spnl '/' ("del" | "DEL") Spnl '>' HtmlBlockDel = HtmlBlockOpenDel (!HtmlBlockCloseDel .)* HtmlBlockCloseDel HtmlBlockOpenIns = '<' Spnl ("ins" | "INS") Spnl HtmlAttribute* '>' HtmlBlockCloseIns = '<' Spnl '/' ("ins" | "INS") Spnl '>' HtmlBlockIns = HtmlBlockOpenIns (!HtmlBlockCloseIns .)* HtmlBlockCloseIns HtmlBlockOpenMark = '<' Spnl ("mark" | "MARK") Spnl HtmlAttribute* '>' HtmlBlockCloseMark = '<' Spnl '/' ("mark" | "MARK") Spnl '>' HtmlBlockMark = HtmlBlockOpenMark (!HtmlBlockCloseMark .)* HtmlBlockCloseMark HtmlBlockInTags = HtmlBlockAddress | HtmlBlockArticle | HtmlBlockAside | HtmlBlockCanvas | HtmlBlockBlockquote | HtmlBlockCenter | HtmlBlockDir | HtmlBlockDiv | HtmlBlockDl | HtmlBlockFieldset | HtmlBlockFigure | HtmlBlockFooter | HtmlBlockForm | HtmlBlockHeader | HtmlBlockHgroup | HtmlBlockH1 | HtmlBlockH2 | HtmlBlockH3 | HtmlBlockH4 | HtmlBlockH5 | HtmlBlockH6 | HtmlBlockMenu | HtmlBlockNoframes | HtmlBlockNoscript | HtmlBlockOl | HtmlBlockP | HtmlBlockPre | HtmlBlockProgress | HtmlBlockSection | HtmlBlockTable | HtmlBlockUl | HtmlBlockVideo | HtmlBlockDd | HtmlBlockDt | HtmlBlockFrameset | HtmlBlockLi | HtmlBlockTbody | HtmlBlockTd | HtmlBlockTfoot | HtmlBlockTh | HtmlBlockThead | HtmlBlockTr | HtmlBlockScript | HtmlBlockHead | HtmlBlockMain | HtmlBlockNav | HtmlBlockDel | HtmlBlockIns | HtmlBlockMark HtmlBlock = !MarkdownHtmlTagOpen < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing ) > BlankLine+ { if (ext(EXT_FILTER_HTML)) { $$ = mk_list(LIST, NULL); } else { $$ = str(yytext); if ( ext(EXT_PROCESS_HTML)) $$->key = RAW; else $$->key = HTMLBLOCK; } } MarkdownHtmlBlock = &MarkdownHtmlTagOpen < ( HtmlBlockInTags | HtmlComment | HtmlBlockSelfClosing) > BlankLine+ { $$ = str(yytext); $$->key = RAW; } MarkdownHtmlAttribute = ("markdown" | "MARKDOWN") Spnl '=' Spnl ('"' Spnl)? "1" (Spnl '"')? Spnl MarkdownHtmlTagOpen = a:StartList '<' {a = cons(mk_str("<"),a);} Spnl {a = cons(mk_str(yytext),a);} &Spacechar Spnl (!MarkdownHtmlAttribute {a = cons(mk_str(" "),a); a = cons(mk_str(yytext),a);})* MarkdownHtmlAttribute ( {a = cons(mk_str(" "),a); a = cons(mk_str(yytext),a);})* '>' { a = cons(mk_str(">"),a);} { $$ = mk_str_from_list(a,false); $$->key = HTML; } HtmlBlockSelfClosing = '<' Spnl HtmlBlockType Spnl HtmlAttribute* '/' Spnl '>' HtmlBlockType = "address" | "blockquote" | "center" | "dir" | "div" | "dl" | "fieldset" | "form" | "h1" | "h2" | "h3" | "h4" | "h5" | "h6" | "hr" | "isindex" | "menu" | "noframes" | "noscript" | "ol" | "pre" | "p" | "table" | "ul" | "dd" | "dt" | "frameset" | "li" | "tbody" | "td" | "tfoot" | "th" | "thead" | "tr" | "script" | "ADDRESS" | "BLOCKQUOTE" | "CENTER" | "DIR" | "DIV" | "DL" | "FIELDSET" | "FORM" | "H1" | "H2" | "H3" | "H4" | "H5" | "H6" | "HR" | "ISINDEX" | "MENU" | "NOFRAMES" | "NOSCRIPT" | "OL" | "PRE" | "P" | "TABLE" | "UL" | "DD" | "DT" | "FRAMESET" | "LI" | "TBODY" | "TD" | "TFOOT" | "TH" | "THEAD" | "TR" | "SCRIPT" StyleOpen = '<' Spnl ("style" | "STYLE") Spnl HtmlAttribute* '>' StyleClose = '<' Spnl '/' ("style" | "STYLE") Spnl '>' InStyleTags = StyleOpen (!StyleClose .)* StyleClose StyleBlock = < InStyleTags > BlankLine* { if (ext(EXT_FILTER_STYLES)) { $$ = mk_list(LIST, NULL); } else { $$ = mk_str(yytext); $$->key = HTMLBLOCK; } } Table = a:StartList b:StartList (TableCaption { b = cons($$, b);})? (TableBody { $$->key = TABLEHEAD; a = cons($$, a); })? (SeparatorLine { if (a == NULL) a = $$; else append_list($$,a); } ) (TableBody { a = cons($$, a);} ) (BlankLine !TableCaption TableBody { a = cons($$, a); } &(TableCaption | BlankLine | Heading) )* ( (TableCaption { b = cons($$, b);} &BlankLine) | &BlankLine | &Heading) # Requires blank line to end table "block" { if (b != NULL) { append_list(b,a); }; $$ = list(TABLE, a); } TableBody = a:StartList (TableRow {a = cons($$, a);})+ { $$ = list(TABLEBODY, a);} TableRow = a:StartList (!SeparatorLine &(TableLine) CellDivider? (TableCell { a = cons($$, a); })+ ) Sp { $$ = list(TABLEROW, a); } TableLine = (!Newline !CellDivider .)* CellDivider TableCell = ExtendedCell | EmptyCell | FullCell ExtendedCell = (EmptyCell | FullCell) { node *span; span = str(yytext); span->key = CELLSPAN; span->next = $$->children; $$->children = span; } FullCell = Sp a:StartList ((!Newline !Endline !CellDivider !(Sp &CellDivider) Inline ) { a = cons($$,a)})+ ( CellDivider )? { $$ = list(TABLECELL,a); } EmptyCell = CellDivider { $$ = node(TABLECELL); } SeparatorLine = a:StartList &(TableLine) CellDivider? ( &HeaderAlignmentCell AlignmentCell { a = cons(str("h"), a); a = cons($$, a);} | AlignmentCell { a = cons($$, a); })+ Sp Newline { $$ = mk_str_from_list(a,false); $$->key = TABLESEPARATOR; } HeaderAlignmentCell = Sp ':'? '='+ AlignmentCell = Sp (!CellDivider ( LeftAlignWrap | CenterAlignWrap | RightAlignWrap | LeftAlign | CenterAlign | RightAlign)) Sp ( CellDivider )? LeftAlignWrap = ':'? ('-'+ | '='+) '+' &(!'-' !'=' !':') { $$ = mk_str("L");} LeftAlign = ':'? ('-'+ | '='+) &(!'-' !':') { $$ = mk_str("l");} CenterAlignWrap = ':' ('-'+ | '='+)? '+' ':' &(!'-' !'=' !':') { $$ = mk_str("C");} CenterAlign = ':' ('-'+ | '='+)? ':' &(!'-' !'=' !':') { $$ = mk_str("c");} RightAlignWrap = ('-'+ | '='+) ':' '+' &(!'-' !'=' !':') { $$ = mk_str("R");} RightAlign = ('-'+ | '='+)':' &(!'-' !'=' !':') { $$ = mk_str("r");} CellDivider = '|' TableCaption = b:StartList a:Label ( c:AutoLabel { b = c; b->key = TABLELABEL;})? Sp Newline { $$ = a; $$->key = TABLECAPTION; if ( (b != NULL) && (b->key == TABLELABEL) ) { b->next = $$->children; $$->children = b; } } RawNoteReference = ( "[^" | "[#" ) < ( !Newline !']' . )+ > ']' { $$ = str(yytext); } Note = &{ ext(EXT_NOTES) } NonindentSpace ref:RawNoteReference ':' Sp a:StartList ( RawNoteBlock { a = cons($$, a); } ) ( &Indent RawNoteBlock { a = cons($$, a); } )* { node *label; label = str(ref->str); label->key = NOTELABEL; a = cons(label,a); $$ = list(NOTESOURCE, a); $$->str = strdup(ref->str); free_node(ref); } RawNoteBlock = a:StartList ( !BlankLine !(NonindentSpace RawNoteReference ':') OptionallyIndentedLine { a = cons($$, a); } )+ ( < BlankLine* > { a = cons(str(yytext), a); } ) { $$ = mk_str_from_list(a, true); $$->key = RAW; } DocForOPML = BOM? a:StartList ( &{ !ext(EXT_COMPATIBILITY) } &( (YAMLStart)? MetaDataKey Sp ':' Sp (!Newline)) MetaData { a = cons($$, a); })? ( OPMLBlock { a = cons($$, a); } )* BlankLine* { ((parser_data *)G->data)->result = reverse_list(a); } OPMLBlock = BlankLine* ( OPMLHeadingSection | OPMLPlain ) OPMLHeadingSection = a:StartList OPMLHeading { a = cons($$, a); } (OPMLSectionBlock {a = cons($$, a); })* { $$ = mk_list(HEADINGSECTION, a);} OPMLHeading = OPMLAtxHeading | OPMLSetextHeading OPMLAtxHeading = &(Heading) s:AtxStart Sp < (!Newline !(Sp '#'* Sp Newline) .)* > (Sp '#'+)? Sp Newline { $$ = str(yytext); $$->key = s->key; free_node(s); } OPMLSetextHeading = OPMLSetextHeading1 | OPMLSetextHeading2 OPMLSetextHeading1 = < (!'\r' !'\n' .)* > Newline SetextBottom1 { $$ = str(yytext); $$->key = H1; } OPMLSetextHeading2 = < (!'\r' !'\n' .)* > Newline SetextBottom2 { $$ = str(yytext); $$->key = H2; } OPMLSectionBlock = BlankLine* !OPMLHeading OPMLPlain OPMLPlain = a:StartList (!BlankLine !Heading Line { a = cons($$,a); })+ { $$ = mk_list(PLAIN, a); } DocForTOC = BOM? a: StartList (&( (YAMLStart)? MetaDataKey Sp ':' Sp (!Newline)) z:MetaData { free_node_tree(z); } )? ( TOCBlock { a = cons($$, a); } | TOCPlain)* BlankLine* { ((parser_data *)G->data)->result = reverse_list(a); } TOCBlock = BlankLine* TOCHeadingSection TOCHeadingSection = a:StartList Heading { a = cons($$, a); } (TOCSectionBlock )* { $$ = mk_list(HEADINGSECTION, a); } TOCSectionBlock = BlankLine* !Heading TOCPlain TOCPlain = ( (y:Fenced { free_node_tree(y); } ) | (!BlankLine !Heading z:Line { free_node_tree(z); }) )+ # Critic Markup CriticMarkup = CriticAddition | CriticDeletion | CriticSubstitution | CriticHighlight | CriticComment CriticAddition = ('{++' < (!'++}' .)* > '++}') { $$ = str(yytext); $$->key = CRITICADDITION; } CriticDeletion = ('{--' < (!'--}' .)* > '--}') { $$ = str(yytext); $$->key = CRITICDELETION; } CriticSubstitution = a:StartList ( '{~~' CriticSubstDel { a = cons($$,a); } '~>' CriticSubstAdd { a = cons($$,a); } '~~}') { $$ = list(CRITICSUBSTITUTION, a); } CriticSubstDel = < (!'~>' .)* > { $$ = str(yytext); $$->key = CRITICDELETION; } CriticSubstAdd = < (!'~~}' .)* > { $$ = str(yytext); $$->key = CRITICADDITION; } CriticHighlight = ('{==' < (!'==}' .)* > '==}') { $$ = str(yytext); $$->key = CRITICHIGHLIGHT; } CriticComment = ('{>>' < (!'<<}' .)* > '<<}') { $$ = str(yytext); $$->key = CRITICCOMMENT; } DocForCritic = BOM? a:StartList ( CriticString { a = cons($$, a); } | RawString { a = cons($$, a); } | FalseMatch { a = cons($$, a); } )* ( RawString { a = cons($$, a); } )* Eof { ((parser_data *)G->data)->result = reverse_list(a); } CriticString = CriticMarkup RawString = < (!( "{++" | "{--" | "{~~" | "{==" | "{>>" ) .)+ > { $$ = str(yytext); } FalseMatch = < ( "{++" | "{--" | "{~~" | "{==" | "{>>" ) > { $$ = str(yytext); } %% /* process_raw_blocks -- follow the tree and process any RAW nodes and insert them into the tree */ node * process_raw_blocks(node * n, unsigned long extensions) { /* from the parser data we get the parent node and pointer to reference list */ node *current = NULL; node *last_child = NULL; char *contents; char *saveptr = NULL; GREG g; current = n; while (current != NULL) { if (current->key == RAW) { /* Process this RAW block */ yyinit(&g); contents = strtok_r(current->str, "\001", &saveptr); current->key = LIST; g.data = mk_parser_data(contents, (extensions | EXT_NO_METADATA )); while (yyparse(&g)); current->children = ((parser_data *)g.data)->result; free((parser_data *)g.data); /* we're not using free_parser_data to preserve result tree */ yydeinit(&g); last_child = current->children; while ((contents = strtok_r(NULL, "\001", &saveptr))) { while (last_child->next != NULL) last_child = last_child->next; yyinit(&g); g.data = mk_parser_data(contents, (extensions | EXT_NO_METADATA )); while (yyparse(&g)); last_child->next = ((parser_data *)g.data)->result; free((parser_data *)g.data); yydeinit(&g); } free(current->str); current->str = NULL; } if (current->children != NULL) { /* Recurse into children */ current->children = process_raw_blocks(current->children, extensions); } current = current->next; } return n; } char * markdown_to_string(const char * source, unsigned long extensions, int format) { char *out; char *formatted; char *critic_resolved; char *target_meta_key = FALSE; char *temp; node *refined; GREG g; /* create parser context */ yyinit(&g); /* Check for beamer mode in metadata */ target_meta_key = extract_metadata_value(source, extensions, "latexmode"); if (target_meta_key != NULL) { temp = label_from_string(target_meta_key); if (strcmp(temp, "beamer") == 0) { extensions = extensions | EXT_HEADINGSECTION; } free(temp); } free(target_meta_key); /* Resolve Critic Markup before parsing */ if ((extensions & EXT_CRITIC_ACCEPT) || (extensions & EXT_CRITIC_REJECT)) { g.data = mk_parser_data(source, extensions); while (yyparse_from(&g, yy_DocForCritic)); if (extensions & EXT_CRITIC_REJECT) { if ((extensions & EXT_CRITIC_ACCEPT) && (format == HTML_FORMAT)) critic_resolved = export_node_tree(((parser_data *)g.data)->result, CRITIC_HTML_HIGHLIGHT_FORMAT, extensions); else critic_resolved = export_node_tree(((parser_data *)g.data)->result, CRITIC_REJECT_FORMAT, extensions); } else { critic_resolved = export_node_tree(((parser_data *)g.data)->result, CRITIC_ACCEPT_FORMAT , extensions); } free_parser_data((parser_data *)g.data); yydeinit(&g); yyinit(&g); formatted = preformat_text(critic_resolved); free(critic_resolved); } else { formatted = preformat_text(source); } g.data = mk_parser_data(formatted,extensions); if (format == OPML_FORMAT) { while (yyparse_from(&g, yy_DocForOPML)); /* We want simpler version */ } else if (format == TOC_FORMAT) { while (yyparse_from(&g, yy_DocForTOC)); /* We want simpler version */ } else { while (yyparse(&g)); /* parse */ } if (((parser_data *)g.data)->parse_aborted) { /* clean up */ free_parser_data((parser_data *)g.data); yydeinit(&g); free(formatted); out = strdup("MultiMarkdown was unable to parse this file."); return out; } refined = process_raw_blocks(((parser_data *)g.data)->result, extensions); /* iteratively parse RAW bits */ /* move autolabels to main parse tree */ if (((parser_data *)g.data)->autolabels != NULL) { // fprintf(stderr, "We have autolabels\n"); append_list(((parser_data *)g.data)->autolabels,refined); ((parser_data *)g.data)->autolabels = NULL; } else { // fprintf(stderr, "No autolabels\n"); } /* Show what we got */ out = export_node_tree(refined, format, extensions); /* clean up */ free_parser_data((parser_data *)g.data); yydeinit(&g); free(formatted); return out; } /* has_metadata -- determine whether metadata exists or not */ bool has_metadata(const char *source, unsigned long extensions) { char *formatted; GREG g; yyinit(&g); node *result; bool answer = FALSE; formatted = preformat_text(source); g.data = mk_parser_data(formatted, extensions); while (yyparse_from(&g, yy_DocForMetaDataOnly)); /* We want simpler version */ result = ((parser_data *)g.data)->result; if ((result != NULL) && (result->key == METADATA)) { answer = TRUE; } free_parser_data((parser_data *)g.data); yydeinit(&g); free(formatted); return answer; } /* extract_metadata_keys -- return list of metadata keys as "\n" separated list */ char * extract_metadata_keys(const char *source, unsigned long extensions) { char *out; char *formatted; GREG g; yyinit(&g); /* don't preformat if doing syntax highlighting */ formatted = preformat_text(source); g.data = mk_parser_data(formatted,extensions); while (yyparse_from(&g, yy_DocForMetaDataOnly)); /* We want simpler version */ if (((parser_data *)g.data)->parse_aborted) { out = strdup("MultiMarkdown was unable to parse this file."); } else { out = metadata_keys(((parser_data *)g.data)->result); } free_parser_data((parser_data *)g.data); yydeinit(&g); free(formatted); return out; } /* extract_metadata_value -- find the value and return it */ char * extract_metadata_value(const char *source, unsigned long extensions, char *key) { char *out; char *formatted; GREG g; yyinit(&g); /* don't preformat if doing syntax highlighting */ formatted = preformat_text(source); g.data = mk_parser_data(formatted,extensions); while (yyparse_from(&g, yy_DocForMetaDataOnly)); /* We want simpler version */ if (((parser_data *)g.data)->parse_aborted) { out = strdup("MultiMarkdown was unable to parse this file."); } else { out = metavalue_for_key(key,((parser_data *)g.data)->result); } free_parser_data((parser_data *)g.data); yydeinit(&g); free(formatted); return out; }