%option 8bit nodefault

%{
/*
 * Tokenizer for SGML/XML-like markup.
 *
 * Each call to yylex() returns one token code.  For tokens that carry
 * text, yylval.s is set to a strdup()'ed copy of that text.
 *
 * NOTE(review): the system header names were missing from this copy of
 * the file.  <stdio.h> (fprintf, stderr) and <string.h> (strdup) are what
 * the actions below require; the original listed four system headers, so
 * confirm the full list against the upstream source.
 * NOTE(review): the token codes (WS, SLASH, EQ, CLOSE, NAME, START,
 * STARTHDR, END, COMMENT, DATA) and yylval are not defined in this file;
 * presumably they come from "g2m.h" -- confirm.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "g2m.h"
%}

nl		(\r\n|\r|\n)
ws		[ \t\r\n]+
open		"<"
close		">"
namestart	[A-Za-z\200-\377_]
/*
 * The '-' must be the last character of the class so that it is a literal
 * hyphen.  Written as ".-:" it forms a range from '.' to ':', which
 * admits '/' (so "<br/>" would lex as START "br/") and leaves out '-'
 * itself (so "http-equiv" would not be a single name).
 */
namechar	[A-Za-z\200-\377_0-9.:-]
name		{namestart}{namechar}*
comment		{open}"!--"([^-]|"-"[^-])*"--"{close}
string		\"([^"])*\"|\'([^'])*\'
data		{string}|[^<]+
whatever	{string}|{name}

/*
 * Two scanner modes are used:
 *
 *   INITIAL  outside the top-level element and inside markup, i.e. from
 *            the "<" that opens a tag up to the ">" that closes it.
 *   CONTENT  the content of elements, i.e. between the ">" of one tag
 *            and the "<" of the next.
 *
 * CONTENT is an inclusive (%s) start condition, so rules without a
 * start-condition prefix are active in both modes.
 */

%s CONTENT

%%

	/* Inside markup: whitespace, punctuation, names and quoted strings. */
<INITIAL>{ws}		{ yylval.s = strdup(yytext); return WS; }
<INITIAL>"/"		{ return SLASH; }
<INITIAL>"="		{ return EQ; }
	/* The ">" ends the tag; what follows is element content. */
<INITIAL>{close}	{ BEGIN(CONTENT); return CLOSE; }
<INITIAL>{whatever}	{ yylval.s = strdup(yytext); return NAME; }

	/*
	 * Tag openers, valid in both modes; each one switches to INITIAL.
	 * The leading "<" (and "!" for STARTHDR) is not part of the value.
	 */
{open}{name}		{ BEGIN(INITIAL); yylval.s = strdup(&yytext[1]); return START; }
{open}"!"{name}		{ BEGIN(INITIAL); yylval.s = strdup(&yytext[2]); return STARTHDR; }
{open}"/"		{ BEGIN(INITIAL); return END; }

	/* Comments are returned whole, delimiters included, in both modes. */
{comment}		{ yylval.s = strdup(yytext); return COMMENT; }

	/* Element content: a quoted string or a run of text up to the next "<". */
<CONTENT>{data}		{ yylval.s = strdup(yytext); return DATA; }

	/*
	 * Any other single character: report it on stderr and keep scanning.
	 * No token is returned for it.
	 */
.			{ fprintf(stderr, "!ERROR(%c)\n", *yytext); }