trfc822.y - plan9port - [fork] Plan 9 from user space
git clone git://src.adamsgaard.dk/plan9port
Log
Files
Refs
README
LICENSE
---
trfc822.y (13421B)
---
     1 %{
#include "common.h"
#include "smtp.h"
#include <ctype.h>
     5 
     /*
      *  Parser/lexer state shared with the rest of the program.
      */
     6 char        *yylp;                /* next character to be lex'd */
     7 int        yydone;                /* tell yylex to give up */
     8 char        *yybuffer;        /* first parsed character */
     9 char        *yyend;                /* end of buffer to be parsed */
    10 Node        *root;                /* not referenced anywhere in this file */
    11 Field        *firstfield;        /* head of the field list built by newfield()/missing() */
    12 Field        *lastfield;        /* tail of that list */
    13 Node        *usender;        /* sender address from a unix "From " line (unixfrom rule) */
    14 Node        *usys;                /* system name from a unix "From " line (unixfrom rule) */
    15 Node        *udate;                /* date from a unix "From " line (unixfrom rule) */
    16 char        *startfield, *endfield;        /* updated by newfield(): startfield = yylp, endfield = old startfield */
    17 int        originator;        /* set to 1 once an originator field has been parsed */
    18 int        destination;        /* set to 1 once a destination field has been parsed */
    19 int        date;                /* set to 1 once a date field has been parsed */
    20 int        received;        /* count of Received fields; reset by yyinit() */
    21 int        messageid;        /* set to 1 once a MESSAGEID token has been reduced */
    22 %}
    23 
    /*
     *  Terminals.  WORD is any word that is not a recognized field name;
     *  the others up to MAILER are returned by yylex() when a word matches
     *  an entry of key[].  BADTOKEN is declared but not used by any rule
     *  or by the lexer in this file.
     */
    24 %term WORD
    25 %term DATE
    26 %term RESENT_DATE
    27 %term RETURN_PATH
    28 %term FROM
    29 %term SENDER
    30 %term REPLY_TO
    31 %term RESENT_FROM
    32 %term RESENT_SENDER
    33 %term RESENT_REPLY_TO
    34 %term SUBJECT
    35 %term TO
    36 %term CC
    37 %term BCC
    38 %term RESENT_TO
    39 %term RESENT_CC
    40 %term RESENT_BCC
    41 %term REMOTE
    42 %term PRECEDENCE
    43 %term MIMEVERSION
    44 %term CONTENTTYPE
    45 %term MESSAGEID
    46 %term RECEIVED
    47 %term MAILER
    48 %term BADTOKEN
    49 %start msg
    50 %%
    51 
    /*
     *  A message header is a run of fields, each ended by a newline,
     *  optionally preceded by a unix "From " line.  A newline on its own
     *  ends the header and sets yydone so that yylex() stops producing
     *  tokens.
     */
    52 msg                : fields
    53                 | unixfrom '\n' fields
    54                 ;
    55 fields                : '\n'
    56                         { yydone = 1; }
    57                 | field '\n'
    58                 | field '\n' fields
    59                 ;
    /*
     *  The date, originator and destination flags record that at least
     *  one field of that class was seen.  The error alternative resumes
     *  parsing at the field that follows the next newline.
     */
    60 field                : dates
    61                         { date = 1; }
    62                 | originator
    63                         { originator = 1; }
    64                 | destination
    65                         { destination = 1; }
    66                 | subject
    67                 | optional
    68                 | ignored
    69                 | received
    70                 | precedence
    71                 | error '\n' field
    72                 ;
    /*
     *  Unix "From " line: FROM addr date REMOTE FROM system.  The three
     *  keyword nodes are freed; the address, date and system nodes are
     *  kept in usender, udate and usys.
     */
    73 unixfrom        : FROM route_addr unix_date_time REMOTE FROM word
    74                         { freenode($1); freenode($4); freenode($5);
    75                           usender = $2; udate = $3; usys = $6;
    76                         }
    77                 ;
    /*
     *  Originator fields.  Each is stored as one chain (name, colon,
     *  value) via newfield() with source set to 1.
     */
    78 originator        : REPLY_TO ':' address_list
    79                         { newfield(link3($1, $2, $3), 1); }
    80                 | RETURN_PATH ':' route_addr
    81                         { newfield(link3($1, $2, $3), 1); }
    82                 | FROM ':' mailbox_list
    83                         { newfield(link3($1, $2, $3), 1); }
    84                 | SENDER ':' mailbox
    85                         { newfield(link3($1, $2, $3), 1); }
    86                 | RESENT_REPLY_TO ':' address_list
    87                         { newfield(link3($1, $2, $3), 1); }
    88                 | RESENT_SENDER ':' mailbox
    89                         { newfield(link3($1, $2, $3), 1); }
    90                 | RESENT_FROM ':' mailbox
    91                         { newfield(link3($1, $2, $3), 1); }
    92                 ;
    /*
     *  Date fields; stored with source set to 0.
     */
    93 dates                 : DATE ':' date_time
    94                         { newfield(link3($1, $2, $3), 0); }
    95                 | RESENT_DATE ':' date_time
    96                         { newfield(link3($1, $2, $3), 0); }
    97                 ;
    /*
     *  Destination fields.  Each may be empty (name and colon only) or
     *  carry an address list; all are stored with source set to 0.
     */
    98 destination        : TO ':'
    99                         { newfield(link2($1, $2), 0); }
   100                 | TO ':' address_list
   101                         { newfield(link3($1, $2, $3), 0); }
   102                 | RESENT_TO ':'
   103                         { newfield(link2($1, $2), 0); }
   104                 | RESENT_TO ':' address_list
   105                         { newfield(link3($1, $2, $3), 0); }
   106                 | CC ':'
   107                         { newfield(link2($1, $2), 0); }
   108                 | CC ':' address_list
   109                         { newfield(link3($1, $2, $3), 0); }
   110                 | RESENT_CC ':'
   111                         { newfield(link2($1, $2), 0); }
   112                 | RESENT_CC ':' address_list
   113                         { newfield(link3($1, $2, $3), 0); }
   114                 | BCC ':'
   115                         { newfield(link2($1, $2), 0); }
   116                 | BCC ':' address_list
   117                         { newfield(link3($1, $2, $3), 0); }
   118                 | RESENT_BCC ':' 
   119                         { newfield(link2($1, $2), 0); }
   120                 | RESENT_BCC ':' address_list
   121                         { newfield(link3($1, $2, $3), 0); }
   122                 ;
   /*
    *  Free-form fields: the value is an arbitrary run of tokens (things)
    *  and may be empty.  All are stored with source set to 0.
    */
   123 subject                : SUBJECT ':' things
   124                         { newfield(link3($1, $2, $3), 0); }
   125                 | SUBJECT ':'
   126                         { newfield(link2($1, $2), 0); }
   127                 ;
   /* every Received field also bumps the received counter */
   128 received        : RECEIVED ':' things
   129                         { newfield(link3($1, $2, $3), 0); received++; }
   130                 | RECEIVED ':'
   131                         { newfield(link2($1, $2), 0); received++; }
   132                 ;
   133 precedence        : PRECEDENCE ':' things
   134                         { newfield(link3($1, $2, $3), 0); }
   135                 | PRECEDENCE ':'
   136                         { newfield(link2($1, $2), 0); }
   137                 ;
   138 ignored                : ignoredhdr ':' things
   139                         { newfield(link3($1, $2, $3), 0); }
   140                 | ignoredhdr ':'
   141                         { newfield(link2($1, $2), 0); }
   142                 ;
   /* seeing a MESSAGEID token sets the messageid flag */
   143 ignoredhdr        : MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER
   144                 ;
   /*
    *  Any other field.  The name is collected by fieldwords; if
    *  badfieldname() rejects it, the nodes are freed and the action
    *  returns 1 from the generated parser function.
    */
   145 optional        : fieldwords ':' things
   146                         { /* hack to allow same lex for field names and the rest */
   147                          if(badfieldname($1)){
   148                                 freenode($1);
   149                                 freenode($2);
   150                                 freenode($3);
   151                                 return 1;
   152                          }
   153                          newfield(link3($1, $2, $3), 0);
   154                         }
   155                 | fieldwords ':'
   156                         { /* hack to allow same lex for field names and the rest */
   157                          if(badfieldname($1)){
   158                                 freenode($1);
   159                                 freenode($2);
   160                                 return 1;
   161                          }
   162                          newfield(link2($1, $2), 0);
   163                         }
   164                 ;
   /*
    *  Address syntax.  Tokens that surround an address (phrases, commas,
    *  angle brackets, group punctuation) are chained with link2()/link3().
    *  Tokens that make up the address itself are merged into a single
    *  node with concat() and flagged with address().
    */
   165 address_list        : address
   166                 | address_list ',' address
   167                         { $$ = link3($1, $2, $3); }
   168                 ;
   169 address                : mailbox
   170                 | group
   171                 ;
   172 group                : phrase ':' address_list ';'
   173                         { $$ = link2($1, link3($2, $3, $4)); }
   174                 | phrase ':' ';'
   175                         { $$ = link3($1, $2, $3); }
   176                 ;
   177 mailbox_list        : mailbox
   178                 | mailbox_list ',' mailbox
   179                         { $$ = link3($1, $2, $3); }
   180                 ;
   181 mailbox                : route_addr
   182                 | phrase brak_addr
   183                         { $$ = link2($1, $2); }
   184                 | brak_addr
   185                 ;
   /* an empty "<>" is replaced by the nobody() address; the '<' node is freed */
   186 brak_addr        : '<' route_addr '>'
   187                         { $$ = link3($1, $2, $3); }
   188                 | '<' '>'
   189                         { $$ = nobody($2); freenode($1); }
   190                 ;
   191 route_addr        : route ':' at_addr
   192                         { $$ = address(concat($1, concat($2, $3))); }
   193                 | addr_spec
   194                 ;
   195 route                : '@' domain
   196                         { $$ = concat($1, $2); }
   197                 | route ',' '@' domain
   198                         { $$ = concat($1, concat($2, concat($3, $4))); }
   199                 ;
   200 addr_spec        : local_part
   201                         { $$ = address($1); }
   202                 | at_addr
   203                 ;
   /* the second alternative accepts more than one '@' in an address */
   204 at_addr                : local_part '@' domain
   205                         { $$ = address(concat($1, concat($2, $3)));}
   206                 | at_addr '@' domain
   207                         { $$ = address(concat($1, concat($2, $3)));}
   208                 ;
   209 local_part        : word
   210                 ;
   211 domain                : word
   212                 ;
   /*
    *  Word-level rules.  A phrase is one or more words; things is an
    *  arbitrary run of words and punctuation used for free-form values.
    */
   213 phrase                : word
   214                 | phrase word
   215                         { $$ = link2($1, $2); }
   216                 ;
   217 things                : thing
   218                 | things thing
   219                         { $$ = link2($1, $2); }
   220                 ;
   221 thing                : word | '<' | '>' | '@' | ':' | ';' | ','
   222                 ;
   223 date_time        : things
   224                 ;
   /*
    *  Date on a unix "From " line.  The resulting chain is ordered
    *  $1 $3 $2 $6 $4 $5, i.e. the parts are rearranged rather than kept
    *  in input order (presumably from unix date order to rfc822 order --
    *  TODO confirm).
    */
   225 unix_date_time        : word word word unix_time word word
   226                         { $$ = link3($1, $3, link3($2, $6, link2($4, $5))); }
   227                 ;
   228 unix_time        : word
   229                 | unix_time ':' word
   230                         { $$ = link3($1, $2, $3); }
   231                 ;
   /* outside the field-name position every keyword token is an ordinary word */
   232 word                : WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER
   233                 | REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO
   234                 | TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT
   235                 | PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER
   236                 ;
   /*
    *  Tokens that may form the name of an optional field.  The chain is
    *  checked afterwards by badfieldname().
    */
   237 fieldwords        : fieldword
   238                 | WORD
   239                 | fieldwords fieldword
   240                         { $$ = link2($1, $2); }
   241                 | fieldwords word
   242                         { $$ = link2($1, $2); }
   243                 ;
   244 fieldword        : '<' | '>' | '@' | ';' | ','
   245                 ;
   246 %%
   247 
   248 /*
   249  *  Initialize the parsing.  Done once for each header field.
   250  */
   251 void
   252 yyinit(char *p, int len)
   253 {
   254         yybuffer = p;
   255         yylp = p;
   256         yyend = p + len;
   257         firstfield = lastfield = 0;
   258         received = 0;
   259 }
   260 
   261 /*
   262  *  keywords identifying header fields we care about
   263  */
   264 typedef struct Keyword        Keyword;
   265 struct Keyword {
   266         char        *rep;
   267         int        val;
   268 };
   269 
    270 /* field names that we need to recognize */
    /*
     *  yylex() compares each word against rep with cistrcmp(), so the
     *  match is case-independent.  The lookup loop stops at the first
     *  entry whose val is WORD, so the final entry is a sentinel that
     *  must stay last; its rep string is never compared.
     *
     *  NOTE(review): "return_path" is spelled with an underscore while
     *  every other multi-word name uses '-'; confirm whether
     *  "return-path" was intended before changing it, since that would
     *  route Return-Path fields through the RETURN_PATH grammar rule.
     */
    271 Keyword key[] = {
    272         { "date", DATE },
    273         { "resent-date", RESENT_DATE },
    274         { "return_path", RETURN_PATH },
    275         { "from", FROM },
    276         { "sender", SENDER },
    277         { "reply-to", REPLY_TO },
    278         { "resent-from", RESENT_FROM },
    279         { "resent-sender", RESENT_SENDER },
    280         { "resent-reply-to", RESENT_REPLY_TO },
    281         { "to", TO },
    282         { "cc", CC },
    283         { "bcc", BCC },
    284         { "resent-to", RESENT_TO },
    285         { "resent-cc", RESENT_CC },
    286         { "resent-bcc", RESENT_BCC },
    287         { "remote", REMOTE },
    288         { "subject", SUBJECT },
    289         { "precedence", PRECEDENCE },
    290         { "mime-version", MIMEVERSION },
    291         { "content-type", CONTENTTYPE },
    292         { "message-id", MESSAGEID },
    293         { "received", RECEIVED },
    294         { "mailer", MAILER },
    295         { "who-the-hell-cares", WORD }
    296 };
   297 
   298 /*
   299  *  Lexical analysis for an rfc822 header field.  Continuation lines
   300  *  are handled in yywhite() when skipping over white space.
   301  *
   302  */
   303 int
   304 yylex(void)
   305 {
   306         String *t;
   307         int quoting;
   308         int escaping;
   309         char *start;
   310         Keyword *kp;
   311         int c, d;
   312 
   313 /*        print("lexing\n"); /**/
   314         if(yylp >= yyend)
   315                 return 0;
   316         if(yydone)
   317                 return 0;
   318 
   319         quoting = escaping = 0;
   320         start = yylp;
   321         yylval = malloc(sizeof(Node));
   322         yylval->white = yylval->s = 0;
   323         yylval->next = 0;
   324         yylval->addr = 0;
   325         yylval->start = yylp;
   326         for(t = 0; yylp < yyend; yylp++){
   327                 c = *yylp & 0xff;
   328 
   329                 /* dump nulls, they can't be in header */
   330                 if(c == 0)
   331                         continue;
   332 
   333                 if(escaping) {
   334                         escaping = 0;
   335                 } else if(quoting) {
   336                         switch(c){
   337                         case '\\':
   338                                 escaping = 1;
   339                                 break;
   340                         case '\n':
   341                                 d = (*(yylp+1))&0xff;
   342                                 if(d != ' ' && d != '\t'){
   343                                         quoting = 0;
   344                                         yylp--;
   345                                         continue;
   346                                 }
   347                                 break;
   348                         case '"':
   349                                 quoting = 0;
   350                                 break;
   351                         }
   352                 } else {
   353                         switch(c){
   354                         case '\\':
   355                                 escaping = 1;
   356                                 break;
   357                         case '(':
   358                         case ' ':
   359                         case '\t':
   360                         case '\r':
   361                                 goto out;
   362                         case '\n':
   363                                 if(yylp == start){
   364                                         yylp++;
   365 /*                                        print("lex(c %c)\n", c); /**/
   366                                         yylval->end = yylp;
   367                                         return yylval->c = c;
   368                                 }
   369                                 goto out;
   370                         case '@':
   371                         case '>':
   372                         case '<':
   373                         case ':':
   374                         case ',':
   375                         case ';':
   376                                 if(yylp == start){
   377                                         yylp++;
   378                                         yylval->white = yywhite();
   379 /*                                        print("lex(c %c)\n", c); /**/
   380                                         yylval->end = yylp;
   381                                         return yylval->c = c;
   382                                 }
   383                                 goto out;
   384                         case '"':
   385                                 quoting = 1;
   386                                 break;
   387                         default:
   388                                 break;
   389                         }
   390                 }
   391                 if(t == 0)
   392                         t = s_new();
   393                 s_putc(t, c);
   394         }
   395 out:
   396         yylval->white = yywhite();
   397         if(t) {
   398                 s_terminate(t);
   399         } else                                /* message begins with white-space! */
   400                 return yylval->c = '\n';
   401         yylval->s = t;
   402         for(kp = key; kp->val != WORD; kp++)
   403                 if(cistrcmp(s_to_c(t), kp->rep)==0)
   404                         break;
   405 /*        print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/
   406         yylval->end = yylp;
   407         return yylval->c = kp->val;
   408 }
   409 
   410 void
   411 yyerror(char *x)
   412 {
   413         USED(x);
   414 
   415         /*fprint(2, "parse err: %s\n", x);/**/
   416 }
   417 
   418 /*
   419  *  parse white space and comments
   420  */
   421 String *
   422 yywhite(void)
   423 {
   424         String *w;
   425         int clevel;
   426         int c;
   427         int escaping;
   428 
   429         escaping = clevel = 0;
   430         for(w = 0; yylp < yyend; yylp++){
   431                 c = *yylp & 0xff;
   432 
   433                 /* dump nulls, they can't be in header */
   434                 if(c == 0)
   435                         continue;
   436 
   437                 if(escaping){
   438                         escaping = 0;
   439                 } else if(clevel) {
   440                         switch(c){
   441                         case '\n':
   442                                 /*
   443                                  *  look for multiline fields
   444                                  */
   445                                 if(*(yylp+1)==' ' || *(yylp+1)=='\t')
   446                                         break;
   447                                 else
   448                                         goto out;
   449                         case '\\':
   450                                 escaping = 1;
   451                                 break;
   452                         case '(':
   453                                 clevel++;
   454                                 break;
   455                         case ')':
   456                                 clevel--;
   457                                 break;
   458                         }
   459                 } else {
   460                         switch(c){
   461                         case '\\':
   462                                 escaping = 1;
   463                                 break;
   464                         case '(':
   465                                 clevel++;
   466                                 break;
   467                         case ' ':
   468                         case '\t':
   469                         case '\r':
   470                                 break;
   471                         case '\n':
   472                                 /*
   473                                  *  look for multiline fields
   474                                  */
   475                                 if(*(yylp+1)==' ' || *(yylp+1)=='\t')
   476                                         break;
   477                                 else
   478                                         goto out;
   479                         default:
   480                                 goto out;
   481                         }
   482                 }
   483                 if(w == 0)
   484                         w = s_new();
   485                 s_putc(w, c);
   486         }
   487 out:
   488         if(w)
   489                 s_terminate(w);
   490         return w;
   491 }
   492 
   493 /*
   494  *  link two parsed entries together
   495  */
   496 Node*
   497 link2(Node *p1, Node *p2)
   498 {
   499         Node *p;
   500 
   501         for(p = p1; p->next; p = p->next)
   502                 ;
   503         p->next = p2;
   504         return p1;
   505 }
   506 
   507 /*
   508  *  link three parsed entries together
   509  */
   510 Node*
   511 link3(Node *p1, Node *p2, Node *p3)
   512 {
   513         Node *p;
   514 
   515         for(p = p2; p->next; p = p->next)
   516                 ;
   517         p->next = p3;
   518 
   519         for(p = p1; p->next; p = p->next)
   520                 ;
   521         p->next = p2;
   522 
   523         return p1;
   524 }
   525 
   526 /*
   527  *  make a:b, move all white space after both
   528  */
   529 Node*
   530 colon(Node *p1, Node *p2)
   531 {
   532         if(p1->white){
   533                 if(p2->white)
   534                         s_append(p1->white, s_to_c(p2->white));
   535         } else {
   536                 p1->white = p2->white;
   537                 p2->white = 0;
   538         }
   539 
   540         s_append(p1->s, ":");
   541         if(p2->s)
   542                 s_append(p1->s, s_to_c(p2->s));
   543 
   544         if(p1->end < p2->end)
   545                 p1->end = p2->end;
   546         freenode(p2);
   547         return p1;
   548 }
   549 
   550 /*
   551  *  concatenate two fields, move all white space after both
   552  */
   553 Node*
   554 concat(Node *p1, Node *p2)
   555 {
   556         char buf[2];
   557 
   558         if(p1->white){
   559                 if(p2->white)
   560                         s_append(p1->white, s_to_c(p2->white));
   561         } else {
   562                 p1->white = p2->white;
   563                 p2->white = 0;
   564         }
   565 
   566         if(p1->s == nil){
   567                 buf[0] = p1->c;
   568                 buf[1] = 0;
   569                 p1->s = s_new();
   570                 s_append(p1->s, buf);
   571         }
   572 
   573         if(p2->s)
   574                 s_append(p1->s, s_to_c(p2->s));
   575         else {
   576                 buf[0] = p2->c;
   577                 buf[1] = 0;
   578                 s_append(p1->s, buf);
   579         }
   580 
   581         if(p1->end < p2->end)
   582                 p1->end = p2->end;
   583         freenode(p2);
   584         return p1;
   585 }
   586 
   587 /*
   588  *  look for disallowed chars in the field name
   589  */
   590 int
   591 badfieldname(Node *p)
   592 {
   593         for(; p; p = p->next){
   594                 /* field name can't contain white space */
   595                 if(p->white && p->next)
   596                         return 1;
   597         }
   598         return 0;
   599 }
   600 
   601 /*
   602  *  mark as an address
   603  */
   604 Node *
   605 address(Node *p)
   606 {
   607         p->addr = 1;
   608         return p;
   609 }
   610 
   611 /*
   612  *  case independent string compare
   613  */
   614 int
   615 cistrcmp(char *s1, char *s2)
   616 {
   617         int c1, c2;
   618 
   619         for(; *s1; s1++, s2++){
   620                 c1 = isupper(*s1) ? tolower(*s1) : *s1;
   621                 c2 = isupper(*s2) ? tolower(*s2) : *s2;
   622                 if (c1 != c2)
   623                         return -1;
   624         }
   625         return *s2;
   626 }
   627 
   628 /*
   629  *  free a node
   630  */
   631 void
   632 freenode(Node *p)
   633 {
   634         Node *tp;
   635 
   636         while(p){
   637                 tp = p->next;
   638                 if(p->s)
   639                         s_free(p->s);
   640                 if(p->white)
   641                         s_free(p->white);
   642                 free(p);
   643                 p = tp;
   644         }
   645 }
   646 
   647 
   648 /*
   649  *  an anonymous user
   650  */
   651 Node*
   652 nobody(Node *p)
   653 {
   654         if(p->s)
   655                 s_free(p->s);
   656         p->s = s_copy("pOsTmAsTeR");
   657         p->addr = 1;
   658         return p;
   659 }
   660 
   661 /*
   662  *  add anything that was dropped because of a parse error
   663  */
   664 void
   665 missing(Node *p)
   666 {
   667         Node *np;
   668         char *start, *end;
   669         Field *f;
   670         String *s;
   671 
   672         start = yybuffer;
   673         if(lastfield != nil){
   674                 for(np = lastfield->node; np; np = np->next)
   675                         start = np->end+1;
   676         }
   677 
   678         end = p->start-1;
   679 
   680         if(end <= start)
   681                 return;
   682 
   683         if(strncmp(start, "From ", 5) == 0)
   684                 return;
   685 
   686         np = malloc(sizeof(Node));
   687         np->start = start;
   688         np->end = end;
   689         np->white = nil;
   690         s = s_copy("BadHeader: ");
   691         np->s = s_nappend(s, start, end-start);
   692         np->next = nil;
   693 
   694         f = malloc(sizeof(Field));
   695         f->next = 0;
   696         f->node = np;
   697         f->source = 0;
   698         if(firstfield)
   699                 lastfield->next = f;
   700         else
   701                 firstfield = f;
   702         lastfield = f;
   703 }
   704 
   705 /*
   706  *  create a new field
   707  */
   708 void
   709 newfield(Node *p, int source)
   710 {
   711         Field *f;
   712 
   713         missing(p);
   714 
   715         f = malloc(sizeof(Field));
   716         f->next = 0;
   717         f->node = p;
   718         f->source = source;
   719         if(firstfield)
   720                 lastfield->next = f;
   721         else
   722                 firstfield = f;
   723         lastfield = f;
   724         endfield = startfield;
   725         startfield = yylp;
   726 }
   727 
   728 /*
   729  *  fee a list of fields
   730  */
   731 void
   732 freefield(Field *f)
   733 {
   734         Field *tf;
   735 
   736         while(f){
   737                 tf = f->next;
   738                 freenode(f->node);
   739                 free(f);
   740                 f = tf;
   741         }
   742 }
   743 
   744 /*
   745  *  add some white space to a node
   746  */
   747 Node*
   748 whiten(Node *p)
   749 {
   750         Node *tp;
   751 
   752         for(tp = p; tp->next; tp = tp->next)
   753                 ;
   754         if(tp->white == 0)
   755                 tp->white = s_copy(" ");
   756         return p;
   757 }
   758 
   759 void
   760 yycleanup(void)
   761 {
   762         Field *f, *fnext;
   763         Node *np, *next;
   764 
   765         for(f = firstfield; f; f = fnext){
   766                 for(np = f->node; np; np = next){
   767                         if(np->s)
   768                                 s_free(np->s);
   769                         if(np->white)
   770                                 s_free(np->white);
   771                         next = np->next;
   772                         free(np);
   773                 }
   774                 fnext = f->next;
   775                 free(f);
   776         }
   777         firstfield = lastfield = 0;
   778 }