proxy70

	rfc822.y - plan9port - [fork] Plan 9 from user space
	git clone git://src.adamsgaard.dk/plan9port
	Log
	Files
	Refs
	README
	LICENSE
	--- trfc822.y (13421B) --- 1 %{ 2 #include "common.h" 3 #include "smtp.h" 4 #include 5 6 char yylp; / next character to be lex'd / 7 int yydone; / tell yylex to give up / 8 char yybuffer; /* first parsed character / 9 char yyend; /* end of buffer to be parsed / 10 Node root; 11 Field firstfield; 12 Field lastfield; 13 Node usender; 14 Node usys; 15 Node udate; 16 char startfield, endfield; 17 int originator; 18 int destination; 19 int date; 20 int received; 21 int messageid; 22 %} 23 24 %term WORD 25 %term DATE 26 %term RESENT_DATE 27 %term RETURN_PATH 28 %term FROM 29 %term SENDER 30 %term REPLY_TO 31 %term RESENT_FROM 32 %term RESENT_SENDER 33 %term RESENT_REPLY_TO 34 %term SUBJECT 35 %term TO 36 %term CC 37 %term BCC 38 %term RESENT_TO 39 %term RESENT_CC 40 %term RESENT_BCC 41 %term REMOTE 42 %term PRECEDENCE 43 %term MIMEVERSION 44 %term CONTENTTYPE 45 %term MESSAGEID 46 %term RECEIVED 47 %term MAILER 48 %term BADTOKEN 49 %start msg 50 %% 51 52 msg : fields 53 \| unixfrom '\n' fields 54 ; 55 fields : '\n' 56 { yydone = 1; } 57 \| field '\n' 58 \| field '\n' fields 59 ; 60 field : dates 61 { date = 1; } 62 \| originator 63 { originator = 1; } 64 \| destination 65 { destination = 1; } 66 \| subject 67 \| optional 68 \| ignored 69 \| received 70 \| precedence 71 \| error '\n' field 72 ; 73 unixfrom : FROM route_addr unix_date_time REMOTE FROM word 74 { freenode($1); freenode($4); freenode($5); 75 usender = $2; udate = $3; usys = $6; 76 } 77 ; 78 originator : REPLY_TO ':' address_list 79 { newfield(link3($1, $2, $3), 1); } 80 \| RETURN_PATH ':' route_addr 81 { newfield(link3($1, $2, $3), 1); } 82 \| FROM ':' mailbox_list 83 { newfield(link3($1, $2, $3), 1); } 84 \| SENDER ':' mailbox 85 { newfield(link3($1, $2, $3), 1); } 86 \| RESENT_REPLY_TO ':' address_list 87 { newfield(link3($1, $2, $3), 1); } 88 \| RESENT_SENDER ':' mailbox 89 { newfield(link3($1, $2, $3), 1); } 90 \| RESENT_FROM ':' mailbox 91 { newfield(link3($1, $2, $3), 1); } 92 ; 93 dates : DATE ':' 
date_time 94 { newfield(link3($1, $2, $3), 0); } 95 \| RESENT_DATE ':' date_time 96 { newfield(link3($1, $2, $3), 0); } 97 ; 98 destination : TO ':' 99 { newfield(link2($1, $2), 0); } 100 \| TO ':' address_list 101 { newfield(link3($1, $2, $3), 0); } 102 \| RESENT_TO ':' 103 { newfield(link2($1, $2), 0); } 104 \| RESENT_TO ':' address_list 105 { newfield(link3($1, $2, $3), 0); } 106 \| CC ':' 107 { newfield(link2($1, $2), 0); } 108 \| CC ':' address_list 109 { newfield(link3($1, $2, $3), 0); } 110 \| RESENT_CC ':' 111 { newfield(link2($1, $2), 0); } 112 \| RESENT_CC ':' address_list 113 { newfield(link3($1, $2, $3), 0); } 114 \| BCC ':' 115 { newfield(link2($1, $2), 0); } 116 \| BCC ':' address_list 117 { newfield(link3($1, $2, $3), 0); } 118 \| RESENT_BCC ':' 119 { newfield(link2($1, $2), 0); } 120 \| RESENT_BCC ':' address_list 121 { newfield(link3($1, $2, $3), 0); } 122 ; 123 subject : SUBJECT ':' things 124 { newfield(link3($1, $2, $3), 0); } 125 \| SUBJECT ':' 126 { newfield(link2($1, $2), 0); } 127 ; 128 received : RECEIVED ':' things 129 { newfield(link3($1, $2, $3), 0); received++; } 130 \| RECEIVED ':' 131 { newfield(link2($1, $2), 0); received++; } 132 ; 133 precedence : PRECEDENCE ':' things 134 { newfield(link3($1, $2, $3), 0); } 135 \| PRECEDENCE ':' 136 { newfield(link2($1, $2), 0); } 137 ; 138 ignored : ignoredhdr ':' things 139 { newfield(link3($1, $2, $3), 0); } 140 \| ignoredhdr ':' 141 { newfield(link2($1, $2), 0); } 142 ; 143 ignoredhdr : MIMEVERSION \| CONTENTTYPE \| MESSAGEID { messageid = 1; } \| MAILER 144 ; 145 optional : fieldwords ':' things 146 { / hack to allow same lex for field names and the rest / 147 if(badfieldname($1)){ 148 freenode($1); 149 freenode($2); 150 freenode($3); 151 return 1; 152 } 153 newfield(link3($1, $2, $3), 0); 154 } 155 \| fieldwords ':' 156 { / hack to allow same lex for field names and the rest / 157 if(badfieldname($1)){ 158 freenode($1); 159 freenode($2); 160 return 1; 161 } 162 newfield(link2($1, $2), 0); 
163 } 164 ; 165 address_list : address 166 \| address_list ',' address 167 { $$ = link3($1, $2, $3); } 168 ; 169 address : mailbox 170 \| group 171 ; 172 group : phrase ':' address_list ';' 173 { $$ = link2($1, link3($2, $3, $4)); } 174 \| phrase ':' ';' 175 { $$ = link3($1, $2, $3); } 176 ; 177 mailbox_list : mailbox 178 \| mailbox_list ',' mailbox 179 { $$ = link3($1, $2, $3); } 180 ; 181 mailbox : route_addr 182 \| phrase brak_addr 183 { $$ = link2($1, $2); } 184 \| brak_addr 185 ; 186 brak_addr : '<' route_addr '>' 187 { $$ = link3($1, $2, $3); } 188 \| '<' '>' 189 { $$ = nobody($2); freenode($1); } 190 ; 191 route_addr : route ':' at_addr 192 { $$ = address(concat($1, concat($2, $3))); } 193 \| addr_spec 194 ; 195 route : '@' domain 196 { $$ = concat($1, $2); } 197 \| route ',' '@' domain 198 { $$ = concat($1, concat($2, concat($3, $4))); } 199 ; 200 addr_spec : local_part 201 { $$ = address($1); } 202 \| at_addr 203 ; 204 at_addr : local_part '@' domain 205 { $$ = address(concat($1, concat($2, $3)));} 206 \| at_addr '@' domain 207 { $$ = address(concat($1, concat($2, $3)));} 208 ; 209 local_part : word 210 ; 211 domain : word 212 ; 213 phrase : word 214 \| phrase word 215 { $$ = link2($1, $2); } 216 ; 217 things : thing 218 \| things thing 219 { $$ = link2($1, $2); } 220 ; 221 thing : word \| '<' \| '>' \| '@' \| ':' \| ';' \| ',' 222 ; 223 date_time : things 224 ; 225 unix_date_time : word word word unix_time word word 226 { $$ = link3($1, $3, link3($2, $6, link2($4, $5))); } 227 ; 228 unix_time : word 229 \| unix_time ':' word 230 { $$ = link3($1, $2, $3); } 231 ; 232 word : WORD \| DATE \| RESENT_DATE \| RETURN_PATH \| FROM \| SENDER 233 \| REPLY_TO \| RESENT_FROM \| RESENT_SENDER \| RESENT_REPLY_TO 234 \| TO \| CC \| BCC \| RESENT_TO \| RESENT_CC \| RESENT_BCC \| REMOTE \| SUBJECT 235 \| PRECEDENCE \| MIMEVERSION \| CONTENTTYPE \| MESSAGEID \| RECEIVED \| MAILER 236 ; 237 fieldwords : fieldword 238 \| WORD 239 \| fieldwords fieldword 240 { $$ = link2($1, 
$2); } 241 \| fieldwords word 242 { $$ = link2($1, $2); } 243 ; 244 fieldword : '<' \| '>' \| '@' \| ';' \| ',' 245 ; 246 %% 247 248 / 249 * Initialize the parsing. Done once for each header field. 250 / 251 void 252 yyinit(char p, int len) 253 { 254 yybuffer = p; 255 yylp = p; 256 yyend = p + len; 257 firstfield = lastfield = 0; 258 received = 0; 259 } 260 261 /* 262 * keywords identifying header fields we care about 263 / 264 typedef struct Keyword Keyword; 265 struct Keyword { 266 char rep; 267 int val; 268 }; 269 270 /* field names that we need to recognize / 271 Keyword key[] = { 272 { "date", DATE }, 273 { "resent-date", RESENT_DATE }, 274 { "return_path", RETURN_PATH }, 275 { "from", FROM }, 276 { "sender", SENDER }, 277 { "reply-to", REPLY_TO }, 278 { "resent-from", RESENT_FROM }, 279 { "resent-sender", RESENT_SENDER }, 280 { "resent-reply-to", RESENT_REPLY_TO }, 281 { "to", TO }, 282 { "cc", CC }, 283 { "bcc", BCC }, 284 { "resent-to", RESENT_TO }, 285 { "resent-cc", RESENT_CC }, 286 { "resent-bcc", RESENT_BCC }, 287 { "remote", REMOTE }, 288 { "subject", SUBJECT }, 289 { "precedence", PRECEDENCE }, 290 { "mime-version", MIMEVERSION }, 291 { "content-type", CONTENTTYPE }, 292 { "message-id", MESSAGEID }, 293 { "received", RECEIVED }, 294 { "mailer", MAILER }, 295 { "who-the-hell-cares", WORD } 296 }; 297 298 / 299 * Lexical analysis for an rfc822 header field. Continuation lines 300 * are handled in yywhite() when skipping over white space. 
301 * 302 / 303 int 304 yylex(void) 305 { 306 String t; 307 int quoting; 308 int escaping; 309 char start; 310 Keyword kp; 311 int c, d; 312 313 /* print("lexing\n"); /*/ 314 if(yylp >= yyend) 315 return 0; 316 if(yydone) 317 return 0; 318 319 quoting = escaping = 0; 320 start = yylp; 321 yylval = malloc(sizeof(Node)); 322 yylval->white = yylval->s = 0; 323 yylval->next = 0; 324 yylval->addr = 0; 325 yylval->start = yylp; 326 for(t = 0; yylp < yyend; yylp++){ 327 c = yylp & 0xff; 328 329 /* dump nulls, they can't be in header / 330 if(c == 0) 331 continue; 332 333 if(escaping) { 334 escaping = 0; 335 } else if(quoting) { 336 switch(c){ 337 case '\\': 338 escaping = 1; 339 break; 340 case '\n': 341 d = ((yylp+1))&0xff; 342 if(d != ' ' && d != '\t'){ 343 quoting = 0; 344 yylp--; 345 continue; 346 } 347 break; 348 case '"': 349 quoting = 0; 350 break; 351 } 352 } else { 353 switch(c){ 354 case '\\': 355 escaping = 1; 356 break; 357 case '(': 358 case ' ': 359 case '\t': 360 case '\r': 361 goto out; 362 case '\n': 363 if(yylp == start){ 364 yylp++; 365 /* print("lex(c %c)\n", c); /*/ 366 yylval->end = yylp; 367 return yylval->c = c; 368 } 369 goto out; 370 case '@': 371 case '>': 372 case '<': 373 case ':': 374 case ',': 375 case ';': 376 if(yylp == start){ 377 yylp++; 378 yylval->white = yywhite(); 379 / print("lex(c %c)\n", c); /*/ 380 yylval->end = yylp; 381 return yylval->c = c; 382 } 383 goto out; 384 case '"': 385 quoting = 1; 386 break; 387 default: 388 break; 389 } 390 } 391 if(t == 0) 392 t = s_new(); 393 s_putc(t, c); 394 } 395 out: 396 yylval->white = yywhite(); 397 if(t) { 398 s_terminate(t); 399 } else / message begins with white-space! 
/ 400 return yylval->c = '\n'; 401 yylval->s = t; 402 for(kp = key; kp->val != WORD; kp++) 403 if(cistrcmp(s_to_c(t), kp->rep)==0) 404 break; 405 / print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /*/ 406 yylval->end = yylp; 407 return yylval->c = kp->val; 408 } 409 410 void 411 yyerror(char x) 412 { 413 USED(x); 414 415 /fprint(2, "parse err: %s\n", x);// 416 } 417 418 / 419 * parse white space and comments 420 / 421 String 422 yywhite(void) 423 { 424 String w; 425 int clevel; 426 int c; 427 int escaping; 428 429 escaping = clevel = 0; 430 for(w = 0; yylp < yyend; yylp++){ 431 c = yylp & 0xff; 432 433 /* dump nulls, they can't be in header / 434 if(c == 0) 435 continue; 436 437 if(escaping){ 438 escaping = 0; 439 } else if(clevel) { 440 switch(c){ 441 case '\n': 442 / 443 * look for multiline fields 444 / 445 if((yylp+1)==' ' \|\| (yylp+1)=='\t') 446 break; 447 else 448 goto out; 449 case '\\': 450 escaping = 1; 451 break; 452 case '(': 453 clevel++; 454 break; 455 case ')': 456 clevel--; 457 break; 458 } 459 } else { 460 switch(c){ 461 case '\\': 462 escaping = 1; 463 break; 464 case '(': 465 clevel++; 466 break; 467 case ' ': 468 case '\t': 469 case '\r': 470 break; 471 case '\n': 472 / 473 * look for multiline fields 474 / 475 if((yylp+1)==' ' \|\| (yylp+1)=='\t') 476 break; 477 else 478 goto out; 479 default: 480 goto out; 481 } 482 } 483 if(w == 0) 484 w = s_new(); 485 s_putc(w, c); 486 } 487 out: 488 if(w) 489 s_terminate(w); 490 return w; 491 } 492 493 / 494 * link two parsed entries together 495 / 496 Node 497 link2(Node p1, Node p2) 498 { 499 Node p; 500 501 for(p = p1; p->next; p = p->next) 502 ; 503 p->next = p2; 504 return p1; 505 } 506 507 / 508 * link three parsed entries together 509 / 510 Node 511 link3(Node p1, Node p2, Node p3) 512 { 513 Node p; 514 515 for(p = p2; p->next; p = p->next) 516 ; 517 p->next = p3; 518 519 for(p = p1; p->next; p = p->next) 520 ; 521 p->next = p2; 522 523 return p1; 524 } 525 526 /* 527 * make a:b, move all white space 
after both 528 / 529 Node 530 colon(Node p1, Node p2) 531 { 532 if(p1->white){ 533 if(p2->white) 534 s_append(p1->white, s_to_c(p2->white)); 535 } else { 536 p1->white = p2->white; 537 p2->white = 0; 538 } 539 540 s_append(p1->s, ":"); 541 if(p2->s) 542 s_append(p1->s, s_to_c(p2->s)); 543 544 if(p1->end < p2->end) 545 p1->end = p2->end; 546 freenode(p2); 547 return p1; 548 } 549 550 /* 551 * concatenate two fields, move all white space after both 552 / 553 Node 554 concat(Node p1, Node p2) 555 { 556 char buf[2]; 557 558 if(p1->white){ 559 if(p2->white) 560 s_append(p1->white, s_to_c(p2->white)); 561 } else { 562 p1->white = p2->white; 563 p2->white = 0; 564 } 565 566 if(p1->s == nil){ 567 buf[0] = p1->c; 568 buf[1] = 0; 569 p1->s = s_new(); 570 s_append(p1->s, buf); 571 } 572 573 if(p2->s) 574 s_append(p1->s, s_to_c(p2->s)); 575 else { 576 buf[0] = p2->c; 577 buf[1] = 0; 578 s_append(p1->s, buf); 579 } 580 581 if(p1->end < p2->end) 582 p1->end = p2->end; 583 freenode(p2); 584 return p1; 585 } 586 587 /* 588 * look for disallowed chars in the field name 589 / 590 int 591 badfieldname(Node p) 592 { 593 for(; p; p = p->next){ 594 /* field name can't contain white space / 595 if(p->white && p->next) 596 return 1; 597 } 598 return 0; 599 } 600 601 / 602 * mark as an address 603 / 604 Node 605 address(Node p) 606 { 607 p->addr = 1; 608 return p; 609 } 610 611 / 612 * case independent string compare 613 / 614 int 615 cistrcmp(char s1, char s2) 616 { 617 int c1, c2; 618 619 for(; s1; s1++, s2++){ 620 c1 = isupper(s1) ? tolower(s1) : s1; 621 c2 = isupper(s2) ? 
tolower(s2) : s2; 622 if (c1 != c2) 623 return -1; 624 } 625 return s2; 626 } 627 628 / 629 * free a node 630 / 631 void 632 freenode(Node p) 633 { 634 Node tp; 635 636 while(p){ 637 tp = p->next; 638 if(p->s) 639 s_free(p->s); 640 if(p->white) 641 s_free(p->white); 642 free(p); 643 p = tp; 644 } 645 } 646 647 648 / 649 * an anonymous user 650 / 651 Node 652 nobody(Node p) 653 { 654 if(p->s) 655 s_free(p->s); 656 p->s = s_copy("pOsTmAsTeR"); 657 p->addr = 1; 658 return p; 659 } 660 661 / 662 * add anything that was dropped because of a parse error 663 / 664 void 665 missing(Node p) 666 { 667 Node np; 668 char start, end; 669 Field f; 670 String s; 671 672 start = yybuffer; 673 if(lastfield != nil){ 674 for(np = lastfield->node; np; np = np->next) 675 start = np->end+1; 676 } 677 678 end = p->start-1; 679 680 if(end <= start) 681 return; 682 683 if(strncmp(start, "From ", 5) == 0) 684 return; 685 686 np = malloc(sizeof(Node)); 687 np->start = start; 688 np->end = end; 689 np->white = nil; 690 s = s_copy("BadHeader: "); 691 np->s = s_nappend(s, start, end-start); 692 np->next = nil; 693 694 f = malloc(sizeof(Field)); 695 f->next = 0; 696 f->node = np; 697 f->source = 0; 698 if(firstfield) 699 lastfield->next = f; 700 else 701 firstfield = f; 702 lastfield = f; 703 } 704 705 / 706 * create a new field 707 / 708 void 709 newfield(Node p, int source) 710 { 711 Field f; 712 713 missing(p); 714 715 f = malloc(sizeof(Field)); 716 f->next = 0; 717 f->node = p; 718 f->source = source; 719 if(firstfield) 720 lastfield->next = f; 721 else 722 firstfield = f; 723 lastfield = f; 724 endfield = startfield; 725 startfield = yylp; 726 } 727 728 / 729 * fee a list of fields 730 / 731 void 732 freefield(Field f) 733 { 734 Field tf; 735 736 while(f){ 737 tf = f->next; 738 freenode(f->node); 739 free(f); 740 f = tf; 741 } 742 } 743 744 / 745 * add some white space to a node 746 / 747 Node 748 whiten(Node p) 749 { 750 Node tp; 751 752 for(tp = p; tp->next; tp = tp->next) 753 ; 754 
if(tp->white == 0) 755 tp->white = s_copy(" "); 756 return p; 757 } 758 759 void 760 yycleanup(void) 761 { 762 Field f, fnext; 763 Node np, next; 764 765 for(f = firstfield; f; f = fnext){ 766 for(np = f->node; np; np = next){ 767 if(np->s) 768 s_free(np->s); 769 if(np->white) 770 s_free(np->white); 771 next = np->next; 772 free(np); 773 } 774 fnext = f->next; 775 free(f); 776 } 777 firstfield = lastfield = 0; 778 }