tregen.c - plan9port - [fork] Plan 9 from user space
git clone git://src.adamsgaard.dk/plan9port
Log
Files
Refs
README
LICENSE
---
tregen.c (2446B)
---
     1 #include 
     2 #include 
     3 #include 
     4 #include 
     5 #include "dfa.h"
     6 
     7 /***
     8  * Regular expression for matching.
     9  */
    10 
    11 char *ignore[] =
    12 {
    13         /* HTML that isn't A, IMG, or FONT */
    14         /* Must have a space somewhere to avoid catching  */
    15         "<[         \n\r]*("
    16                 "[^aif]|"
    17                 "a[^> \t\r\n]|"
    18                 "i[^mM \t\r\n]|"
    19                 "im[^gG \t\r\n]|"
    20                 "img[^> \t\r\n]|"
    21                 "f[^oO \t\r\n]|"
    22                 "fo[^Nn \t\r\n]|"
    23                 "fon[^tT \t\r\n]|"
    24                 "font[^> \r\t\n]"
    25         ")[^>]*[ \t\n\r][^>]*>",
    26         "<[         \n\r]*("
    27                 "i|im|f|fo|fon"
    28         ")[ \t\r\n][^>]*>",
    29 
    30         /* ignore html comments */
    31         "",
    32 
    33         /* random mail strings */
    34         "^message-id:.*\n([         ].*\n)*",
    35         "^in-reply-to:.*\n([         ].*\n)*",
    36         "^references:.*\n([         ].*\n)*",
    37         "^date:.*\n([         ].*\n)*",
    38         "^delivery-date:.*\n([         ].*\n)*",
    39         "e?smtp id .*",
    40         "^        id.*",
    41         "boundary=.*",
    42         "name=\"",
    43         "filename=\"",
    44         "news:<[^>]+>",
    45         "^--[^         ]*$",
    46 
    47         /* base64 encoding */
    48         "^[0-9a-zA-Z+\\-=/]+$",
    49 
    50         /* uu encoding */
    51         "^[!-Z]+$",
    52 
    53         /* little things */
    54         ".",
    55         "\n"
    56 };
    57 
    58 char *keywords[] =
    59 {
    60         "([a-zA-Z'`$!¡-￿]|[0-9]([.,][0-9])*)+"
    61 };
    62 
    63 int debug;
    64 
    65 Dreprog*
    66 dregcomp(char *buf)
    67 {
    68         Reprog *r;
    69         Dreprog *d;
    70 
    71         if(debug)
    72                 print(">>> '%s'\n", buf);
    73 
    74         r = regcomp(buf);
    75         if(r == nil)
    76                 sysfatal("regcomp");
    77         d = dregcvt(r);
    78         if(d == nil)
    79                 sysfatal("dregcomp");
    80         free(r);
    81         return d;
    82 }
    83 
    84 char*
    85 strcpycase(char *d, char *s)
    86 {
    87         int cc, esc;
    88 
    89         cc = 0;
    90         esc = 0;
    91         while(*s){
    92                 if(*s == '[')
    93                         cc++;
    94                 if(*s == ']')
    95                         cc--;
    96                 if(!cc && 'a' <= *s && *s <= 'z'){
    97                         *d++ = '[';
    98                         *d++ = *s;
    99                         *d++ = *s+'A'-'a';
   100                         *d++ = ']';
   101                 }else
   102                         *d++ = *s;
   103                 if(*s == '\\')
   104                         esc++;
   105                 else if(esc)
   106                         esc--;
   107                 s++;
   108         }
   109         return d;
   110 }
   111 
   112 void
   113 regerror(char *msg)
   114 {
   115         sysfatal("regerror: %s", msg);
   116 }
   117 
   118 void
   119 buildre(Dreprog *re[3])
   120 {
   121         int i;
   122         static char buf[16384], *s;
   123 
   124         re[0] = dregcomp("^From ");
   125 
   126         s = buf;
   127         for(i=0; i