oops - plan9port - [fork] Plan 9 from user space
git clone git://src.adamsgaard.dk/plan9port
Log
Files
Refs
README
LICENSE
---
commit 30d57c74899f4825349441e2b1e7a0bb5995c21f
parent 3e0d8fb3ea83b2b65a6425c65beda887140f9349
Author: rsc 
Date:   Tue, 27 Dec 2005 23:16:48 +0000

oops

Diffstat:
  A src/cmd/tcs/html.c                  |     330 +++++++++++++++++++++++++++++++

1 file changed, 330 insertions(+), 0 deletions(-)
---
diff --git a/src/cmd/tcs/html.c b/src/cmd/tcs/html.c
@@ -0,0 +1,330 @@
+#include 
+#include 
+#include 
+#include "hdr.h"
+#include "conv.h"
+
+typedef struct Hchar Hchar;
+struct Hchar        /* one named HTML character entity */
+{
+        char *s;        /* entity name, e.g. "AElig" */
+        Rune r;         /* code point the name stands for */
+};
+
+/*
+ * Named HTML character entities and their code points.
+ * <, >, ", & intentionally omitted.
+ *
+ * html_init copies this array into byrune and then sorts it in place
+ * by name, which is why it is not const.  Several names share a code
+ * point (emdash/mdash, endash/ndash, ensp/sp).
+ */
+
+static Hchar byname[] =
+{
+        {"AElig", 198},
+        {"Aacute", 193},
+        {"Acirc", 194},
+        {"Agrave", 192},
+        {"Aring", 197},
+        {"Atilde", 195},
+        {"Auml", 196},
+        {"Ccedil", 199},
+        {"ETH", 208},
+        {"Eacute", 201},
+        {"Ecirc", 202},
+        {"Egrave", 200},
+        {"Euml", 203},
+        {"Iacute", 205},
+        {"Icirc", 206},
+        {"Igrave", 204},
+        {"Iuml", 207},
+        {"Ntilde", 209},
+        {"Oacute", 211},
+        {"Ocirc", 212},
+        {"Ograve", 210},
+        {"Oslash", 216},
+        {"Otilde", 213},
+        {"Ouml", 214},
+        {"THORN", 222},
+        {"Uacute", 218},
+        {"Ucirc", 219},
+        {"Ugrave", 217},
+        {"Uuml", 220},
+        {"Yacute", 221},
+        {"aacute", 225},
+        {"acirc", 226},
+        {"acute", 180},
+        {"aelig", 230},
+        {"agrave", 224},
+        {"alpha", 945},
+        {"aring", 229},
+        {"atilde", 227},
+        {"auml", 228},
+        {"beta", 946},
+        {"brvbar", 166},
+        {"ccedil", 231},
+        {"cdots", 8943},
+        {"cedil", 184},
+        {"cent", 162},
+        {"chi", 967},
+        {"copy", 169},
+        {"curren", 164},
+        {"ddots", 8945},
+        {"deg", 176},
+        {"delta", 948},
+        {"divide", 247},
+        {"eacute", 233},
+        {"ecirc", 234},
+        {"egrave", 232},
+        {"emdash", 8212},        /* non-standard but commonly used */
+        {"emsp", 8195},
+        {"endash", 8211},        /* non-standard but commonly used */
+        {"ensp", 8194},
+        {"epsilon", 949},
+        {"eta", 951},
+        {"eth", 240},
+        {"euml", 235},
+        {"frac12", 189},
+        {"frac14", 188},
+        {"frac34", 190},
+        {"gamma", 947},
+        {"iacute", 237},
+        {"icirc", 238},
+        {"iexcl", 161},
+        {"igrave", 236},
+        {"iota", 953},
+        {"iquest", 191},
+        {"iuml", 239},
+        {"kappa", 954},
+        {"lambda", 955},
+        {"laquo", 171},
+        {"ldquo", 8220},
+        {"ldots", 8230},
+        {"lsquo", 8216},
+        {"macr", 175},
+        {"mdash", 8212},
+        {"micro", 181},
+        {"middot", 183},
+        {"mu", 956},
+        {"nbsp", 160},
+        {"ndash", 8211},
+        {"not", 172},
+        {"ntilde", 241},
+        {"nu", 957},
+        {"oacute", 243},
+        {"ocirc", 244},
+        {"ograve", 242},
+        {"omega", 969},
+        {"omicron", 959},
+        {"ordf", 170},
+        {"ordm", 186},
+        {"oslash", 248},
+        {"otilde", 245},
+        {"ouml", 246},
+        {"para", 182},
+        {"phi", 966},
+        {"pi", 960},
+        {"plusmn", 177},
+        {"pound", 163},
+        {"psi", 968},
+        {"quad", 8193},
+        {"raquo", 187},
+        {"rdquo", 8221},
+        {"reg", 174},
+        {"rho", 961},
+        {"rsquo", 8217},
+        {"sect", 167},
+        {"shy", 173},
+        {"sigma", 963},
+        {"sp", 8194},
+        {"sup1", 185},
+        {"sup2", 178},
+        {"sup3", 179},
+        {"szlig", 223},
+        {"tau", 964},
+        {"theta", 952},
+        {"thinsp", 8201},
+        {"thorn", 254},
+        {"times", 215},
+        {"trade", 8482},
+        {"uacute", 250},
+        {"ucirc", 251},
+        {"ugrave", 249},
+        {"uml", 168},
+        {"upsilon", 965},
+        {"uuml", 252},
+        {"varepsilon", 8712},
+        {"varphi", 981},
+        {"varpi", 982},
+        {"varrho", 1009},
+        {"vdots", 8942},
+        {"vsigma", 962},
+        {"vtheta", 977},
+        {"xi", 958},
+        {"yacute", 253},
+        {"yen", 165},
+        {"yuml", 255},
+        {"zeta", 950}
+};
+
+static Hchar byrune[nelem(byname)];        /* same entries as byname, sorted by rune; filled in by html_init */
+
+/*
+ * qsort comparison function: order two Hchar entries by entity
+ * name, using strcmp ordering.
+ */
+static int
+hnamecmp(const void *va, const void *vb)
+{
+        Hchar *lhs = (Hchar*)va;
+        Hchar *rhs = (Hchar*)vb;
+
+        return strcmp(lhs->s, rhs->s);
+}
+
+/*
+ * qsort comparison function: order two Hchar entries by rune value.
+ * Returns -1, 0 or 1 as a's rune is below, equal to or above b's.
+ *
+ * The runes are compared directly instead of being subtracted, so
+ * the sign of the result does not depend on the width or signedness
+ * of Rune (an unsigned difference would wrap before conversion to int).
+ */
+static int
+hrunecmp(const void *va, const void *vb)
+{
+        const Hchar *a, *b;
+
+        a = va;
+        b = vb;
+        if(a->r < b->r)
+                return -1;
+        if(a->r > b->r)
+                return 1;
+        return 0;
+}
+
+/*
+ * One-time setup of the lookup tables: duplicate byname into byrune,
+ * then sort byname by entity name and byrune by rune value.
+ * Calls after the first do nothing.
+ */
+static void
+html_init(void)
+{
+        static int done;
+
+        if(!done){
+                done = 1;
+                memmove(byrune, byname, sizeof byrune);
+                qsort(byname, nelem(byname), sizeof(Hchar), hnamecmp);
+                qsort(byrune, nelem(byrune), sizeof(Hchar), hrunecmp);
+        }
+}
+
+/*
+ * Binary search of byname for the entity name s.  The table must
+ * already be sorted by name, which html_init does.
+ * Returns the matching rune, or Runeerror when s is not in the table.
+ */
+static Rune
+findbyname(char *s)
+{
+        int lo, hi, mid, cmp;
+
+        /* search the half-open index range [lo, hi) */
+        lo = 0;
+        hi = nelem(byname);
+        while(lo < hi){
+                mid = lo + (hi-lo)/2;
+                cmp = strcmp(byname[mid].s, s);
+                if(cmp == 0)
+                        return byname[mid].r;
+                if(cmp < 0)
+                        lo = mid+1;
+                else
+                        hi = mid;
+        }
+        return Runeerror;
+}
+
+/*
+ * Binary search of byrune for the rune r.  The table must already be
+ * sorted by rune value, which html_init does.
+ * Returns the entity name, or nil when r has no entry.
+ */
+static char*
+findbyrune(Rune r)
+{
+        int lo, hi, mid;
+
+        /* search the half-open index range [lo, hi) */
+        lo = 0;
+        hi = nelem(byrune);
+        while(lo < hi){
+                mid = lo + (hi-lo)/2;
+                if(byrune[mid].r == r)
+                        return byrune[mid].s;
+                if(byrune[mid].r < r)
+                        lo = mid+1;
+                else
+                        hi = mid;
+        }
+        return nil;
+}
+/*
+ * html_in: read runes from fd through a Biobuf, collect them in rbuf
+ * and hand each full buffer, plus the final partial one, to OUT.
+ * A rune of '&' starts special handling that begins by storing it in
+ * buf.  The parameter x is unused.
+ *
+ * NOTE(review): this listing is damaged.  The two for-loop headers
+ * marked below have lost the text that followed a '<', so the '&'
+ * branch is incomplete as shown.  Presumably it decodes a named or
+ * numeric entity and, at bad:, passes the buffered text through
+ * unchanged -- confirm against an intact copy of html.c.
+ */
+void
+html_in(int fd, long *x, struct convert *out)
+{
+        char buf[100], *p;
+        Biobuf b;
+        Rune rbuf[N];
+        Rune *r, *er;        /* r: next free slot in rbuf; er: one past its end */
+        int c, i;
+        
+        USED(x);
+        
+        html_init();
+        r = rbuf;
+        er = rbuf+N;
+        Binit(&b, fd, OREAD);
+        while((c = Bgetrune(&b)) != Beof){
+                if(r >= er){        /* rbuf is full: pass it on and start over */
+                        OUT(out, rbuf, r-rbuf);
+                        r = rbuf;
+                }
+                if(c == '&'){
+                        buf[0] = c;
+                        for(i=1; i= NRUNE || c < 0)        /* NOTE(review): line truncated in this listing */
+                                                goto bad;
+                                        *r++ = c;
+                                        continue;
+                                }
+                        }
+                bad:
+                        for(p=buf; p= er){        /* NOTE(review): line truncated in this listing */
+                                        OUT(out, rbuf, r-rbuf);
+                                        r = rbuf;
+                                }
+                        }
+                        continue;
+                }
+                *r++ = c;
+        }
+        if(r > rbuf)        /* pass on whatever is left after end of input */
+                OUT(out, rbuf, r-rbuf);
+}
+
+/*
+ * use biobuf because can use more than UTFmax bytes per rune
+ */
+void
+html_out(Rune *r, int n, long *x)
+{
+        char *s;
+        Biobuf b;
+        Rune *er;
+        
+        html_init();
+        Binit(&b, 1, OWRITE);
+        er = r+n;
+        for(; r