<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<meta http-equiv=Content-Type content="text/html; charset=utf8">
<title>/usr/web/sources/contrib/cross/field.c - Plan 9 from Bell Labs</title>
<!-- THIS FILE IS AUTOMATICALLY GENERATED. -->
<!-- EDIT sources.tr INSTEAD. -->
</meta>
</head>
<body>
<p style="margin-top: 0; margin-bottom: 0.17in"></p>
<p style="line-height: 1.2em; margin-left: 1.00in; text-indent: 0.00in; margin-right: 1.00in; margin-top: 0; margin-bottom: 0; text-align: center;">
<span style="font-size: 10pt"><a href="/plan9/">Plan 9 from Bell Labs</a>&rsquo;s /usr/web/sources/contrib/cross/field.c</span></p>
<p style="margin-top: 0; margin-bottom: 0.17in"></p>
<p style="margin-top: 0; margin-bottom: 0.17in"></p>
<center><font size=-1>
Copyright © 2009 Alcatel-Lucent.<br />
Distributed under the
<a href="/plan9/license.html">Lucent Public License version 1.02</a>.
<br />
<a href="/plan9/download.html">Download the Plan 9 distribution.</a>
</font>
</center>
<p style="margin-top: 0; margin-bottom: 0.17in"></p>
<table width="100%" cellspacing=0 border=0><tr><td align="center">
<table cellspacing=0 cellpadding=5 bgcolor="#eeeeff"><tr><td align="left">
<pre>
<!-- END HEADER -->
#include &lt;u.h&gt;
#include &lt;libc.h&gt;
#include &lt;bio.h&gt;
#include &lt;ctype.h&gt;
#include &lt;regexp.h&gt;

typedef struct Range Range;
typedef struct Slice Slice;
typedef struct Slices Slices;
typedef struct Token Token;

/*
 * One selected range of fields, as produced by parserange().
 * process() treats begin and end as 1-based and inclusive; a
 * negative value counts back from the last field, 0 as begin
 * selects the whole input line, and NF stands for the last field.
 */
struct Range {
	int begin;	/* first field of the range */
	int end;	/* last field of the range */
};

/*
 * A view of part of a string that is not NUL-terminated on its own:
 * the bytes from begin up to, but not including, end.
 * Sfmt() prints a Slice via the %S verb.
 */
struct Slice {
	char *begin;	/* first byte of the slice */
	char *end;	/* one past the last byte of the slice */
};
#pragma varargck type "S" Slice

/*
 * Growable array of Slice values, filled by append() and
 * emptied (without freeing) by reset().
 */
struct Slices {
	uint len;	/* number of entries currently in use */
	uint size;	/* number of entries allocated in slices */
	Slice *slices;	/* storage, grown with realloc() by append() */
};

/*
 * A typed token. NOTE(review): nothing in the code shown here
 * uses this struct; the lexer returns plain Slice values.
 */
struct Token {
	int type;
	Slice slice;
};

/*
 * NF is the sentinel stored in a Range for the field-list keyword
 * 'NF' and for an open-ended range such as '3-'. process() maps a
 * begin of NF to the number of fields on the line; as an end it is
 * simply larger than any field count the loop will reach.
 */
enum {
	NF = 0x7FFFFFFF
};

/* Buffered I/O; main() binds bin to fd 0 for reading and bout to fd 1 for writing. */
Biobuf bin;
Biobuf bout;

int guesscollapse(const char *sep);
int Sfmt(Fmt *f);
Slice lex(char **sp);
Slice next(char **sp);
Slice peek(void);
void extend(Slice *slice, char **sp);
int tiseof(Slice *tok);
int tisdelim(Slice *tok);
int tisspace(Slice *tok);
int parseranges(char *src, Range **rv);
Range parserange(char **sp);
int stoi(Slice slice);
int parsenum(char **s);
void process(Biobuf *b, int rc, Range *rv, Reprog *delim, char *sep, int collapse);
void pprefix(char *prefix);
uint split(char *line, Reprog *delim, Slices *ss, int collapse);
void reset(Slices *ss);
void append(Slices *ss, char *begin, char *end);
void usage(void);

/*
 * Entry point: parse the flags, compile the input separator,
 * parse the field list (first non-flag argument) and then run
 * process() over standard input or over each named file.
 */
void
main(int argc, char *argv[])
{
	Range *rv;
	char *filename, *insep, *outsep;
	Reprog *delim;
	int rc, collapse, eflag, Eflag, oflag, zflag;

	/* defaults: split on runs of blanks, join output with one space */
	insep = "[ \t\v\r]+";
	outsep = " ";
	Binit(&amp;bin, 0, OREAD);
	Binit(&amp;bout, 1, OWRITE);
	/* %S prints a Slice */
	fmtinstall('S', Sfmt);

	zflag = 0;
	eflag = 0;
	Eflag = 0;
	oflag = 0;
	ARGBEGIN {
	case '0':
		/* empty output separator; pprefix() then emits a NUL byte */
		outsep = "";
		zflag = 1;
		break;
	case 'e':
		/* force collapse off (see below) */
		eflag = 1;
		break;
	case 'E':
		/* force collapse on (see below) */
		Eflag = 1;
		break;
	case 'F':
		/* input separator regular expression */
		insep = EARGF(usage());
		break;
	case 'O':
		/* output separator string */
		oflag = 1;
		outsep = EARGF(usage());
		break;
	default:
		usage();
		break;
	} ARGEND;
	if (eflag &amp;&amp; Eflag) {
		fprint(2, "flag conflict: -e and -E are mutually exclusive\n");
		usage();
	}
	if (oflag &amp;&amp; zflag) {
		fprint(2, "flag conflict: -0 and -O are mutually exclusive\n");
		usage();
	}
	/* the field list is mandatory */
	if (argc &lt;= 0)
		usage();
	delim = regcomp(insep);
	if (delim == nil)
		sysfatal("bad input separator regexp '%s': %r", insep);
	rv = nil;
	rc = parseranges(*argv++, &amp;rv);
	if (rc &lt; 0)
		sysfatal("parseranges failed");
	/* guess from the separator's shape, then let -e / -E override */
	collapse = guesscollapse(insep);
	if (eflag)
		collapse = 0;
	if (Eflag)
		collapse = 1;
	if (*argv == nil) {
		/* no files named: read standard input */
		process(&amp;bin, rc, rv, delim, outsep, collapse);
	} else while ((filename = *argv++) != nil) {
		Biobuf *b;
		/* "-" names standard input */
		if (strcmp(filename, "-") == 0) {
			process(&amp;bin, rc, rv, delim, outsep, collapse);
			continue;
		}
		b = Bopen(filename, OREAD);
		if (b == nil)
			sysfatal("failure opening '%s': %r", filename);
		process(b, rc, rv, delim, outsep, collapse);
		Bterm(b);
	}

	exits(0);
}

int
guesscollapse(const char *sep)
{
	int len = utflen(sep);
	return len &gt; 1 &amp;&amp; (len != 2 || *sep != '\\');
}

/*
 * Format routine for the %S verb (installed by main with fmtinstall):
 * prints the bytes of a Slice. A Slice with a nil begin or end prints
 * nothing and returns 0; otherwise the result of fmtprint is returned.
 */
int
Sfmt(Fmt *f)
{
	Slice s;

	s = va_arg(f-&gt;args, Slice);
	if (s.begin == nil || s.end == nil)
		return 0;
	/*
	 * The '*' precision is fetched as an int; a raw pointer
	 * difference may be wider than that, so convert explicitly.
	 */
	return fmtprint(f, "%.*s", (int)(s.end - s.begin), s.begin);
}

/*
 * The field selection syntax is:
 *
 * fields := range [[delim] fields]
 * range := field | NUM '-' [field]
 * field := NUM | 'NF'
 * delim := ws+ | '|' | ','
 * ws := c such that `isspace(c)` is true.
 */
Slice
lex(char **sp)
{
	char *s;
	Slice slice;

	memset(&amp;slice, 0, sizeof(slice));
	s = *sp;
	slice.begin = s;
	while (isspace(*s))
		s++;
	if (s == *sp) {
		switch (*s) {
		case '\0':
			slice.begin = nil;
			break;
		case '-':
			s++;
			break;
		case 'N':
			if (*++s == 'F')
				s++;
			break;
		case ',':
		case '|':
			s++;
			break;
		default:
			if (!isdigit(*s))
				sysfatal("lexical error, c = %c", *s);
			while (isdigit(*s))
				s++;
			break;
		}
	}
	slice.end = s;
	*sp = s;

	return slice;
}

/* One-token lookahead for the field-list parser: read by peek(), refilled by next(). */
Slice current;

/* Return the lookahead token without consuming it. */
Slice
peek()
{
	return current;
}

/*
 * Consume the lookahead token: return the token that was current
 * on entry and replace it with the next one lexed from *sp.
 */
Slice
next(char **sp)
{
	Slice consumed;

	consumed = current;
	current = lex(sp);
	return consumed;
}

/*
 * Consume the lookahead token and stretch *slice so that it ends
 * where that token ends. The beginning of *slice is not touched.
 */
void
extend(Slice *slice, char **sp)
{
	slice-&gt;end = next(sp).end;
}

/*
 * Convert the decimal number held in slice, with an optional
 * leading '-', to an int. A non-digit character is fatal, and so
 * is a magnitude that does not fit below or at NF (0x7FFFFFFF);
 * previously such input overflowed the signed accumulator.
 */
int
stoi(Slice slice)
{
	char *p;
	int digit, n, negative;

	n = 0;
	negative = 0;
	p = slice.begin;
	if (*p == '-') {
		negative = 1;
		p++;
	}
	for (; p != slice.end; p++) {
		if (!isdigit(*p))
			sysfatal("stoi: bad number in '%S', c = %c", slice, *p);
		digit = *p - '0';
		/* n*10 + digit must not exceed NF */
		if (n &gt; (NF - digit) / 10)
			sysfatal("stoi: number too large in '%S'", slice);
		n = n * 10 + digit;
	}

	return negative ? -n : n;
}

/*
 * True when tok marks the end of input: either the pointer itself
 * is nil or the token has a nil begin (what lex() returns at '\0').
 */
int
tiseof(Slice *tok)
{
	if (tok == nil)
		return 1;
	return tok-&gt;begin == nil;
}

/*
 * True when tok may separate two ranges in a field list:
 * end of input, whitespace, ',' or '|'.
 */
int
tisdelim(Slice *tok)
{
	char c;

	if (tiseof(tok))
		return 1;
	if (tisspace(tok))
		return 1;
	c = *tok-&gt;begin;
	return c == ',' || c == '|';
}

/* True when tok is not end of input and starts with a whitespace character. */
int
tisspace(Slice *tok)
{
	if (tiseof(tok))
		return 0;
	return isspace(*tok-&gt;begin) != 0;
}

/*
 * Parse the field list in src into a freshly allocated array of
 * Range values stored through rv. Returns the number of ranges,
 * or -1 (leaving *rv untouched) when src is nil or empty.
 * Syntax errors and allocation failure are fatal.
 */
int
parseranges(char *src, Range **rv)
{
	char *s;
	Range r, *rs, *t;
	int n, m;
	Slice tok;

	rs = nil;
	m = 0;
	n = 0;
	s = src;
	if (s == nil || *s == '\0')
		return -1;
	/* prime the one-token lookahead */
	next(&amp;s);
	do {
		/*
		 * Skip whitespace in front of a range. next() returns the
		 * token it consumed, not the new lookahead, so the loop
		 * must re-peek; testing next()'s result would also swallow
		 * the first token of the range (breaking lists like "1, 2").
		 */
		for (tok = peek(); tisspace(&amp;tok); tok = peek())
			next(&amp;s);
		r = parserange(&amp;s);
		if (n &gt;= m) {
			/* grow geometrically: 1, 2, 4, ... */
			m = 2*m;
			if (m == 0)
				m = 1;
			t = realloc(rs, sizeof(Range) * m);
			if (t == nil)
				sysfatal("realloc failed parsing ranges");
			rs = t;
		}
		rs[n++] = r;
		/* every range must be followed by a delimiter or end of input */
		tok = next(&amp;s);
		if (!tisdelim(&amp;tok))
			sysfatal("syntax error in field list");
	} while (!tiseof(&amp;tok));
	*rv = rs;

	return n;
}

/*
 * True when tok is not end of input and its text is exactly the
 * string s. The lengths are compared as well as the bytes, so a
 * token that is only a prefix of s (a lone "N" against "NF")
 * does not match.
 */
int
tokeq(Slice *tok, const char *s)
{
	long n;

	if (tiseof(tok))
		return 0;
	n = tok-&gt;end - tok-&gt;begin;
	return strlen(s) == n &amp;&amp; strncmp(tok-&gt;begin, s, n) == 0;
}

/*
 * Parse one range from the token stream:
 *   NF       begin and end are both NF
 *   n        begin and end are both n
 *   n-       begin is n, end is NF
 *   n-NF     begin is n, end is NF
 *   n-m      begin is n, end is m
 * The delimiter that follows the range is left as the lookahead.
 */
Range
parserange(char **sp)
{
	Range r;
	Slice la;

	la = peek();
	if (tokeq(&amp;la, "NF")) {
		next(sp);
		r.begin = NF;
		r.end = NF;
		return r;
	}

	r.begin = parsenum(sp);
	r.end = r.begin;
	la = peek();
	if (!tokeq(&amp;la, "-"))
		return r;

	/* open-ended until an explicit upper bound is seen */
	next(sp);
	r.end = NF;
	la = peek();
	if (tokeq(&amp;la, "NF"))
		next(sp);
	else if (!tiseof(&amp;la) &amp;&amp; !tisdelim(&amp;la))
		r.end = parsenum(sp);
	return r;
}

/*
 * Consume and return one integer from the token stream. A digit
 * token is converted directly; a '-' token is joined with the
 * token after it to form a negative number. End of input or any
 * other token is fatal.
 */
int
parsenum(char **sp)
{
	Slice tok;

	tok = next(sp);
	if (tiseof(&amp;tok))
		sysfatal("EOF in number parser");
	if (isdigit(*tok.begin))
		return stoi(tok);
	if (*tok.begin != '-')
		sysfatal("number parse error: unexpected '%S'", tok);
	/* pull the following token into tok so it reads "-digits" */
	extend(&amp;tok, sp);
	if (!isdigit(*(tok.begin + 1)))
		sysfatal("negative number parse error: unexpected '%S'", tok);
	return stoi(tok);
}

/*
 * Read b line by line, split each line into fields with delim and
 * print the fields selected by the rc ranges in rv to bout, joined
 * by outsep (written through pprefix()).
 */
void
process(Biobuf *b, int rc, Range *rv, Reprog *delim, char *outsep, int collapse)
{
	char *line, *prefix;
	/* third argument of Brdstr; per bio(2) a non-zero value replaces the delimiter with a NUL */
	const int nulldelim = 1;
	Slice *s;
	Slices ss;

	/* ss is reused for every line; split() resets its length each time */
	memset(&amp;ss, 0, sizeof(ss));
	while ((line = Brdstr(b, '\n', nulldelim)) != 0) {
		int printed = 0;
		uint nfields = split(line, delim, &amp;ss, collapse);
		s = ss.slices;
		/* nil until something has been printed, so no leading separator */
		prefix = nil;
		for (int k = 0; k &lt; rc; k++) {
			int begin = rv[k].begin;
			int end = rv[k].end;
			/* field 0 is the whole line; any remainder of the range starts at field 1 */
			if (begin == 0) {
				pprefix(prefix);
				prefix = outsep;
				Bprint(&amp;bout, "%s", line);
				printed = 1;
				begin = 1;
			}
			if (begin == NF)
				begin = nfields;
			/* negative numbers count back from the last field: -1 is the last */
			if (begin &lt; 0)
				begin += nfields + 1;
			/* turn the 1-based begin into a 0-based index; end stays as an exclusive bound */
			begin--;
			if (end &lt; 0)
				end += nfields + 1;
			/* nothing to print for empty or out-of-range selections */
			if (begin &lt; 0 || end &lt; 0 || end &lt; begin || nfields &lt; begin)
				continue;
			for (int f = begin; f &lt; end &amp;&amp; f &lt; nfields; f++) {
				pprefix(prefix);
				prefix = outsep;
				Bprint(&amp;bout, "%S", s[f]);
				printed = 1;
			}
		}
		/* when collapsing, a line that selected nothing produces no output line */
		if (rc != 0 &amp;&amp; (printed || !collapse))
			Bputc(&amp;bout, '\n');
		free(line);
	}
	free(ss.slices);
}

/*
 * Write the separator that goes in front of the next output item.
 * A nil prefix writes nothing; an empty string writes a single
 * NUL byte; anything else is written as a string.
 */
void
pprefix(char *prefix)
{
	if (prefix != nil) {
		if (prefix[0] != '\0')
			Bprint(&amp;bout, "%s", prefix);
		else
			Bputc(&amp;bout, '\0');
	}
}

/* Empty ss for reuse; the allocated storage and its size are kept. */
void
reset(Slices *ss)
{
	ss-&gt;len = 0;
}

/*
 * Split line into fields separated by matches of delim and store
 * them in ss (which is reset first). Returns the number of fields.
 * With collapse set, empty fields are dropped, including one in
 * front of a delimiter at the very start of the line and one after
 * a trailing delimiter; otherwise every field is kept.
 *
 * A match of zero length is never treated as a delimiter. The
 * search simply resumes one rune further on, so an expression that
 * can match the empty string (such as 'x*') cannot stall the loop.
 */
uint
split(char *line, Reprog *delim, Slices *ss, int collapse)
{
	char *field, *s, *b, *e;
	Rune r;
	Resub match[1];

	reset(ss);
	field = line;	/* start of the field being collected */
	s = line;	/* where the next search starts */
	for (;;) {
		/* regexec restricts the search to match[0] when it is non-zero */
		memset(match, 0, sizeof(match));
		if (!regexec(delim, s, match, nelem(match)))
			break;
		b = match[0].sp;
		e = match[0].ep;
		if (b == e) {
			if (*b == '\0')
				break;
			s = b + chartorune(&amp;r, b);
			continue;
		}
		if (!collapse || (b != line &amp;&amp; b != field))
			append(ss, field, b);
		field = e;
		s = e;
	}
	/* whatever follows the last delimiter is the final field */
	e = field + strlen(field);
	if (!collapse || field != e)
		append(ss, field, e);

	return ss-&gt;len;
}

/*
 * Add the slice [begin, end) to the end of ss, doubling the
 * storage (starting from one entry) when it is full.
 * Allocation failure is fatal.
 */
void
append(Slices *ss, char *begin, char *end)
{
	Slice *grown, *slot;

	if (ss-&gt;len &gt;= ss-&gt;size) {
		ss-&gt;size += ss-&gt;size;
		if (ss-&gt;size == 0)
			ss-&gt;size = 1;
		grown = realloc(ss-&gt;slices, ss-&gt;size * sizeof(Slice));
		if (grown == nil)
			sysfatal("malloc failed appending slice: %r");
		ss-&gt;slices = grown;
	}
	slot = &amp;ss-&gt;slices[ss-&gt;len];
	slot-&gt;begin = begin;
	slot-&gt;end = end;
	ss-&gt;len++;
}

/* Print the command synopsis through sysfatal. */
void
usage()
{
	sysfatal("usage: field [ -E | -e ] [ -F regexp ] [ -0 | -O delimiter ] &lt;field list&gt; [file...]");
}
<!-- BEGIN TAIL -->
</pre>
</td></tr></table>
</td></tr></table>
<p style="margin-top: 0; margin-bottom: 0.17in"></p>
<p style="line-height: 1.2em; margin-left: 1.00in; text-indent: 0.00in; margin-right: 1.00in; margin-top: 0; margin-bottom: 0; text-align: center;">
<span style="font-size: 10pt"></span></p>
<p style="margin-top: 0; margin-bottom: 0.50in"></p>
<p style="margin-top: 0; margin-bottom: 0.33in"></p>
<center><table border="0"><tr>
<td valign="middle"><a href="http://www.alcatel-lucent.com/"><img border="0" src="/plan9/img/logo_ft.gif" alt="Bell Labs" />
</a></td>
<td valign="middle"><a href="http://www.opensource.org"><img border="0" alt="OSI certified" src="/plan9/img/osi-certified-60x50.gif" />
</a></td>
<td><img style="padding-right: 45px;" alt="Powered by Plan 9" src="/plan9/img/power36.gif" />
</td>
</tr></table></center>
<p style="margin-top: 0; margin-bottom: 0.17in"></p>
<center>
<span style="font-size: 10pt">(<a href="/plan9/">Return to Plan 9 Home Page</a>)</span>
</center>
<p style="margin-top: 0; margin-bottom: 0.17in"></p>
<center><font size=-1>
<span style="font-size: 10pt"><a href="http://www.lucent.com/copyright.html">Copyright</a></span>
<span style="font-size: 10pt">© 2009 Alcatel-Lucent.</span>
<span style="font-size: 10pt">All Rights Reserved.</span>
<br />
<span style="font-size: 10pt">Comments to</span>
<span style="font-size: 10pt"><a href="mailto:webmaster@plan9.bell-labs.com">webmaster@plan9.bell-labs.com</a>.</span>
</font></center>
</body>
</html>