README: describe how to add new parsed tags and fields to sfeed.c - sfeed - RSS and Atom parser
git clone git://git.codemadness.org/sfeed
Log
Files
Refs
README
LICENSE
---
commit aeb1398411ce245fa7982365640f7852d63b3d52
parent dbb7f7b66d2d10a4bf14a404b66fa20cbf8a02ca
Author: Hiltjo Posthuma 
Date:   Sat,  4 Feb 2023 12:34:56 +0100

README: describe how to add new parsed tags and fields to sfeed.c

Diffstat:
  M README                              |      90 +++++++++++++++++++++++++++++++

1 file changed, 90 insertions(+), 0 deletions(-)
---
diff --git a/README b/README
@@ -1070,6 +1070,96 @@ file:
 
 - - -
 
+sfeed.c: adding new XML tags or sfeed(5) fields to the parser
+-------------------------------------------------------------
+
+sfeed.c contains definitions to parse XML tags and map them to sfeed(5) TSV
+fields. Parsed RSS and Atom tag names are first stored as a TagId, which is a
+number.  This TagId is then mapped to the output field index.
+
+* Add a new TagId enum for the tag.
+
+* (optional) Add a new FeedField* enum for the new output field, or map the
+  tag to an existing field.
+
+* Add the new XML tag name to the array variable of parsed RSS or Atom
+  tags: rsstags[] or atomtags[].
+
+  These must be defined in alphabetical order, because a binary search is used
+  which uses the strcasecmp() function.
+
+* Add the parsed TagId to the output field in the array variable fieldmap[].
+
+  When another tag is also mapped to the same output field then the tag with
+  the highest TagId number value overrides the mapped field: the order is from
+  least important to most important.
+
+* If the new tag only uses the inner data of the XML tag, then this definition
+  is enough. If it has to parse a certain attribute, for example, you also
+  have to add a check for the TagId to the xmlattr() callback function.
+
+* (optional) Print the new field in the printfields() function.
+
+Below is a patch example to add the MRSS "media:content" tag as a new field:
+
+diff --git a/sfeed.c b/sfeed.c
+--- a/sfeed.c
++++ b/sfeed.c
+@@ -50,7 +50,7 @@ enum TagId {
+         RSSTagGuidPermalinkTrue,
+         /* must be defined after GUID, because it can be a link (isPermaLink) */
+         RSSTagLink,
+-        RSSTagEnclosure,
++        RSSTagMediaContent, RSSTagEnclosure,
+         RSSTagAuthor, RSSTagDccreator,
+         RSSTagCategory,
+         /* Atom */
+@@ -81,7 +81,7 @@ typedef struct field {
+ enum {
+         FeedFieldTime = 0, FeedFieldTitle, FeedFieldLink, FeedFieldContent,
+         FeedFieldId, FeedFieldAuthor, FeedFieldEnclosure, FeedFieldCategory,
+-        FeedFieldLast
++        FeedFieldMediaContent, FeedFieldLast
+ };
+ 
+ typedef struct feedcontext {
+@@ -137,6 +137,7 @@ static const FeedTag rsstags[] = {
+         { STRP("enclosure"),         RSSTagEnclosure         },
+         { STRP("guid"),              RSSTagGuid              },
+         { STRP("link"),              RSSTagLink              },
++        { STRP("media:content"),     RSSTagMediaContent      },
+         { STRP("media:description"), RSSTagMediaDescription  },
+         { STRP("pubdate"),           RSSTagPubdate           },
+         { STRP("title"),             RSSTagTitle             }
+@@ -180,6 +181,7 @@ static const int fieldmap[TagLast] = {
+         [RSSTagGuidPermalinkFalse] = FeedFieldId,
+         [RSSTagGuidPermalinkTrue]  = FeedFieldId, /* special-case: both a link and an id */
+         [RSSTagLink]               = FeedFieldLink,
++        [RSSTagMediaContent]       = FeedFieldMediaContent,
+         [RSSTagEnclosure]          = FeedFieldEnclosure,
+         [RSSTagAuthor]             = FeedFieldAuthor,
+         [RSSTagDccreator]          = FeedFieldAuthor,
+@@ -677,6 +679,8 @@ printfields(void)
+         string_print_uri(&ctx.fields[FeedFieldEnclosure].str);
+         putchar(FieldSeparator);
+         string_print_trimmed_multi(&ctx.fields[FeedFieldCategory].str);
++        putchar(FieldSeparator);
++        string_print_trimmed(&ctx.fields[FeedFieldMediaContent].str);
+         putchar('\n');
+ 
+         if (ferror(stdout)) /* check for errors but do not flush */
+@@ -718,7 +722,7 @@ xmlattr(XMLParser *p, const char *t, size_t tl, const char *n, size_t nl,
+         }
+ 
+         if (ctx.feedtype == FeedTypeRSS) {
+-                if (ctx.tag.id == RSSTagEnclosure &&
++                if ((ctx.tag.id == RSSTagEnclosure || ctx.tag.id == RSSTagMediaContent) &&
+                     isattr(n, nl, STRP("url"))) {
+                         string_append(&tmpstr, v, vl);
+                 } else if (ctx.tag.id == RSSTagGuid &&
+
+- - -
+
 Running custom commands inside the sfeed_curses program
 -------------------------------------------------------