#!/usr/bin/env python3
"""
q2n - QEC to NNTP sync

This script syncs QEC logs to NNTP.

* everything is configurable via `Config`
* has a throttler so we don't accidentally submit too much at a time
* has a dry-run mode for submission
* remembers what has already been submitted, so logs don't get submitted again

* TODO:
  - read from argv or a config file
  - set up a cron job
  - put it on tildegit (once my application issue gets sorted out)
"""
from dataclasses import dataclass
import io
import logging
import os
import pickle
import pwd
import random
import time
import typing as t

_LOGGER = logging.getLogger(__name__)

Path = str
User = str
NntpArticleBody = str
LogEntryHash = str


@dataclass
class Config:
    listing_dir: str
    listing_filename: str
    nntp_group: str
    nntp_server: str
    max_submission: int
    submission_store_dir: Path

    @classmethod
    def create(cls) -> "Config":
        return Config(
            listing_dir="/var/gopher/",
            listing_filename="listing.gophermap",
            nntp_server="localhost",  # TODO: find a more appropriate one
            nntp_group="cosmic.worldbuilding",
            max_submission=5,
            submission_store_dir="/var/tmp/q2n",
        )


@dataclass
class Ship:
    name: str
    owner: User


@dataclass
class LogEntry:
    ship: Ship
    author: User
    title: str
    file_name: str


class LogIterator(t.Protocol):
    def __call__(self) -> t.List[LogEntry]:
        ...


class SubmitCondition(t.Protocol):
    def __call__(self, log_entry: LogEntry) -> bool:
        ...


class LogSubmitter(t.Protocol):
    def __call__(self, log: LogEntry) -> None:
        ...


@dataclass
class Utils:
    config: Config

    def ship_owner(self, ship_name: str) -> User:
        return self._get_path_user(f"{self.config.listing_dir}/{ship_name}")

    def read_log_content(self, log: LogEntry) -> str:
        return self._read_log_entry(
            f"{self.config.listing_dir}/{log.ship.name}/{log.file_name}"
        )

    @staticmethod
    def _read_log_entry(path: str) -> str:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()

    @staticmethod
    def _get_path_user(fp: str) -> User:
        # A ship's owner is whoever owns the ship's directory.
        st = os.stat(fp)
        return pwd.getpwuid(st.st_uid).pw_name


@dataclass
class SubmittedLogsStore:
    store_dir: str

    def __post_init__(self):
        # os.makedirs instead of shelling out to `mkdir -p`: same effect,
        # no shell involved.
        os.makedirs(self.store_dir, exist_ok=True)

    def record_submission(self, log: LogEntry):
        with open(f"{self.store_dir}/{self.checksum(log)}", "wb") as f:
            pickle.dump(log, f)

    def load_submitted_logs(self) -> t.List[LogEntryHash]:
        # Each submitted log is stored as a file named after its checksum,
        # so the directory listing is the set of submitted-log hashes.
        return os.listdir(self.store_dir)

    @staticmethod
    def checksum(log: LogEntry) -> LogEntryHash:
        import hashlib

        # Ship name + file name uniquely identify a log entry.
        checked_str = f"{log.ship.name}{log.file_name}"
        return hashlib.md5(checked_str.encode("utf-8")).hexdigest()


# Throttles the log entries to submit, just in case there's a bug. Usually
# we'd limit the logs to submit to a small number, and maybe also send out
# some alert.
SubmissionThrottle = t.Callable[[t.List[LogEntry]], t.List[LogEntry]]
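
# A possible sketch of the "send out some alert" idea mentioned above. It is
# not wired up anywhere below, and `AlertingThrottler` and its `send_alert`
# hook are hypothetical additions: a `SubmissionThrottle` that also warns
# whenever it actually drops logs.
@dataclass
class AlertingThrottler:
    max_submission: int
    send_alert: t.Callable[[str], None] = _LOGGER.warning

    def __call__(self, logs: t.List[LogEntry]) -> t.List[LogEntry]:
        if len(logs) > self.max_submission:
            self.send_alert(
                f"Throttling: {len(logs)} logs pending, only submitting "
                f"{self.max_submission}; a bug may be flooding the queue"
            )
        return logs[: self.max_submission]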

@dataclass
class ListingFileLogIterator(LogIterator):
    listing_dir: str
    listing_filename: str
    utils: Utils

    def __call__(self) -> t.List[LogEntry]:
        with open(
            f"{self.listing_dir}/{self.listing_filename}",
            "r",
            encoding="utf-8",
        ) as f:
            entries = f.readlines()
        return [self._parse(ent) for ent in entries]

    def _parse(self, entry: str) -> LogEntry:
        """Parse a listing file entry into a `LogEntry`

        An entry looks like this:

            0betsy - About QEC    /betsy/qec.txt

        I.e. 0<ship> - <title><TAB><file_path>

        Note:
        * <file_path> is rooted at /var/gopher, i.e., where the listing
          file resides.
        """
        import re

        res = re.match(r"^0(.+?) - (.+)\t(.+)$", entry)
        if not res:
            raise ValueError(f"Cannot parse: {entry}")
        # It's more robust to use the file path (/ship/fn.txt) to obtain the
        # ship's name, rather than res.group(1). This is b/c there're
        # duplicated entries in the listing:
        #   0Polonia - 24131    /Polonia-II/24131.txt
        #   0Polonia-II - 24131    /Polonia-II/24131.txt
        title = res.group(2)
        log_path = res.group(3)
        ship, log_fn = self._parse_log_file_name(log_path)
        ship_owner = self.utils.ship_owner(ship)
        return LogEntry(
            ship=Ship(name=ship, owner=ship_owner),
            author=ship_owner,
            title=title,
            file_name=log_fn,
        )

    @staticmethod
    def _parse_log_file_name(ship_and_file: str) -> t.Tuple[str, str]:
        "/<ship>/file.txt -> (<ship>, file.txt)"
        return t.cast(
            t.Tuple[str, str],
            tuple(x for x in ship_and_file.split("/") if x),
        )


@dataclass
class SubmitConditionImpl(SubmitCondition):
    submission_store: SubmittedLogsStore

    def __call__(self, log_entry: LogEntry) -> bool:
        # Note: this re-lists the store directory on every call; fine at
        # this scale.
        return (
            self.submission_store.checksum(log_entry)
            not in self.submission_store.load_submitted_logs()
        )


@dataclass
class NntpLogSubmitter(LogSubmitter):
    @dataclass
    class NntpLogFormat:
        subject: str
        body: str
        from_: str

    submission_store: SubmittedLogsStore
    read_log_entry: t.Callable[[LogEntry], NntpArticleBody]
    nntp_group: str
    nntp_server: str
    dry_run: bool = False

    def __call__(self, log: LogEntry) -> None:
        self.nntp_submit(log)
        self.submission_store.record_submission(log)

    def add_envelope(self, article: str, log: LogEntry) -> str:
        return f"""\
TIMESTAMP: {int(time.time())} SGT
AUTHOR: {log.author}
ORIGINATING SHIP: {log.ship.name}
QEC GATEWAY: QG-{random.randint(0, 31)}

{article}
"""

    def nntp_submit(self, log: LogEntry) -> None:
        # NB: nntplib is deprecated as of Python 3.11 (removed in 3.13).
        import nntplib as nn

        article_body = self.read_log_entry(log)
        article_body = self.add_envelope(article_body, log)
        # NNTP headers must be separated from the body by one blank line.
        msg = f"""\
Newsgroups: {self.nntp_group}
Subject: [QEC] {log.title}
From: {log.author} <{log.author}@cosmic.voyage>

{article_body}
"""
        f = io.BytesIO(msg.encode("utf-8"))
        _LOGGER.info(f"About to submit log:\n{msg}")
        if not self.dry_run:
            # Only connect when actually posting, so dry runs don't need a
            # live server.
            with nn.NNTP(self.nntp_server, readermode=True) as s:
                s.post(f)


@dataclass
class SubmissionThrottler:
    max_submission: int

    def __call__(self, logs: t.List[LogEntry]) -> t.List[LogEntry]:
        return logs[: self.max_submission]


def main():
    logging.basicConfig()
    logging.root.setLevel(logging.INFO)

    config = Config.create()
    _LOGGER.info(f"Running with config: {config}")

    utils = Utils(config=config)
    iterate_logs = ListingFileLogIterator(
        listing_dir=config.listing_dir,
        listing_filename=config.listing_filename,
        utils=utils,
    )
    throttler = SubmissionThrottler(config.max_submission)
    submission_store = SubmittedLogsStore(store_dir=config.submission_store_dir)
    should_submit = SubmitConditionImpl(submission_store=submission_store)
    submit_log = NntpLogSubmitter(
        submission_store=submission_store,
        read_log_entry=utils.read_log_content,
        nntp_group=config.nntp_group,
        nntp_server=config.nntp_server,
        dry_run=True,  # TODO: remove
    )

    logs_to_submit = [log for log in iterate_logs() if should_submit(log)]

    ###
    # FOR TEST: remove - randomly choose one log
    ###
    logs_to_submit = [random.choice(logs_to_submit)] if logs_to_submit else []

    logs_to_submit = throttler(logs_to_submit)
    _LOGGER.info(f"Submitting {len(logs_to_submit)} logs...")
    for log in logs_to_submit:
        submit_log(log)


if __name__ == "__main__":
    main()
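
# For the cron TODO in the module docstring, a crontab entry along these
# lines should do; the interpreter and script paths here are assumptions,
# adjust them to wherever this script actually lives:
#
#   */30 * * * * /usr/bin/python3 /home/<user>/q2n.py >>/var/tmp/q2n/cron.log 2>&1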