lawn-hark: Parallelization using xargs(1). - gopher-lawn - The gopher lawn gopher directory project.
git clone git://bitreich.org/gopher-lawn/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/gopher-lawn/
Log
Files
Refs
Tags
---
commit e3cc5279072ddbee7f2d58152732c759a9ede582
parent b9a49018389fa0126149e3cbef3ecdef4defe6ef
Author: Julian Schweinsberg 
Date:   Tue, 19 Dec 2023 20:25:02 +0100

lawn-hark: Parallelization using xargs(1).

Signed-off-by: Christoph Lohmann <20h@r-36.net>

Diffstat:
  M lawn-hark/lawn-hark.sh              |      73 +++++++++++++++++--------------

1 file changed, 41 insertions(+), 32 deletions(-)
---
diff --git a/lawn-hark/lawn-hark.sh b/lawn-hark/lawn-hark.sh
@@ -5,6 +5,7 @@
 
 errorthreshold=3
 timeout=15
+maxworkers=8
 onionsocksproxy="127.0.0.1:9050"
 
 function tcpdial() {
@@ -95,26 +96,14 @@ function checkraw() {
         return 0
 }
 
-checktime="$(date +%s)" 
-statedir="$1"
+program="$(readlink -f "$0")"
 
-if [ -z "${statedir}" ];
+if [ "${LAWNHARK_WORKER}" = "1" ];
 then
-        printf "You need to specify a state dir.\n" >&2
-        exit 1
-fi
-
-mkdir -p "${statedir}"
-if [ ! -d "${statedir}" ];
-then
-        printf "%s is not a directory! Aborting.\n" "${statedir}" >&2
-        exit 1
-fi
+        statedir="$1"
+        checktime="$2"
+        f="$3"
 
-shift
-
-for f;
-do
         type=""
         selector=""
         host=""
@@ -145,12 +134,10 @@ do
                 || [ -z "${host}" ] \
                 || [ -z "${port}" ];
         then
-                printf "ERROR\t%s\tInvalid entry!\n" "${f}" >&2
-                continue
+                flock -x "${program}" printf "ERROR\t%s\tInvalid entry!\n" "${f}" >&2
+                exit
         fi
 
-        #printf "DEBUG\t%s\tchecking\t%s\t%s\t%s\t%s\n" "${f}" "${type}" "${selector}" "${host}" "${port}"
-
         case "${type}" in
         cso )
                 error="$(checkcso "${host}" "${port}")"
@@ -182,8 +169,8 @@ do
                                 error="$(checkraw "${sshhost}" "${sshport}")"
                                 ;;
                         * )
-                                printf "TODO\t%s\tCan't handle %s\n" "${f}" "${url}"
-                                continue
+                                flock -x "${program}" printf "TODO\t%s\tCan't handle %s\n" "${f}" "${url}" >&2
+                                exit
                                 ;;
                         esac
                 else
@@ -215,20 +202,42 @@ do
 
         if [ ${errorcount} -ge ${errorthreshold} ];
         then
-                printf "ERROR\t%s\t%s\n" "${f}" "${error}" >&2
+                flock -x "${program}" printf "ERROR\t%s\t%s\n" "${f}" "${error}" >&2
         fi
 
         printf "%s\t%s\n" "${checktime}" "${errorcount}" > "${statefile}"
-done
+else
+        checktime="$(date +%s)" 
+        statedir="$1"
 
-# garbage collection
-find "${statedir}" -type f | while read -r f;
-do
-        IFS="        " read -r lastcheck errorcount < "${f}"
+        if [ -z "${statedir}" ];
+        then
+                printf "You need to specify a state dir.\n" >&2
+                exit 1
+        fi
 
-        if [ ${lastcheck} -ne ${checktime} ];
+        mkdir -p "${statedir}"
+        if [ ! -d "${statedir}" ];
         then
-                rm -f "${f}"
+                printf "%s is not a directory! Aborting.\n" "${statedir}" >&2
+                exit 1
         fi
-done
 
+        shift
+
+        for f;
+        do
+                printf "%s\0" "${f}"
+        done | LAWNHARK_WORKER=1 xargs -r -0 -P "${maxworkers}" -L1 "${program}" "${statedir}" "${checktime}"
+
+        # garbage collection
+        find "${statedir}" -type f | while read -r f;
+        do
+                IFS="        " read -r lastcheck errorcount < "${f}"
+
+                if [ ${lastcheck} -ne ${checktime} ];
+                then
+                        rm -f "${f}"
+                fi
+        done
+fi