;One Billion Row Challenge - Implement a silly and trivial programming challenge. ;Copyright (C) 2024 Prince Trippy <programmer@verisimilitudes.net>. ;This program is free software: you can redistribute it and/or modify it under the terms of the ;GNU Affero General Public License version 3 as published by the Free Software Foundation. ;This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without ;even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ;See the GNU Affero General Public License for more details. ;You should have received a copy of the GNU Affero General Public License along with this program. ;If not, see <http://www.gnu.org/licenses/>. (defun billion-row-challenge (pathname &optional (stream *standard-output*) &aux (*standard-output* stream) (set (make-array 10000 :element-type 'string :fill-pointer 0)) (hash-table (make-hash-table :test 'equal :size 11000))) "Process one billion lines, each containing a semicolon-separated station name and number. These station names are sorted and displayed in-order alongside minimums, maximums, and averages. If *FEATURES* contains a symbol :CAREFUL then interactive line checking and correction will be done. A hash-table containing the same values is returned." (symbol-macrolet ((minimum (aref elt 0)) (maximum (aref elt 1)) (sum (aref elt 2)) (total (aref elt 3))) (with-standard-io-syntax (let (*read-eval*) (with-open-file (*standard-input* pathname) (loop (let* ((read-line (read-line *standard-input* nil)) (position #0=(position #\; read-line :test 'char= :from-end t))) (unless read-line (return)) #+careful (loop (if position (return)) (restart-case (error "Invalid line: ~A" read-line) (store-value (use-value) :report "Provide a new line." :interactive (lambda () (list (read-line *query-io*))) (setq read-line use-value position #0#)))) (let* ((namestring (make-array position :element-type 'character :displaced-to read-line :displaced-index-offset 0)) (number (read-from-string read-line nil nil :start (1+ position))) (elt #1=(gethash namestring hash-table))) #+careful (loop (if (numberp number) (return)) (restart-case (error "Invalid line number: ~A" number) (store-value (use-value) :report "Provide a new number." :interactive (lambda () (list (read *query-io*))) (setq number use-value)))) (unless elt (setf elt (make-array 4 :element-type 'number :initial-contents (list number number 0 0))) (vector-push namestring set)) (setf minimum (min minimum number) maximum (max maximum number) sum (+ sum number) total (1+ total) #1# elt))))))) (sort set 'string<) (write-char #\{) (prog (nth elt (count -1)) :start (if (= (incf count) (length set)) (return)) (setf nth (aref set count) elt (gethash nth hash-table)) (format t "~A=~,1F/~,1F/~,1F~@[, ~]" nth minimum (/ sum total) maximum (/= (1+ count) (length set))) (go :start)) (write-char #\}) hash-table))