;One Billion Row Challenge - Implement a silly and trivial programming challenge.
;Copyright (C) 2024 Prince Trippy <programmer@verisimilitudes.net>.

;This program is free software: you can redistribute it and/or modify it under the terms of the
;GNU Affero General Public License version 3 as published by the Free Software Foundation.

;This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
;even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
;See the GNU Affero General Public License for more details.

;You should have received a copy of the GNU Affero General Public License along with this program.
;If not, see <http://www.gnu.org/licenses/>.

(defun billion-row-challenge (pathname &optional (stream *standard-output*)
                              &aux (*standard-output* stream)
                                   (set (make-array 10000 :element-type 'string :fill-pointer 0))
                                (hash-table (make-hash-table :test 'equal :size 11000)))
  "Process one billion lines, each containing a semicolon-separated station name and number.
   These station names are sorted and displayed in-order alongside minimums, maximums, and averages.
If *FEATURES* contains a symbol :CAREFUL then interactive line checking and correction will be done.
A hash-table containing the same values is returned."
  (symbol-macrolet ((minimum (aref elt 0)) (maximum (aref elt 1))
                    (sum (aref elt 2)) (total (aref elt 3)))
    (with-standard-io-syntax
      (let (*read-eval*)
        (with-open-file (*standard-input* pathname)
          (loop (let* ((read-line (read-line *standard-input* nil))
                       (position #0=(position #\; read-line :test 'char= :from-end t)))
                  (unless read-line (return))
                  #+careful (loop (if position (return))
                               (restart-case (error "Invalid line: ~A" read-line)
                                 (store-value (use-value) :report "Provide a new line."
                                              :interactive (lambda () (list (read-line *query-io*)))
                                              (setq read-line use-value position #0#))))
                  (let* ((namestring (make-array position :element-type 'character
                                                 :displaced-to read-line :displaced-index-offset 0))
                         (number (read-from-string read-line nil nil :start (1+ position)))
                         (elt #1=(gethash namestring hash-table)))
                    #+careful (loop (if (numberp number) (return))
                                 (restart-case (error "Invalid line number: ~A" number)
                                   (store-value (use-value) :report "Provide a new number."
                                                :interactive (lambda () (list (read *query-io*)))
                                                (setq number use-value))))
                    (unless elt
                      (setf elt (make-array 4 :element-type 'number
                                            :initial-contents (list number number 0 0)))
                      (vector-push namestring set))
                    (setf minimum (min minimum number)
                          maximum (max maximum number)
                          sum (+ sum number)
                          total (1+ total)
                          #1# elt)))))))
    (sort set 'string<)
    (write-char #\{)
    (prog (nth elt (count -1)) :start
          (if (= (incf count) (length set)) (return))
          (setf nth (aref set count) elt (gethash nth hash-table))
          (format t "~A=~,1F/~,1F/~,1F~@[, ~]"
                  nth minimum (/ sum total) maximum (/= (1+ count) (length set)))
          (go :start))
    (write-char #\})
    hash-table))