;
;   oloman.lisp
;
;   michael marking <marking@tatanka.com>
;   at
;       http://www.tatanka.com/software/oloman/
;   or
;       https://www.gihub.com/wakinyantanka/oloman/
;
;   copyright 2017 michael marking; all rights reserved.
;   released under GPL 3.
;
;   THIS IS UNFINISHED CODE. PLEASE SEE THE ACCOMPANYING README FILE.
;
;   bug reports, suggestions, & questions welcome!
;
;-----------------------------------------------------------------------
;
;   package configuration
;
;       the following package configuration includes a few things
;       specific to sbcl: needs to be generalized
;
(require :sb-posix)             ; needed by sbcl for stand-alone
                                ;   executable use
(require :sb-bsd-sockets)       ; (same)

(require "asdf")

(asdf:load-system "cl-interpol")
(asdf:load-system "cl-unicode")
(asdf:load-system "cl-ppcre")
(asdf:load-system "command-line-arguments")

(defpackage         :com.tatanka.lisp-packages.oloman
    (:nicknames     :oloman)
    (:use           :cl :cl-interpol :cl-ppcre :cl-unicode :uiop
                    :command-line-arguments)
    (:export        main))

(in-package :oloman)

(defparameter *oloman-version* "0.0.3-development-snapshot") ; 2017.03.01
(defparameter *logic-error* nil)

(interpol:enable-interpol-syntax)

;-----------------------------------------------------------------------
;
;   some utility macros & functions
;
(defmacro append-char-to-string (char1 string1)
    `(setf ,string1 (concatenate 'string ,string1 (string ,char1))))

(defun list-of-chars-to-string (list-of-chars)
    (let ((new-string (make-string (length list-of-chars)))
            (offset1 0))
        (dolist (string-char list-of-chars)
            (setf (char new-string offset1) string-char)
            (incf offset1))
        (values new-string)))

;-----------------------------------------------------------------------
;
;   *parse-state*
;
;       This is the current state of the xhtml parsing operation.
;
(defparameter *parse-state*
    (list '(:state . :initial)
        ;   possible values:
        ;       :initial                neutral, starting
        ;       :left-angle-bracket     seen a left angle bracket
        ;       :left-bang              seen <!
        ;       :ampersand              seen an ampersand
        ;       :xml-declaration        in xml declaration
        ;       :doctype-declaration    in doctype declaration
        ;       :comment                in comment
        ;       :cdata                  in CDATA
        ;       :tag                    in tag (begin or empty element)
        ;       :end-tag                in end tag
        ;       :content                in content
        ;       :entity                 in a predefined entity
        ;       :syntax-error           bad syntax encountered
        '(:prior-state . nil)
        ;   This is the state we were in previously. Not strictly,
        ;   because we skip the :left-angle-bracket, :left-bang, and
        ;   :ampersand states here when transitioning from them.
        '(:tag-start-line . 0)
        '(:tag-start-position . 0)
        ;   Where the current tag began.
        '(:consecutive-dashes 0)
        ;   This is used to look for the end of a comment.
        '(:escaped-text . "")
        ;   The raw, escaped value of text beginning with ampersand.
        ))

(define-symbol-macro parse-state (cdr (assoc ':state *parse-state*)))
(define-symbol-macro prior-parse-state
                (cdr (assoc ':prior-state *parse-state*)))
(define-symbol-macro invalid-syntax (eql parse-state :syntax-error))
(define-symbol-macro tag-start-line
                (cdr (assoc ':tag-start-line *parse-state*)))
(define-symbol-macro tag-start-position
                (cdr (assoc ':tag-start-position *parse-state*)))
(define-symbol-macro consecutive-dashes
                (cdr (assoc ':consecutive-dashes *parse-state*)))
(define-symbol-macro escaped-text
                (cdr (assoc ':escaped-text *parse-state*)))

;-----------------------------------------------------------------------
;
;   elements & content; the parse tree
;
;       document-tree-node represents a node in the document parse tree
;       graph.
;
;       Every node except the root node is contained by some other node,
;       and every node contains zero or more other nodes. The root node
;       represents the xhtml document itself, and, if the input is valid
;       and well-formed, contains exactly one other node, the html
;       element.
;
;       Simple text, contained within an element, is given its own node.
;       If text-value non-nil, then contained will be nil. Such a node
;       is therefore a leaf node.
;
;       To illustrate, parsing
;
;           <a>qwerty<b>uiop</b><a>
;
;       creates the following nodes:
;
;       node 1  for the element <a>...</a>, containing nodes 2 and 3
;       node 2  the leaf node having the text-value "qwerty", containing
;                   no other nodes
;       node 3  for the element <b>...</b>, containing only node 4
;       node 4  the leaf node having text-value "uiop", containing no
;                   other nodes
;
;       Or, expressed another way,
;
;           <a>...</a>
;               "qwerty"
;               <b>...</b>
;                   "uiop"
;
;       If there were an empty element node <c ... /c>, it'd be a leaf
;       node, having nil text-value.
;

(defclass document-tree-node ()
    (
    ;
    ; type of node
    (node-type
        :initarg    :node-type
        :initform   ""
        :accessor   node-type)
    ;
    ; how deep are we within the tree?
    (level
        :initarg    :level
        :initform   0
        :accessor   level)
    ;
    ; which nodes contain, or are contained by, this node?
    (container
        :initarg    :container
        :initform   nil
        :accessor   container)
    (contained
        :initarg    :contained
        :initform   nil
        :accessor   contained)
    ;
    ;   if this is a leaf, text-only node, what is the value of the
    ;   text?
    (text-value
        :initarg    :contained
        :initform   nil
        :accessor   text-value)
    ;
    ;   the beginning line number and character position in the source
    ;   file
    (line-number
        :initarg    :line-number
        :initform   0
        :accessor   line-number)
    (character-position
        :initarg    :character-position
        :initform   0
        :accessor   character-position)
    ;
    ;   if a tag, this is the text value of the tag (that is, everything
    ;   from "<" to ">"
    (tag-value
        :initarg    :tag-value
        :initform   ""
        :accessor   tag-value)
    ;
    ;   if a tag, the tag name
    (tag-name
        :initarg    :tag-name
        :initform   nil
        :accessor   tag-name)
    ;
    ;   if content, the value after converting the escaped characters
    ;   (those beginning with ampersand) to plain text
    (content-value
        :initarg    :content-value
        :initform   nil
        :accessor   content-value)
    ;
    ;   does the tag-name include all of the name characters? (that is,
    ;   have we processed the entire name yet?)
    (tag-name-complete?
        :initarg    :tag-name-complete?
        :initform   nil
        :accessor   tag-name-complete?)
    ;
    ;   if a tag which specifies the style or class of contained
    ;   content, what is the specified style or class?
    (specified-class
        :initarg    :specified-class
        :initform   nil
        :accessor   specified-class)
    ;
    ;   some properties of this node
    (tag-node?
        :initarg    :tag-node?
        :initform   nil
        :accessor   tag-node?)
    (content-node?
        :initarg    :content-node?
        :initform   nil
        :accessor   content-node?)
    (to-be-ignored?
        :initarg    :to-be-ignored?
        :initform   nil
        :accessor   to-be-ignored?)
    (last-non-space-character
        :initarg    :last-non-space-character
        :initform   ""
        :accessor   last-non-space-character)
    (root-node?
        :initarg    :root-node?
        :initform   nil
        :accessor   root-node?)))

;-----------------------------------------------------------------------
;
;   *document-tree*
;
;       This is the tree which comes from parsing the xhtml input.
;
(defparameter *document-tree* (make-instance 'document-tree-node
    :node-type "root" :root-node? t))

(defparameter *current-node* *document-tree*)

;-----------------------------------------------------------------------
;
;   create-new-document-branch
;
;       Create a new branch from the current node of the document tree.
;
(defmacro create-new-document-branch (&rest new-node-args)
    `(let ((new-node (make-instance 'document-tree-node ,@new-node-args
                :container *current-node*
                :level (1+ (level *current-node*)))))
        (push new-node (contained *current-node*))
        (setf *current-node* new-node)
        *current-node*))

;-----------------------------------------------------------------------
;
;   descend-document-tree
;
;       Finish the current node, descending to its container, toward the
;       document root.
;
(defmacro descend-document-tree ( )
    `(let ()
        (setf *current-node* (container *current-node*))))

;-----------------------------------------------------------------------
;
;   show-syntax-error
;
(defun show-syntax-error (input-character line-number char-position)
    "display error message: bad xhtml syntax"
    (let ( )
        (format *error-output*
            "ERROR: bad xhtml syntax: character \"~a\" line ~a position ~a~%"
            input-character line-number char-position)
        (setf parse-state :syntax-error)))

;-----------------------------------------------------------------------
;
;   whitespace-character?
;
(defun whitespace-character? (test-char)
    "is argument an xml whitespace character?"
    (let ((test-value (char-code test-char)))
        (cond
            ((= test-value #x20)
                t)
            ((= test-value #x9)
                t)
            ((= test-value #xd)
                t)
            ((= test-value #xa)
                t)
            (t
                nil))))

;-----------------------------------------------------------------------
;
;   name-initial-character?
;
;       Is the argument an XML name initial character? (That is, is it
;       valid as the first character of a name?)
;
;       Assumes Unicode.
;
(defun name-initial-character? (test-char)
    "is argument a valid initial character in an xml name?"
    (let ((test-value (char-code test-char)))
        (cond
            ((= test-value (char-code #\:))
                t)
            ((and (>= test-value (char-code #\A))
                    (<= test-value (char-code #\Z)))
                t) 
            ((= test-value (char-code #\_))
                t)
            ((and (>= test-value (char-code #\a))
                    (<= test-value (char-code #\z)))
                t) 
            ((and (>= test-value #xc0) (<= test-value #xd6))
                t)
            ((and (>= test-value #xd8) (<= test-value #xf6))
                t)
            ((and (>= test-value #xf8) (<= test-value #x2ff))
                t)
            ((and (>= test-value #x370) (<= test-value #x37d))
                t)
            ((and (>= test-value #x37f) (<= test-value #x1fff))
                t)
            ((and (>= test-value #x200c) (<= test-value #x200d))
                t)
            ((and (>= test-value #x2070) (<= test-value #x2184))
                t)
            ((and (>= test-value #x2c00) (<= test-value #x2fef))
                t)
            ((and (>= test-value #x3001) (<= test-value #xd7ff))
                t)
            ((and (>= test-value #xf900) (<= test-value #x4dcf))
                t)
            ((and (>= test-value #xfdf0) (<= test-value #xfffd))
                t)
            ((and (>= test-value #x10000) (<= test-value #xeffff))
                t)
            (t
                nil))))

;-----------------------------------------------------------------------
;
;   name-character?
;
;       Is the argument an XML name character? (That is, is it valid in
;       an XML name?)
;
;       Assumes Unicode.
;
(defun name-character? (test-char)
    "is argument a valid character in an xml name?"
    (if (name-initial-character? test-char)
        t
        (let ((test-value (char-code test-char)))
            (cond
                ((= test-value (char-code #\-))
                    t)
                ((= test-value (char-code #\.))
                    t)
                ((and (>= test-value (char-code #\0))
                        (<= test-value (char-code #\9)))
                    t)
                ((= test-value #xb7)
                    t)
                ((and (>= test-value #x300) (<= test-value #x36f))
                    t)
                ((and (>= test-value #x203f) (<= test-value #x2040))
                    t)
                (t
                    nil)))))

;-----------------------------------------------------------------------
;
;   process-start-of-content
;
(defun process-start-of-content (input-char line-number char-position)
    "process the first character of a content sequence"
    (let* ((tag-value (string input-char))
            (this-node (create-new-document-branch :node-type "content"
                :tag-value tag-value :content-node? t)))
        ;
        (if (char= input-char #\&)
            ;
            ;   This first character is an ampersand, then we begin an
            ;   escaped value.
            ;
            (progn
                (setf parse-state :ampersand)
                (setf prior-parse-state :uncertain)
                (setf (content-value *current-node*) "")
                (setf escaped-text "&"))
            ;
            ;   This first character is an ordinary character, part of
            ;   the content.
            ;
            (progn
                (setf parse-state :content)
                (setf prior-parse-state :uncertain)
                (setf (content-value *current-node*)
                    (string input-char))))))

;-----------------------------------------------------------------------
;
;   process-from-initial
;
;       Process an input character beginning at the :initial state.
;
(defun process-from-initial (input-char line-number char-position)
    "we are in the :initial state; process an input character"
    (let ( )
        (cond
            ((char= input-char #\<)
                (setf tag-start-line line-number)
                (setf tag-start-position char-position)
                (setf parse-state :left-angle-bracket)
                (setf prior-parse-state :initial))
            ;
            ;   Anything outside a tag pair is markup, although there
            ;   is nothing defined which can be there except maybe
            ;   whitespace. So we simply discard it.
            ;
            ((< (level *current-node*) 1)
                nil)
            ;
            ;   The only thing left is content, so this must be content.
            ;
            (t
                (process-start-of-content input-char line-number
                    char-position)))))

;-----------------------------------------------------------------------
;
;   process-content
;
;       Process the next character of a content sequence. We are not in
;       an escape sequence before this character.
;
(defun process-content (input-char line-number char-position)
    "process the next character of a content sequence"
    (let ()
        ;
        ;   When we see the beginning of a tag, then we've gone beyond
        ;   the end of the content.
        (if (char= input-char #\<)
            (progn
                ; (format t "~%end of content: ~a~%"
                ;   (content-value *current-node*))
                (descend-document-tree)
                (setf parse-state :initial)
                (return-from process-content (process-from-initial
                    input-char line-number char-position))))
        ;
        ;   We're still in content.
        ;
        (append-char-to-string input-char (tag-value *current-node*))
        (if (char= input-char #\&)
            ;
            ;   This input character is an ampersand: we begin an
            ;   escaped value.
            ;
            (progn
                (setf parse-state :ampersand)
                (setf prior-parse-state :content)
                (setf escaped-text "&"))
            ;
            ;   This first character is an ordinary character, part of
            ;   the content.
            ;
            (progn
                (append-char-to-string input-char
                    (content-value *current-node*))))))

;-----------------------------------------------------------------------
;
;   process-from-ampersand
;
;       We are in content, and the previous character was an ampersand.
;       Process the character immediately following the ampersand.
;
(defun process-from-ampersand (input-char line-number char-position)
    "process the character immediately after an ampersand"
    (let ( )
        ;
        ;   Regardless of what it is, this input character is part of
        ;   the value of the content, and also part of the escaped
        ;   text value.
        ;
        (append-char-to-string input-char (tag-value *current-node*))
        (append-char-to-string input-char escaped-text)
        ;
        ;   Escaped text ends with a semicolon. We don't check for
        ;   mal-formed or erroneous text.
        ;
        (if (char= input-char #\;)
            ;
            ;   We've reached the end of the escaped text. Decode it,
            ;   then append to the content value.
            ;
            (let ((decoded-character nil))
                ;
                ;   In any case, we return to the :content state.
                ;
                (setf parse-state :content)
                ;
                ;   Look first for the predefined entities.
                ;
                (cond
                    ((string= escaped-text "&lt;")
                        (setf decoded-character #\<))
                    ((string= escaped-text "&gt;")
                        (setf decoded-character #\>))
                    ((string= escaped-text "&amp;")
                        (setf decoded-character #\&))
                    ((string= escaped-text "&apos;")
                        (setf decoded-character #\'))
                    ((string= escaped-text "&quot;")
                        (setf decoded-character
                            (character-named "quotation mark")))
                    (t
                        nil))
                (if decoded-character
                    (progn
                        (append-char-to-string decoded-character
                            (content-value *current-node*))
                        (return-from process-from-ampersand nil))))
; FINISHME decode numerically-defined characters
            ;
            ;   We have not yet reached the end of the escaped text. We
            ;   appended already to the escaped-text, so there is
            ;   nothing more to do.
            (progn
                ))))

;-----------------------------------------------------------------------
;
;   process-start-of-tag
;
;       Process the first character of a tag name.
;
(defun process-start-of-tag (input-char line-number char-position)
    "process the first character of a tag name"
    (let* ((tag-value (list-of-chars-to-string (list #\< input-char)))
            (this-node (create-new-document-branch :node-type "tag"
                :tag-value tag-value
                :tag-name (string input-char) :tag-node? t)))
        (setf parse-state :tag)
        ))

;-----------------------------------------------------------------------
;
;   process-start-of-end-tag
;
;       Process the first character of an end tag name.
;
(defun process-start-of-end-tag (input-char line-number char-position)
    "process the first two characters </ of an end tag "
    (let* ((tag-value (list-of-chars-to-string
                (list #\< #\/ input-char)))
            (this-node (create-new-document-branch :node-type "end tag"
                :tag-value tag-value :tag-name "" :tag-node? t)))
        (setf parse-state :end-tag)))

;-----------------------------------------------------------------------
;
;   extract-style-or-class-designation
;
;       Extract any class or style designation in the current tag node,
;       which will be applied to this tag's content.
;
(defun extract-style-or-class-designation ( )
    (let ( )
        ;
        ;   First, we look for designations of the "manpage" styles,
        ;   which are defined by this oloman project.
        ;
        (multiple-value-bind
                (whole-match array-of-captures)
                (scan-to-strings "class=\"manpage([A-Za-z0-9]+)\""
                    (tag-value *current-node*))
            (if array-of-captures
                ; if we captured one...
                (progn
                    (setf (specified-class *current-node*)
                        (aref array-of-captures 0))
                        (return-from
                            extract-style-or-class-designation))))
        ; FINISHME look for standard markup
        ))

;-----------------------------------------------------------------------
;
;   process-continuation-of-tag
;
;       Process the next character of a tag.
;
(defun process-continuation-of-tag
        (input-char line-number char-position)
    "process the next character of a tag"
    (let ()
        ; regardless of what the input character is, it becomes part of
        ; the tag value
        (append-char-to-string input-char (tag-value *current-node*))
        ; remember the last non-space character (excepting the trailing
        ; angle bracket)
        (if (and (char/= input-char #\>) (char/= input-char #\Space))
            (setf (last-non-space-character *current-node*) input-char))
        ; if we don't yet have the entire tag name, then, if it's
        ; eligible, also append the character to the name
        (if (tag-name-complete? *current-node*)
            nil
            (if (name-character? input-char)
                (append-char-to-string input-char
                        (tag-name *current-node*))
                (setf (tag-name-complete? *current-node*) t)))
        ; when we see the right angle bracket, then we've reached the
        ; end of the tag
        (if (char= input-char #\>)
            (progn
                ; (format t "~%end of tag ~a~%" (tag-name *current-node*))
                (extract-style-or-class-designation)
                (if (char= (last-non-space-character *current-node*) #\/)
                    (descend-document-tree))
                (setf parse-state :initial)))))

;-----------------------------------------------------------------------
;
;   process-continuation-of-end-tag
;
;       Process the next character of an end tag.
;
(defun process-continuation-of-end-tag
        (input-char line-number char-position)
    "process the next character of an end tag"
    (let ()
        ; regardless of what the input character is, it becomes part of
        ; the tag value
        (append-char-to-string input-char (tag-value *current-node*))
        ; if we don't yet have the entire tag name, then, if it's
        ; eligible, also append the character to the name
        (if (tag-name-complete? *current-node*)
            nil
            (if (name-character? input-char)
                (append-char-to-string input-char
                        (tag-name *current-node*))
                (setf (tag-name-complete? *current-node*) t)))
        ; when we see the right angle bracket, then we've reached the
        ; end of the tag
        (if (char= input-char #\>)
            (progn
                ;(format t "~%end of end tag ~a~%" (tag-name *current-node*))
                (descend-document-tree)
                (descend-document-tree)
                (setf parse-state :initial)))))

;-----------------------------------------------------------------------
;
;   process-continuation-of-comment
;
;       Process the next character of a comment.
;
(defun process-continuation-of-comment
        (input-char line-number char-position)
    "process the next character of a comment"
    (let ()
        ; regardless of what the input character is, it becomes part of
        ; the comment value
        (append-char-to-string input-char (tag-value *current-node*))
        ; update the number of consecutive 
        (if (char= input-char #\-)
            (incf consecutive-dashes)
            (if (and (char= input-char #\>) (>= consecutive-dashes 2))
                nil
                (setf consecutive-dashes 0)))
            ; FIXME if the input is invalid, we can begin a comment with
            ; <!- (only one dash!) and get away with it



        ; when we see -->, then we've reached the end of the comment
        (if (and (char= input-char #\>) (>= consecutive-dashes 2))
            (progn
                ; (format t "~%end of comment~%")
                ; (descend-document-tree)
                (setf parse-state :initial)))))

;-----------------------------------------------------------------------
;
;   process-from-left-angle-bracket
;
;       Process an input character beginning at the :left-angle-bracket
;       state.
;
;       There are three valid possibilities:
;           <x ... >            start tag or empty element tag
;           </x ... >           end tag
;           <? ... ?>           xml declaration
;           <!-- ... -->        comment
;           <!doctype ... >     doctype declaration
;       Everything else is a syntax error.
;
(defun process-from-left-angle-bracket
        (input-char line-number char-position)
    "process the character following a left angle bracket"
    (cond
        ((char= input-char #\!)
            (setf parse-state :left-bang))
        ((char= input-char #\?)
            (setf parse-state :xml-declaration))
        ((char= input-char #\/)
            (process-start-of-end-tag input-char line-number
                char-position))
        ((name-initial-character? input-char)
            (process-start-of-tag input-char line-number
                char-position))
        (t
            (show-syntax-error input-char line-number
                char-position))))

;-----------------------------------------------------------------------
;
;   process-from-left-bang
;
;       Process the first character after the sequence "<!". We could
;       be beginning a comment or a doctype declaration, or maybe just
;       something invalid.
;
(defun process-from-left-bang
        (input-char line-number char-position)
    "process the character following <!"
    (cond
        ((or (char= input-char #\d) (char= input-char #\D))
            ;   Unless this is invalid xhtml, the "d" is the beginning
            ;   of the word "doctype". We'll assume it's valid.
            (setf parse-state :doctype-declaration))
        ((char= input-char #\-)
            ;   Unless this is invalid xhtml, this dash is followed by
            ;   another, and we begin a comment.
            (setf consecutive-dashes -1)
            (setf parse-state :comment))
        (t
            (show-syntax-error input-char line-number
                char-position))))

;-----------------------------------------------------------------------
;
;   process-doctype-declaration
;
;       We're processing a doctype declaration, <!doctype ... >. We
;       ignore the contents. We continue until we get to the closing
;       right angle bracket.
;
(defun process-doctype-declaration
        (input-char line-number char-position)
    "process a doctype declaration (look for end, ignore all else)"
    (cond
        ((char= input-char #\>)
            ; end of the doctype declaration
            ; (format t "~%end of doctype declaration~%")
            (setf parse-state :initial))
        (t
            nil)))

;-----------------------------------------------------------------------
;
;   process-input-char
;
;       Process a single character from the xhtml input file.
;
;       We don't do error checking: looking for bad syntax, invalid
;       characters, and so on. If it's a problem, we might catch it, but
;       mostly we just assume the input is well-formed and valid. There
;       is, however, a little sanity checking to verify our own logic.
;
(defun process-input-char (input-char line-number char-position)
    "process a character from the xhtml input file"
    (let ( )
        ; (format t " ~a " input-char)
        (cond
            ((eql parse-state :initial)
                (process-from-initial input-char line-number
                    char-position))
            ((eql parse-state :left-angle-bracket)
                (process-from-left-angle-bracket input-char line-number
                    char-position))
            ((eql parse-state :left-bang)
                (process-from-left-bang input-char line-number
                    char-position))
            ((eql parse-state :doctype-declaration)
                (process-doctype-declaration input-char line-number
                    char-position))
            ((eql parse-state :tag)
                (process-continuation-of-tag input-char line-number
                    char-position))
            ((eql parse-state :end-tag)
                (process-continuation-of-end-tag input-char line-number
                    char-position))
            ((eql parse-state :comment)
                (process-continuation-of-comment input-char line-number
                    char-position))
            ((eql parse-state :content)
                (process-content input-char line-number char-position))
            ((eql parse-state :ampersand)
                (process-from-ampersand input-char line-number
                    char-position))
            (t
                (progn
                    (format *error-output*
                        "LOGIC ERROR: unhandled parse state \"~a\"~%"
                        parse-state)
                    (setf *logic-error* t))))))

;-----------------------------------------------------------------------
;
;   open-and-read-xhtml-file
;
;       Returns errors?, a generalized boolean.
;
;       Man pages usually aren't very big, so to simplify things we just
;       read the xhtml file, parsing as we go, and put everything into
;       memory.
;
(defun open-and-read-xhtml-file (input-file-name)
    "open and read the xhtml input file"
    (let* ((line-number 1)
            (char-position 0)
            (encountered-error nil))
        (handler-case 
            (with-open-file (source-stream input-file-name)
                (do ((input-char nil)
                        (end-of-file? nil))
                    ((or invalid-syntax *logic-error* end-of-file?))
                    (setf input-char (read-char source-stream nil nil))
                    (if input-char
                        (progn
                            (if (char= input-char #\Newline)
                                (progn
                                    (incf line-number)
                                    (setf char-position 0))
                                (incf char-position))
                            (process-input-char input-char
                                line-number char-position)
                            (if (or invalid-syntax *logic-error*)
                                (setf encountered-error t)))
                        (progn
                            (setf end-of-file? t)))))
;            (error (x)
;                (progn
;                    (format *error-output*
;                        "ERROR: (~s) opening or reading file ~a~%"
;                        x
;                        (second command-line-arguments))
;                    (setf encountered-error t)))
)
        encountered-error))

;-----------------------------------------------------------------------
;
;   write-node-to-troff-file
;
(defun write-node-to-troff-file (node1 troff-stream)
    "write a node and its subnodes to the troff output stream"
    (let ( )
        (if (content-value node1)
            (format troff-stream "~a" (content-value node1)))
        (if (contained node1)
            (dolist (contained-node (reverse (contained node1)))
                (write-node-to-troff-file
                    contained-node troff-stream)))))

;-----------------------------------------------------------------------
;
;   open-and-write-troff-file
;
;       Returns errors?, a generalized boolean.
;
(defun open-and-write-troff-file (output-file-name)
    "open and write the troff output file"
    (let* ((encountered-error nil)
            (cursor *document-tree*))
        (handler-case 
            (with-open-file (destination-stream output-file-name
                    :direction :output :if-exists :supersede)
                (write-node-to-troff-file cursor destination-stream))
;            (error (x)
;                (progn
;                    (format *error-output*
;                        "ERROR: (~s) opening or reading file ~a~%"
;                        x
;                        (second command-line-arguments))
;                    (setf encountered-error t)))
)
        encountered-error))

;-----------------------------------------------------------------------
;
;   show-document-tree
;
;       This displays the document tree. It is intended for debug use.
;
(defun show-document-node (node1 level1)
    (let ((node-type (node-type node1)))
        (dotimes (j1 level1)
            (format t "  "))
        (format t "~a ~a ~a ~a~%" (level node1) node-type
            (cond
                ((or (string= node-type "tag")
                        (string= node-type "end tag"))
                    (tag-name node1))
                ((string= node-type "content")
                    (content-value node1))
                (t
                    ""))
            (if (specified-class node1)
                (specified-class node1)
                ""))
        (dolist (subnode (reverse (contained node1)))
            (show-document-node subnode (1+ level1)))))
(defun show-document-tree ( )
    (format t "~%document tree:~%")
    (show-document-node *document-tree* 1))

;-----------------------------------------------------------------------
;
;   +command-line-specification+
;
(defparameter +command-line-specification+
    (quote
        ((("verbose")       :type           boolean
                            :initial-value  nil
                            :documentation  "more explanatory output")
        (("version")        :type           boolean
                            :initial-value  nil
                            :documentation  "show program version")
        (("license")        :type           boolean
                            :initial-value  nil
                            :documentation  "show license information")
        (("dry-run")        :type           boolean
                            :initial-value  nil
                            :documentation
                                    "don't modify or create any files")
        (("help")           :type           boolean
                            :initial-value  nil
                            :documentation  "show help information")
    )))

;-----------------------------------------------------------------------
;
;   show-help
;
(defun show-help ( )
    "display help information in reponse to --help command line option"
    (format t
"Convert xhtml exported by OpenOffice/LibreOffice to troff for man page.
Usage:
    oloman [options*] infile outfile
    oloman [options*]               # for options without processing
where
    infile has been created in OpenOffice or LibreOffice using special
        manpage styles, and exported using strict xhtml 1.0 format
    outfile is a troff manpage converted from infile
and options include:
    --help              show help information
    --verbose           provide more details
    --license           show license information
    --dry-run           don't modify or create any files
    --version           show version details
You might also try \"man oloman\" or \"man 7 oloman\".
")     
    )

;-----------------------------------------------------------------------
;
;   show-license
;
(defun show-license ( )
    "display license and copyright information"
    (format t
"Copyright 2017 by Michael Marking <marking@tatanka.com>; all
    rights reserved.
Licensed under the terms of the GNU General Public License version 3.
For more information, go to http://www.tatanka.com/software/oloman/
or https://www.github.com/wakinyantanka/oloman.
")
    )

;-----------------------------------------------------------------------
;
;   oloman-function
;
(defun oloman-function (positional-arguments &key verbose version
        license dry-run help)
    "main function for converting libreoffice/openoffice xhtml to troff"
    (let ((info-shown nil))
        (format t "~%oloman ~a WARNING: INCOMPLETE SOFTWARE!~%"
            *oloman-version*)
        (when help
            (show-help)
;           (command-line-arguments:show-option-help
;               +command-line-specification+ :sort-names t)
;           (uiop:quit)
            (setf info-shown t))
        (when license
            (show-license)
            (setf info-shown t))
        (when version
            (setf info-shown t))
        (if (< (length positional-arguments) 2)
            (if info-shown
                (uiop:quit)
                (progn
                    (format t
"ERROR: no arguments provided; try \"oloman --help\" for explanation.~%")
                    (uiop:quit))))
        (progn
            (block processing-steps
                (if (open-and-read-xhtml-file
                        (first positional-arguments))
                    (return-from processing-steps))
                ; (show-document-tree)
                (if (open-and-write-troff-file
                        (second positional-arguments))
                    (return-from processing-steps))))))

;-----------------------------------------------------------------------
;
;   oloman-main
;
(defun oloman-main (command-line-arguments)
    "entry to program: process command line arguments, call oloman-function"
    (command-line-arguments:handle-command-line
        +command-line-specification+
        'oloman-function
        :command-line command-line-arguments
        :name "oloman"
        :positional-arity 0
        :rest-arity t))

(oloman-main (command-line-arguments:get-command-line-arguments))