; A utility function to find a list element that satisfies a predicate.
(define (find pred? list)
  (cond
    [(null? list) #f]
    [(pred? (car list)) (car list)]
    [else (find pred? (cdr list))]))

; This line and (make-inspector) below tells DrScheme to
; show a visual representation of the contents of structures
(print-struct #t)

; Three ways to construct a decision model, i.e., three kinds of nodes that the
; root node of a decision tree can be.  A reward node is a leaf node that just
; stores a utility value.  A decide node is an internal node with a label and a
; list of choices.  An observe node is an internal node with a label and a list
; of chances.
(define-struct reward (utility) (make-inspector))
(define-struct decide (variable choices) (make-inspector))
(define-struct observe (variable chances) (make-inspector))

; A choice is a branch off a choice node.  It stores an action to take and the
; consequent subtree.
(define-struct choice (value then) (make-inspector))
(define (find-choice value choices)
  (find (lambda (choice) (equal? value (choice-value choice))) choices))

; A chance is a branch off a chance node.  It stores a probability, an action
; to take, and the consequent subtree.
(define-struct chance (value probability then) (make-inspector))
(define (take-chance mass chances)
  (let ([remaining-mass (- mass (chance-probability (car chances)))])
    (if (positive? remaining-mass)
        (take-chance remaining-mass (cdr chances))
        (car chances))))

; A sample decision model
(define (dog)
  (make-decide 1 (list
   (make-choice 'f (make-reward .2))
   (make-choice 'w (make-observe 'X (list
                    (make-chance 'y .8 (make-decide 2 (list
                                        (make-choice 'f (make-reward .7))
                                        (make-choice 'h (make-reward .25)))))
                    (make-chance 'n .2 (make-decide 3 (list
                                        (make-choice 'f (make-reward .1))
                                        (make-choice 'h (make-reward .25))))))))
   (make-choice 'h (make-reward .3)))))

; The interpreter below simulates one run of a decision model using interactive
; user input and returns the utility rewarded.
(define (interact model)
  (cond
    [(reward? model)
     (reward-utility model)]
    [(decide? model)
     (display (decide-variable model))
     (display " ")
     (display (map choice-value (decide-choices model)))
     (display "? ")
     (let ([choice (find-choice (read) (decide-choices model))])
       (cond [choice (interact (choice-then choice))]
             [else (display "That is not a valid choice value.")
                   (newline)
                   (interact model)]))]
    [(observe? model)
     (display (observe-variable model))
     (display " = ")
     (let ([chance (take-chance (random) (observe-chances model))])
       (display (chance-value chance))
       (newline)
       (interact (chance-then chance)))]))

; Three kinds of policies.
(define-struct end () (make-inspector))
(define-struct do (value then) (make-inspector))
(define-struct switch (variable branches) (make-inspector))

; Each branch listed in a switch policy stores a potentially observable value
; and a consequent policy.
(define-struct branch (value then) (make-inspector))
(define (find-branch value branches)
  (find (lambda (branch) (equal? value (branch-value branch))) branches))

; The interpreter below simulates one run of a decision model using a policy
; (assumed feasible) and returns the utility rewarded.
(define (execute policy model)
  (cond
    [(reward? model)
     (reward-utility model)]
    [(decide? model)
     (display (decide-variable model))
     (display " ")
     (display (map choice-value (decide-choices model)))
     (display "? ")
     (display (do-value policy))
     (newline)
     (let ([choice (find-choice (do-value policy) (decide-choices model))])
       (execute (do-then policy) (choice-then choice)))]
    [(observe? model)
     (display (observe-variable model))
     (display " = ")
     (let ([chance (take-chance (random) (observe-chances model))])
       (display (chance-value chance))
       (newline)
       (execute (branch-then
                  (find-branch (chance-value chance) (switch-branches policy)))
                (chance-then chance)))]))

; The utility function checks whether a policy is feasible with respect to a
; decision model.  If the policy is feasible, then it returns the expected
; utility.
(define (utility policy model)
  (cond
    [(reward? model)
     (and (end? policy) (reward-utility model))]
    [(decide? model)
     (and (do? policy)
          (let ([choice
                 (find-choice (do-value policy) (decide-choices model))])
            (and choice
                 (utility (do-then policy) (choice-then choice)))))]
    [(observe? model)
     (and (switch? policy)
          (equal? (switch-variable policy) (observe-variable model))
          (utility-switch (switch-branches policy) (observe-chances model)))]))
(define (utility-switch branches chances)
  (cond
    [(null? branches)
     (and (null? chances) 0)]
    [else
     (and (not (null? chances))
          (equal? (branch-value (car branches)) (chance-value (car chances)))
          (let ([car-utility (utility (branch-then (car branches))
                                      (chance-then (car chances)))]
                [cdr-utility (utility-switch (cdr branches) (cdr chances))])
            (and car-utility
                 cdr-utility
                 (+ (* (chance-probability (car chances)) car-utility)
                    cdr-utility))))]))

; The interpreter below solves the decision model to yield a policy and an
; expected utility.
(define-struct solution (policy utility) (make-inspector))
(define (solve model)
  (cond
    [(reward? model)
     (make-solution (make-end) (reward-utility model))]
    [(decide? model)
     (let ([solutions (map solve-choice (decide-choices model))])
       (let ([utility (apply max (map solution-utility solutions))])
         (find (lambda (solution) (= utility (solution-utility solution)))
               solutions)))]
    [(observe? model)
     (let ([solutions (map solve (map chance-then (observe-chances model)))])
       (make-solution
         (make-switch (observe-variable model)
                      (map collect-branch (observe-chances model) solutions))
         (apply + (map collect-utility (observe-chances model) solutions))))]))
(define (solve-choice choice)
  (let ([solution (solve (choice-then choice))])
    (make-solution (make-do (choice-value choice) (solution-policy solution))
                   (solution-utility solution))))
(define (collect-branch chance solution)
  (make-branch (chance-value chance) (solution-policy solution)))
(define (collect-utility chance solution)
  (* (chance-probability chance) (solution-utility solution)))