(load "decision.scm")

; A utility function to find all list elements that satisfy a predicate.
(define (filter pred? list)
  (cond
    [(null? list) '()]
    [(pred? (car list)) (cons (car list) (filter pred? (cdr list)))]
    [else (filter pred? (cdr list))]))

; Replace "observe" nodes in our decision models by two new kinds of nodes,
; "confuse" and "discover".  Confusion arises when nature picks a value for a
; random variable without the agent knowing; this pick may depend on previous
; picks.  Later, revelation may let the agent discover the value.
(define-struct confuse (variable values influence then) (make-inspector))
(define-struct discover (variable choices) (make-inspector))

; An environment is a list of bindings.  A binding pairs a random variable with
; a value picked by nature.  For simplicity, we represent each binding by a
; two-element list.
(define make-binding list)
(define binding-variable car)
(define binding-value cadr)
(define find-binding assoc)
(define (extend-environment variable value environment)
  (cons (make-binding variable value) environment))

; An influence is a function that maps an environment to a discrete
; (conditional) probability distribution.  The distribution is a list of
; probabilities that sum up to one.
(define (pick-value mass probabilities values)
  (let ([remaining-mass (- mass (car probabilities))])
    (if (positive? remaining-mass)
        (pick-value remaining-mass (cdr probabilities) (cdr values))
        (car values))))
(define (normalize probabilities)
  (let ([sum (apply + probabilities)])
    (map (lambda (probability) (/ probability sum)) probabilities)))

; Change "interact" to deal with "confuse" and "discover" instead of "observe".
(define (interact environment model)
  (cond
    [(reward? model)
     (reward-utility model)]
    [(decide? model)
     (display (decide-variable model))
     (display " ")
     (display (map choice-value (decide-choices model)))
     (display "? ")
     (let ([choice (find-choice (read) (decide-choices model))])
       (cond [choice (interact environment (choice-then choice))]
             [else (display "That is not a valid choice value.")
                   (newline)
                   (interact environment model)]))]
    [(confuse? model)
     (interact
       (extend-environment
         (confuse-variable model)
         (let ([distribution ((confuse-influence model) environment)])
           (pick-value (random) distribution (confuse-values model)))
         environment)
       (confuse-then model))]
    [(discover? model)
     (let ([variable (discover-variable model)])
       (display variable)
       (display " = ")
       (let ([value (binding-value (find-binding variable environment))])
         (let ([choice (find-choice value (discover-choices model))])
           (display value)
           (newline)
           (interact environment (choice-then choice)))))]))

; Change "execute" to deal with "confuse" and "discover" instead of "observe".
(define (execute environment policy model)
  (cond
    [(reward? model)
     (reward-utility model)]
    [(decide? model)
     (display (decide-variable model))
     (display " ")
     (display (map choice-value (decide-choices model)))
     (display "? ")
     (display (do-value policy))
     (newline)
     (let ([choice (find-choice (do-value policy) (decide-choices model))])
       (execute environment (do-then policy) (choice-then choice)))]
    [(confuse? model)
     (execute
       (extend-environment
         (confuse-variable model)
         (let ([distribution ((confuse-influence model) environment)])
           (pick-value (random) distribution (confuse-values model)))
         environment)
       policy
       (confuse-then model))]
    [(discover? model)
     (let ([variable (discover-variable model)])
       (display variable)
       (display " = ")
       (let ([value (binding-value (find-binding variable environment))])
         (let ([branch (find-branch value (switch-branches policy))]
               [choice (find-choice value (discover-choices model))])
           (display value)
           (newline)
           (execute environment (branch-then branch) (choice-then choice)))))]))

; To keep track of a distribution of environments, we use a list of pairs of
; probabilities and environments.
(define initial-pes '((1 ())))
(define (confuse-pes pes variable values influence)
  (if (null? pes) '()
    (append
      (let ([probability (caar pes)] [environment (cadar pes)])
        (filter (lambda (p-e) (positive? (car p-e)))
                (map (lambda (value conditional-probability)
                       (list (* conditional-probability probability)
                             (extend-environment variable value environment)))
                     values
                     (influence environment))))
      (confuse-pes (cdr pes) variable values influence))))
(define (switch-pes pes variable value)
  (if (null? pes) '()
    (let ([probability (caar pes)] [environment (cadar pes)])
      (let ([binding (find-binding variable environment)])
        (and binding
             (if (equal? value (binding-value binding))
               (let ([rest-pes (switch-pes (cdr pes) variable value)])
                 (and rest-pes (cons (car pes) rest-pes)))
               (switch-pes (cdr pes) variable value)))))))

; Change "utility" to deal with "confuse" and "discover" instead of "observe".
(define (utility pes policy model)
  (cond
    [(reward? model)
     (and (end? policy)
          (* (apply + (map car pes)) (reward-utility model)))]
    [(decide? model)
     (and (do? policy)
          (let ([choice
                 (find-choice (do-value policy) (decide-choices model))])
            (and choice
                 (utility pes (do-then policy) (choice-then choice)))))]
    [(confuse? model)
     (utility (confuse-pes pes
                           (confuse-variable model)
                           (confuse-values model)
                           (confuse-influence model))
              policy
              (confuse-then model))]
    [(discover? model)
     (let ([variable (discover-variable model)])
       (and (switch? policy)
            (equal? variable (switch-variable policy))
            (let ([n-utility (utility-switch pes variable
                                             (switch-branches policy)
                                             (discover-choices model))])
              (and n-utility
                   (= (car n-utility) (length pes))
                   (cadr n-utility)))))]))
(define (utility-switch pes variable branches choices)
  (cond
    [(null? branches)
     (and (null? choices) '(0 0))]
    [else
     (let ([value (branch-value (car branches))])
       (and (not (null? choices))
            (equal? value (choice-value (car choices)))
            (let ([then-pes (switch-pes pes variable value)])
              (let ([car-utility
                      (utility then-pes
                               (branch-then (car branches))
                               (choice-then (car choices)))]
                    [cdr-n-utility
                      (utility-switch
                        pes variable (cdr branches) (cdr choices))])
                (and car-utility
                     cdr-n-utility
                     (list (+ (length then-pes) (car cdr-n-utility))
                           (+ car-utility (cadr cdr-n-utility))))))))]))

; Change "solve" to deal with "confuse" and "discover" instead of "observe".
(define (solve pes model)
  (cond
    [(reward? model)
     (make-solution (make-end)
                    (* (apply + (map car pes)) (reward-utility model)))]
    [(decide? model)
     (let ([solutions (map (solve-choice pes) (decide-choices model))])
       (let ([utility (apply max (map solution-utility solutions))])
         (find (lambda (solution) (= utility (solution-utility solution)))
               solutions)))]
    [(confuse? model)
     (solve (confuse-pes pes
                         (confuse-variable model)
                         (confuse-values model)
                         (confuse-influence model))
            (confuse-then model))]
    [(discover? model)
     (let ([variable (discover-variable model)]
           [choices (discover-choices model)])
       (let ([solutions (map (lambda (choice)
                               (solve
                                 (switch-pes pes variable (choice-value choice))
                                 (choice-then choice)))
                             choices)])
         (make-solution
           (make-switch variable (map collect-branch choices solutions))
           (apply + (map solution-utility solutions)))))]))
(define (solve-choice pes)
  (lambda (choice)
    (let ([solution (solve pes (choice-then choice))])
      (make-solution (make-do (choice-value choice) (solution-policy solution))
                     (solution-utility solution)))))
(define (collect-branch choice solution)
  (make-branch (choice-value choice) (solution-policy solution)))

; Redefine "make-observe" to use "confuse" and "discover" instead.
(define (make-observe variable chances)
  (make-confuse
    variable
    (map chance-value chances)
    (lambda (environment) (map chance-probability chances))
    (make-discover
      variable
      (map (lambda (c) (make-choice (chance-value c) (chance-then c)))
           chances))))

; A utility function to make make-decide and make-discover more convenient.
(define (among variable values maker continuation)
  (maker variable
         (map (lambda (value) (make-choice value (continuation value)))
              values)))

; The Monty Hall problem.
(define (monty-hall)
  (let ([doors '(a b c)])
    (make-confuse 'prize doors
      (lambda (environment) (normalize (map (lambda (prize) 1) doors)))
      (among 'initial doors make-decide (lambda (initial)
        (make-confuse 'open doors
          (lambda (environment)
            (let ([prize (binding-value (find-binding 'prize environment))])
              (normalize
                (map (lambda (open)
                       (if (or (equal? open initial) (equal? open prize)) 0 1))
                     doors))))
          (among 'open doors make-discover (lambda (open)
            (among 'final doors make-decide (lambda (final)
              (among 'prize doors make-discover (lambda (prize)
                (make-reward (if (equal? final prize) 1 0))))))))))))))