(load "decision.scm") ; A utility function to find all list elements that satisfy a predicate. (define (filter pred? list) (cond [(null? list) '()] [(pred? (car list)) (cons (car list) (filter pred? (cdr list)))] [else (filter pred? (cdr list))])) ; Replace "observe" nodes in our decision models by two new kinds of nodes, ; "confuse" and "discover". Confusion arises when nature picks a value for a ; random variable without the agent knowing; this pick may depend on previous ; picks. Later, revelation may let the agent discover the value. (define-struct confuse (variable values influence then) (make-inspector)) (define-struct discover (variable choices) (make-inspector)) ; An environment is a list of bindings. A binding pairs a random variable with ; a value picked by nature. For simplicity, we represent each binding by a ; two-element list. (define make-binding list) (define binding-variable car) (define binding-value cadr) (define find-binding assoc) (define (extend-environment variable value environment) (cons (make-binding variable value) environment)) ; An influence is a function that maps an environment to a discrete ; (conditional) probability distribution. The distribution is a list of ; probabilities that sum up to one. (define (pick-value mass probabilities values) (let ([remaining-mass (- mass (car probabilities))]) (if (positive? remaining-mass) (pick-value remaining-mass (cdr probabilities) (cdr values)) (car values)))) (define (normalize probabilities) (let ([sum (apply + probabilities)]) (map (lambda (probability) (/ probability sum)) probabilities))) ; Change "interact" to deal with "confuse" and "discover" instead of "observe". (define (interact environment model) (cond [(reward? model) (reward-utility model)] [(decide? model) (display (decide-variable model)) (display " ") (display (map choice-value (decide-choices model))) (display "? ") (let ([choice (find-choice (read) (decide-choices model))]) (cond [choice (interact environment (choice-then choice))] [else (display "That is not a valid choice value.") (newline) (interact environment model)]))] [(confuse? model) (interact (extend-environment (confuse-variable model) (let ([distribution ((confuse-influence model) environment)]) (pick-value (random) distribution (confuse-values model))) environment) (confuse-then model))] [(discover? model) (let ([variable (discover-variable model)]) (display variable) (display " = ") (let ([value (binding-value (find-binding variable environment))]) (let ([choice (find-choice value (discover-choices model))]) (display value) (newline) (interact environment (choice-then choice)))))])) ; Change "execute" to deal with "confuse" and "discover" instead of "observe". (define (execute environment policy model) (cond [(reward? model) (reward-utility model)] [(decide? model) (display (decide-variable model)) (display " ") (display (map choice-value (decide-choices model))) (display "? ") (display (do-value policy)) (newline) (let ([choice (find-choice (do-value policy) (decide-choices model))]) (execute environment (do-then policy) (choice-then choice)))] [(confuse? model) (execute (extend-environment (confuse-variable model) (let ([distribution ((confuse-influence model) environment)]) (pick-value (random) distribution (confuse-values model))) environment) policy (confuse-then model))] [(discover? model) (let ([variable (discover-variable model)]) (display variable) (display " = ") (let ([value (binding-value (find-binding variable environment))]) (let ([branch (find-branch value (switch-branches policy))] [choice (find-choice value (discover-choices model))]) (display value) (newline) (execute environment (branch-then branch) (choice-then choice)))))])) ; To keep track of a distribution of environments, we use a list of pairs of ; probabilities and environments. (define initial-pes '((1 ()))) (define (confuse-pes pes variable values influence) (if (null? pes) '() (append (let ([probability (caar pes)] [environment (cadar pes)]) (filter (lambda (p-e) (positive? (car p-e))) (map (lambda (value conditional-probability) (list (* conditional-probability probability) (extend-environment variable value environment))) values (influence environment)))) (confuse-pes (cdr pes) variable values influence)))) (define (switch-pes pes variable value) (if (null? pes) '() (let ([probability (caar pes)] [environment (cadar pes)]) (let ([binding (find-binding variable environment)]) (and binding (if (equal? value (binding-value binding)) (let ([rest-pes (switch-pes (cdr pes) variable value)]) (and rest-pes (cons (car pes) rest-pes))) (switch-pes (cdr pes) variable value))))))) ; Change "utility" to deal with "confuse" and "discover" instead of "observe". (define (utility pes policy model) (cond [(reward? model) (and (end? policy) (* (apply + (map car pes)) (reward-utility model)))] [(decide? model) (and (do? policy) (let ([choice (find-choice (do-value policy) (decide-choices model))]) (and choice (utility pes (do-then policy) (choice-then choice)))))] [(confuse? model) (utility (confuse-pes pes (confuse-variable model) (confuse-values model) (confuse-influence model)) policy (confuse-then model))] [(discover? model) (let ([variable (discover-variable model)]) (and (switch? policy) (equal? variable (switch-variable policy)) (let ([n-utility (utility-switch pes variable (switch-branches policy) (discover-choices model))]) (and n-utility (= (car n-utility) (length pes)) (cadr n-utility)))))])) (define (utility-switch pes variable branches choices) (cond [(null? branches) (and (null? choices) '(0 0))] [else (let ([value (branch-value (car branches))]) (and (not (null? choices)) (equal? value (choice-value (car choices))) (let ([then-pes (switch-pes pes variable value)]) (let ([car-utility (utility then-pes (branch-then (car branches)) (choice-then (car choices)))] [cdr-n-utility (utility-switch pes variable (cdr branches) (cdr choices))]) (and car-utility cdr-n-utility (list (+ (length then-pes) (car cdr-n-utility)) (+ car-utility (cadr cdr-n-utility))))))))])) ; Change "solve" to deal with "confuse" and "discover" instead of "observe". (define (solve pes model) (cond [(reward? model) (make-solution (make-end) (* (apply + (map car pes)) (reward-utility model)))] [(decide? model) (let ([solutions (map (solve-choice pes) (decide-choices model))]) (let ([utility (apply max (map solution-utility solutions))]) (find (lambda (solution) (= utility (solution-utility solution))) solutions)))] [(confuse? model) (solve (confuse-pes pes (confuse-variable model) (confuse-values model) (confuse-influence model)) (confuse-then model))] [(discover? model) (let ([variable (discover-variable model)] [choices (discover-choices model)]) (let ([solutions (map (lambda (choice) (solve (switch-pes pes variable (choice-value choice)) (choice-then choice))) choices)]) (make-solution (make-switch variable (map collect-branch choices solutions)) (apply + (map solution-utility solutions)))))])) (define (solve-choice pes) (lambda (choice) (let ([solution (solve pes (choice-then choice))]) (make-solution (make-do (choice-value choice) (solution-policy solution)) (solution-utility solution))))) (define (collect-branch choice solution) (make-branch (choice-value choice) (solution-policy solution))) ; Redefine "make-observe" to use "confuse" and "discover" instead. (define (make-observe variable chances) (make-confuse variable (map chance-value chances) (lambda (environment) (map chance-probability chances)) (make-discover variable (map (lambda (c) (make-choice (chance-value c) (chance-then c))) chances)))) ; A utility function to make make-decide and make-discover more convenient. (define (among variable values maker continuation) (maker variable (map (lambda (value) (make-choice value (continuation value))) values))) ; The Monty Hall problem. (define (monty-hall) (let ([doors '(a b c)]) (make-confuse 'prize doors (lambda (environment) (normalize (map (lambda (prize) 1) doors))) (among 'initial doors make-decide (lambda (initial) (make-confuse 'open doors (lambda (environment) (let ([prize (binding-value (find-binding 'prize environment))]) (normalize (map (lambda (open) (if (or (equal? open initial) (equal? open prize)) 0 1)) doors)))) (among 'open doors make-discover (lambda (open) (among 'final doors make-decide (lambda (final) (among 'prize doors make-discover (lambda (prize) (make-reward (if (equal? final prize) 1 0))))))))))))))