; A utility function to find a list element that satisfies a predicate. (define (find pred? list) (cond [(null? list) #f] [(pred? (car list)) (car list)] [else (find pred? (cdr list))])) ; This line and (make-inspector) below tells DrScheme to ; show a visual representation of the contents of structures (print-struct #t) ; Three ways to construct a decision model, i.e., three kinds of nodes that the ; root node of a decision tree can be. A reward node is a leaf node that just ; stores a utility value. A decide node is an internal node with a label and a ; list of choices. An observe node is an internal node with a label and a list ; of chances. (define-struct reward (utility) (make-inspector)) (define-struct decide (variable choices) (make-inspector)) (define-struct observe (variable chances) (make-inspector)) ; A choice is a branch off a choice node. It stores an action to take and the ; consequent subtree. (define-struct choice (value then) (make-inspector)) (define (find-choice value choices) (find (lambda (choice) (equal? value (choice-value choice))) choices)) ; A chance is a branch off a chance node. It stores a probability, an action ; to take, and the consequent subtree. (define-struct chance (value probability then) (make-inspector)) (define (take-chance mass chances) (let ([remaining-mass (- mass (chance-probability (car chances)))]) (if (positive? remaining-mass) (take-chance remaining-mass (cdr chances)) (car chances)))) ; A sample decision model (define (dog) (make-decide 1 (list (make-choice 'f (make-reward .2)) (make-choice 'w (make-observe 'X (list (make-chance 'y .8 (make-decide 2 (list (make-choice 'f (make-reward .7)) (make-choice 'h (make-reward .25))))) (make-chance 'n .2 (make-decide 3 (list (make-choice 'f (make-reward .1)) (make-choice 'h (make-reward .25)))))))) (make-choice 'h (make-reward .3))))) ; The interpreter below simulates one run of a decision model using interactive ; user input and returns the utility rewarded. (define (interact model) (cond [(reward? model) (reward-utility model)] [(decide? model) (display (decide-variable model)) (display " ") (display (map choice-value (decide-choices model))) (display "? ") (let ([choice (find-choice (read) (decide-choices model))]) (cond [choice (interact (choice-then choice))] [else (display "That is not a valid choice value.") (newline) (interact model)]))] [(observe? model) (display (observe-variable model)) (display " = ") (let ([chance (take-chance (random) (observe-chances model))]) (display (chance-value chance)) (newline) (interact (chance-then chance)))])) ; Three kinds of policies. (define-struct end () (make-inspector)) (define-struct do (value then) (make-inspector)) (define-struct switch (variable branches) (make-inspector)) ; Each branch listed in a switch policy stores a potentially observable value ; and a consequent policy. (define-struct branch (value then) (make-inspector)) (define (find-branch value branches) (find (lambda (branch) (equal? value (branch-value branch))) branches)) ; The interpreter below simulates one run of a decision model using a policy ; (assumed feasible) and returns the utility rewarded. (define (execute policy model) (cond [(reward? model) (reward-utility model)] [(decide? model) (display (decide-variable model)) (display " ") (display (map choice-value (decide-choices model))) (display "? ") (display (do-value policy)) (newline) (let ([choice (find-choice (do-value policy) (decide-choices model))]) (execute (do-then policy) (choice-then choice)))] [(observe? model) (display (observe-variable model)) (display " = ") (let ([chance (take-chance (random) (observe-chances model))]) (display (chance-value chance)) (newline) (execute (branch-then (find-branch (chance-value chance) (switch-branches policy))) (chance-then chance)))])) ; The utility function checks whether a policy is feasible with respect to a ; decision model. If the policy is feasible, then it returns the expected ; utility. (define (utility policy model) (cond [(reward? model) (and (end? policy) (reward-utility model))] [(decide? model) (and (do? policy) (let ([choice (find-choice (do-value policy) (decide-choices model))]) (and choice (utility (do-then policy) (choice-then choice)))))] [(observe? model) (and (switch? policy) (equal? (switch-variable policy) (observe-variable model)) (utility-switch (switch-branches policy) (observe-chances model)))])) (define (utility-switch branches chances) (cond [(null? branches) (and (null? chances) 0)] [else (and (not (null? chances)) (equal? (branch-value (car branches)) (chance-value (car chances))) (let ([car-utility (utility (branch-then (car branches)) (chance-then (car chances)))] [cdr-utility (utility-switch (cdr branches) (cdr chances))]) (and car-utility cdr-utility (+ (* (chance-probability (car chances)) car-utility) cdr-utility))))])) ; The interpreter below solves the decision model to yield a policy and an ; expected utility. (define-struct solution (policy utility) (make-inspector)) (define (solve model) (cond [(reward? model) (make-solution (make-end) (reward-utility model))] [(decide? model) (let ([solutions (map solve-choice (decide-choices model))]) (let ([utility (apply max (map solution-utility solutions))]) (find (lambda (solution) (= utility (solution-utility solution))) solutions)))] [(observe? model) (let ([solutions (map solve (map chance-then (observe-chances model)))]) (make-solution (make-switch (observe-variable model) (map collect-branch (observe-chances model) solutions)) (apply + (map collect-utility (observe-chances model) solutions))))])) (define (solve-choice choice) (let ([solution (solve (choice-then choice))]) (make-solution (make-do (choice-value choice) (solution-policy solution)) (solution-utility solution)))) (define (collect-branch chance solution) (make-branch (chance-value chance) (solution-policy solution))) (define (collect-utility chance solution) (* (chance-probability chance) (solution-utility solution)))