;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; unscramble.lisp ;; c) 2001, Lee Spector #| This file implements a simple genetic algorithm that shows how standard Darwinian mechanisms could unscramble a telephone directory in which all of the names and numbers have been paired randomly. It was written to accompany an article called "Hierarchy Helps It Work That Way" which is in preparation for the journal Philosophical Psychology. For convenience we use integers both for the names and for the numbers, which "match" when the integer values are equivalent. But we do not allow ourselves to use knowledge of this numerical implementation to cheat -- for example by sorting names or numbers numerically, or by deterimining a numerical distance of an item from its correct place in a sorted directory. The only information that the system can determine about a canditate directory is the number of correct pairs (which will be the "fitness" in the genetic algorithm). This is a proof of concept rather than production software, and its efficiency could be improved in several ways. It seems to work fairly well with the current parameters for small directories (in the neighborhood of 100 names) but it would probably have to be streamlined and run with different parameters (for example a much larger population size) to work on the Manhattan telephone directory, which is the subject of the discussion in the above-mentioned article. For the purposes of that discussion it should be noted that natural evolution had access to vastly more resources (time, number of individuals, etc.). This code uses relatively standard genetic operators (mutation and crossover) from the genetic algorithms literature, although adjustments were made to ensure that each directory always contains all of the "names" and all of the "phone numbers." A "shuffe" operator was also added as a primitive deterrent to premature convergence. To run this code simply load this file into a Common Lisp environment. In some environments it will run much faster if you compile it prior to loading. Upon success the system will print a success message and enter a "break loop" -- you can then exit Lisp (usually by typing (quit) or (exit) though the details will depend on your Lisp implementation). Upon failure the sustem will print a failure message and you can then exit Lisp in the same way. A file called "out" in your current directory will accumulate output from the program (which will also be printed to the terminal). |# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; parameters (defparameter *directory-size* 100 "The number of name/number pairs in each telephone directory.") (defparameter *population-size* 200 "The number of directories in the population.") (defparameter *crossover-percentage* 60 "The percentage of the next generation that will be produced by crossover (recombination) from members of the current generation.") (defparameter *mutation-percentage* 35 "The percentage of the next generation that will be produced by mutation of a member of the current generation.") (defparameter *max-mutations* 10 "The maximum number of number swaps that will occur in each instance of mutation.") (defparameter *shuffle-percentage* 2 "The percentage of the next generation that will be produced by shuffling a member of the current generation.") ;; the reproduction (perfect cloning) percentage is: ;; 100 - (*crossover-percentage* + ;; *mutation-percentage* + ;; *shuffle-percentage*) (defparameter *generations* 100 "The maximum number of generations that the system will run.") (defparameter *tournament-size* 10 "The size of tournaments used to select individuals for participation in genetic operations. Higher numbers produce more selection pressure.") ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; utilities (defun out (&rest format-args) "An output utility -- similar to Lisp's FORMAT but doesn't take a stream argument and sends output to standard output AND a file called 'out'." (apply #'format (cons t format-args)) (with-open-file (out "out" :direction :output :if-exists :append :if-does-not-exist :create) (apply #'format (cons out format-args)))) (defun upto (n) "Returns a list of numbers from 0 to n." (if (< n 0) nil (append (upto (- n 1)) (list n)))) (defparameter *all-valid-numbers* (upto (1- *directory-size*)) "Contains a list of all integers that are valid 'names' or 'phone numbers'.") (defun shuffle (list) "Returns a randomly re-ordered copy of list." (let ((result nil)) (do () ((null list) result) (let* ((which (random (length list))) (it (nth which list))) (push it result) (setq list (remove it list :count 1)))))) (defun scrambled-directory () "Returns a randomly scrambled directory." (let ((names (upto (1- *directory-size*))) (numbers (upto (1- *directory-size*)))) (mapcar #'list (shuffle names) (shuffle numbers)))) (defun random-pair (dir) "Returns a random name/number pair from a directory, assuming that the directory is full." (nth (random *directory-size*) dir)) (defun random-element (list) "Returns a random element from a list of any length." (nth (random (length list)) list)) (defun replace-association (key pairs new-association) "Returns pairs (a list of two-element lists) with the second element of the first pair that matches key (in its first place) replaced with new-association." (if (null pairs) nil (if (eq (first (first pairs)) key) (cons (list key new-association) (rest pairs)) (cons (first pairs) (replace-association key (rest pairs) new-association))))) (defun new-valid-pair (partial-dir) "Returns a random name/number pair that is compatible with the provided partial directory." (let ((valid-names (set-difference *all-valid-numbers* (mapcar #'first partial-dir))) (valid-numbers (set-difference *all-valid-numbers* (mapcar #'second partial-dir)))) (list (random-element valid-names) (random-element valid-numbers)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; fitness and genetic operators (defun fitness (dir) "Returns the fitness of a directory, which is the number of correct name/number pairs that it contains." (count t dir :key #'(lambda (pair) (= (first pair) (second pair))))) (defun mutate (dir) "Returns a mutated version of the given directory, with at most one association swapped." (let ((pair1 (random-pair dir)) (pair2 (random-pair dir))) (if (equalp pair1 pair2) dir (replace-association (first pair1) (replace-association (first pair2) dir (second pair1)) (second pair2))))) (defun crossover (dir1 dir2 &optional (result nil)) "Returns the result of recombining the two provided directories." (if (null dir1) result (let* ((coin (nth (random 2) (list 'heads 'tails))) (first-try (case coin (heads (first dir1)) (tails (first dir2)))) (second-try (case coin (heads (first dir2)) (tails (first dir1))))) (cond ((and (not (member (first first-try) result :key #'first)) (not (member (second first-try) result :key #'first))) (crossover (rest dir1) (rest dir2) (cons first-try result))) ((and (not (member (first second-try) result :key #'first)) (not (member (second second-try) result :key #'first))) (crossover (rest dir1) (rest dir2) (cons second-try result))) (t (let ((new-pair (new-valid-pair result))) (crossover (rest dir1) (rest dir2) (cons new-pair result)))))))) (defun select (pop) "Returns the result of conducting a selection tournament on the provided population, which is assumed to be full." (let ((tourn-set nil)) (dotimes (i *tournament-size*) (push (nth (random *population-size*) pop) tourn-set)) (let ((winner (first tourn-set))) (dolist (this (rest tourn-set)) (when (> (first this) (first winner)) (setq winner this))) winner))) (defun child (evaluated-pop) "Returns a child produced from the provided evaluated population." (let ((selector (random 101))) (cond ((< selector *mutation-percentage*) (let ((child (mutate (second (select evaluated-pop))))) (dotimes (i (random *max-mutations*)) (setq child (mutate child))) child)) ((< selector (+ *mutation-percentage* *crossover-percentage*)) (crossover (second (select evaluated-pop)) (second (select evaluated-pop)))) ((< selector (+ *mutation-percentage* *crossover-percentage* *shuffle-percentage*)) (shuffle (second (select evaluated-pop)))) (t (second (select evaluated-pop)))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; top level (defun unscramble () "Runs a genetic algorithm to produce an unscrambled directory from a population of scrambled directories." (out "~%creating population...") (let ((pop nil)) (dotimes (i *population-size*) (push (scrambled-directory) pop)) (dotimes (gen *generations* (print "Failed!")) (out "~%generation: ~A" gen) (out "~%evaluating fitness...") (let* ((evaluated-pop (mapcar #'(lambda (dir) (list (fitness dir) dir)) pop)) (best (apply #'max (mapcar #'first evaluated-pop)))) (out "~%best fitness: ~A (perfect is ~A)" best *directory-size*) (when (= best *directory-size*) (break "Success!")) (out "~%creating next generation...") (let ((next-pop nil)) (dotimes (i *population-size*) (push (child evaluated-pop) next-pop)) (setq pop next-pop)))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; call to the top level function (unscramble)