TORCS  1.3.9
The Open Racing Car Simulator
ann_policy.cpp
Go to the documentation of this file.
1 // -*- Mode: c++ -*-
2 // copyright (c) 2004 by Christos Dimitrakakis <dimitrak@idiap.ch>
3 // $Id$
4 
5 
6 /***************************************************************************
7  * *
8  * This program is free software; you can redistribute it and/or modify *
9  * it under the terms of the GNU General Public License as published by *
10  * the Free Software Foundation; either version 2 of the License, or *
11  * (at your option) any later version. *
12  * *
13  ***************************************************************************/
14 
15 #include <learning/ann_policy.h>
16 
17 ANN_Policy::ANN_Policy (int n_states, int n_actions, int n_hidden, real alpha, real gamma, real lambda, bool eligibility, bool softmax, real randomness, real init_eval, bool separate_actions) : DiscretePolicy (n_states, n_actions, alpha, gamma, lambda, softmax, randomness, init_eval)
18 {
19  this->separate_actions = separate_actions;
20  this->eligibility = eligibility;
21  if (eligibility) {
22  message ("Using eligibility traces");
23  }
24  if (separate_actions) {
25  message ("Separate actions");
26  J = NULL;
27  Ja = new ANN* [n_actions];
28  JQs = new real [n_actions];
29  for (int i=0; i<n_actions; i++) {
30  Ja[i] = NewANN (n_states, 1);
31  if (n_hidden > 0) {
32  ANN_AddHiddenLayer (Ja[i], n_hidden);
33  }
34  ANN_Init (Ja[i]);
36  ANN_SetBatchMode(Ja[i], false);
40  }
41  } else {
42  JQs = NULL;
43  Ja = NULL;
45  if (n_hidden > 0) {
46  ANN_AddHiddenLayer (J, n_hidden);
47  }
48  ANN_Init (J);
50  ANN_SetBatchMode(J, false);
54  }
55  ps = new real [n_states];
56  delta_vector = new real [n_actions];
57  J_ps_pa = 0.0;
58 }
59 
61 {
62  delete [] ps;
63  delete [] delta_vector;
64  if (separate_actions) {
65  for (int i=0; i<n_actions; i++) {
66  DeleteANN(Ja[i]);
67  }
68  delete [] Ja;
69  } else {
70  //ANN_ShowWeights(J);
71  DeleteANN (J);
72  }
73 }
74 
75 int ANN_Policy::SelectAction (real* s, real r, int forced_a)
76 {
77  int a; // selected action
78  int amax; //maximum evaluated action
79  real* Q_s; // pointer to evaluations for state s
80  if (confidence) {
81  if (separate_actions) {
82  for (int i=0; i<n_actions; i++) {
83  ANN_StochasticInput (Ja[i], s);
84  JQs[i] = ANN_GetOutput(Ja[i])[0];
85  }
86  Q_s = JQs;
87  } else {
89  Q_s = ANN_GetOutput (J);
90  }
91  } else {
92  if (separate_actions) {
93  for (int i=0; i<n_actions; i++) {
94  ANN_Input (Ja[i], s);
95  JQs[i] = ANN_GetOutput(Ja[i])[0];
96  }
97  Q_s = JQs;
98  } else {
99  ANN_Input (J, s);
100  Q_s = ANN_GetOutput (J);
101  }
102  }
103  int argmax = argMax (Q_s);
104 
105  if (forced_learning) {
106  a = forced_a;
107  } else if (confidence) {
108  a = argmax;
109  } else if (smax) {
110  a = softMax (Q_s);
111  //printf ("Q[%d][%d]=%f\n", s, a, Q[s][a]);
112  } else {
113  a = eGreedy (Q_s);
114  }
115 
116  if (a<0 || a>=n_actions) {
117  fprintf (stderr, "Action %d out of bounds\n", a);
118  }
119 
120  switch (learning_method) {
121 
122  case Sarsa:
123  amax = a;
124  break;
125  case QLearning:
126  amax = argmax;
127  break;
128  default:
129  amax = a;
130  fprintf (stderr, "Unknown learning method\n");
131  }
132  if (pa>=0) { // do not update at start of episode
133  real delta = r + gamma*Q_s[amax] - J_ps_pa;
134  tdError = delta;
135  for (int j=0; j<n_actions; j++) {
136  delta_vector[j] = 0.0;
137  }
138  if (separate_actions) {
139  if (eligibility) {
140  delta_vector[0] = 1.0;
141  ANN_Delta_Train (Ja[pa], delta_vector, delta);
142  // Reset other actions' traces.
143  for (int i=0; i<n_actions; i++) {
144  if (i!=pa) {
145  ANN_Reset(Ja[i]);
146  }
147  }
148  } else {
149  delta_vector[0] = delta;
151  }
152  } else {
153  if (J->eligibility_traces) {
154  delta_vector[pa] = 1.0;
155  ANN_Delta_Train (J, delta_vector, delta);
156  } else {
157  delta_vector[pa] = delta;
159  }
160  }
161 
162 
163  }
164 
165  //printf ("%d %d #STATE\n", min_el_state, max_el_state);
166  // printf ("Q[%d,%d]=%f r=%f e=%f ad=%f gl=%f #QV\n",
167  // ps, pa, Q[ps][pa], r, e[ps][pa], ad, gl);
168 
169  J_ps_pa = Q_s[a];
170  pa = a;
171 
172  return a;
173 }
174 
176 {
177  if (separate_actions) {
178  for (int i=0; i<n_actions; i++) {
179  ANN_Reset (Ja[i]);
180  }
181  } else {
182  ANN_Reset(J);
183  }
184 }
185 
188 bool ANN_Policy::useConfidenceEstimates (bool confidence, real zeta) {
189  this->confidence = confidence;
190  this->zeta = zeta;
191  if (separate_actions) {
192  for (int i=0; i<n_actions; i++) {
193  ANN_SetZeta(Ja[i], zeta);
194  }
195  } else {
196  ANN_SetZeta(J, zeta);
197  }
198  if (confidence) {
199  logmsg ("#+[CONDIFENCE]\n");
200  } else {
201  logmsg ("#-[CONDIFENCE]\n");
202  }
203 
204  return confidence;
205 }
bool forced_learning
Force agent to take supplied action.
Definition: policy.h:173
int softMax(real *Qs)
Softmax Gibbs sampling.
Definition: policy.cpp:783
bool confidence
Confidence estimates option.
Definition: policy.h:174
int ANN_Init(ANN *ann)
Initialise neural network.
Definition: ANN.cpp:346
real r
reward
Definition: policy.h:158
real ANN_Input(ANN *ann, real *x)
Give an input vector to the neural network.
Definition: ANN.cpp:406
real tdError
temporal difference error
Definition: policy.h:160
bool eligibility_traces
use eligibility
Definition: ANN.h:102
virtual bool useConfidenceEstimates(bool confidence, real zeta=0.01)
Set to use confidence estimates for action selection, with variance smoothing zeta.
Definition: ann_policy.cpp:188
virtual void Reset()
Reset eligibility traces.
Definition: ann_policy.cpp:175
Definition: policy.h:140
virtual ~ANN_Policy()
Definition: ann_policy.cpp:60
int eGreedy(real *Qs)
e-greedy sampling
Definition: policy.cpp:802
virtual int SelectAction(real *s, real r, int forced_a=-1)
Select an action, given a vector of real numbers which represents the state.
Definition: ann_policy.cpp:75
ANN * J
Evaluation network.
Definition: ann_policy.h:38
void ANN_SetLearningRate(ANN *ann, real a)
Set the learning rate to a.
Definition: ANN.cpp:856
int n_actions
number of actions
Definition: policy.h:150
real * ps
Previous state vector.
Definition: ann_policy.h:40
ANN_Policy(int n_states, int n_actions, int n_hidden=0, real alpha=0.1, real gamma=0.8, real lambda=0.8, bool eligibility=false, bool softmax=false, real randomness=0.1, real init_eval=0.0, bool separate_actions=false)
Make a new policy.
Definition: ann_policy.cpp:17
bool eligibility
eligibility option
Definition: ann_policy.h:44
void ANN_SetZeta(ANN *ann, real zeta)
Set zeta, parameter variance smoothing.
Definition: ANN.cpp:890
int DeleteANN(ANN *ann)
Delete a neural network.
Definition: ANN.cpp:77
real alpha
learning rate
Definition: policy.h:166
void ANN_Reset(ANN *ann)
Resets the eligibility traces and batch updates.
Definition: ANN.cpp:379
real ANN_StochasticInput(ANN *ann, real *x)
Stochastically generate an output, depending on parameter distributions.
Definition: ANN.cpp:429
real * ANN_GetOutput(ANN *ann)
Get the output for the current input.
Definition: ANN.cpp:824
int ANN_AddHiddenLayer(ANN *ann, int n_nodes)
Add a hidden layer with n_nodes.
Definition: ANN.cpp:111
int n_states
number of states
Definition: policy.h:149
enum LearningMethod learning_method
learning method to use;
Definition: policy.h:148
bool smax
softmax option
Definition: policy.h:161
ANN * NewANN(int n_inputs, int n_outputs)
Create a new ANN.
Definition: ANN.cpp:25
int argMax(real *Qs)
Get ID of maximum action.
Definition: policy.cpp:816
real lambda
Eligibility trace decay.
Definition: policy.h:165
real * JQs
Placeholder for evaluation vector (separate_actions)
Definition: ann_policy.h:41
real gamma
Future discount parameter.
Definition: policy.h:164
void ANN_SetBatchMode(ANN *ann, bool batch)
Set batch updates.
Definition: ANN.cpp:906
int pa
previous action
Definition: policy.h:157
bool separate_actions
Single/separate evaluation option.
Definition: ann_policy.h:45
void ANN_SetLambda(ANN *ann, real lambda)
Set lambda, eligibility decay.
Definition: ANN.cpp:872
real J_ps_pa
Evaluation of last action.
Definition: ann_policy.h:42
real ANN_Delta_Train(ANN *ann, real *delta, real TD)
Minimise a custom cost function.
Definition: ANN.cpp:584
Discrete policies with reinforcement learning.
Definition: policy.h:144
real * delta_vector
Scratch vector for TD error.
Definition: ann_policy.h:43
float real
Definition: real.h:13
void ANN_SetOutputsToLinear(ANN *ann)
Set outputs to linear.
Definition: ANN.cpp:1033
ANN management structure.
Definition: ANN.h:89
ANN ** Ja
Evaluation networks (for separate_actions case)
Definition: ann_policy.h:39
#define logmsg
Definition: learn_debug.h:17
void message(const char *msg,...)
Prints a message.
real zeta
Confidence smoothing.
Definition: policy.h:179