TORCS  1.3.9
The Open Racing Car Simulator
policy.h
Go to the documentation of this file.
1 // -*- Mode: c++ -*-
2 // copyright (c) 2004 by Christos Dimitrakakis <dimitrak@idiap.ch>
3 // $Id$
4 
5 
6 /***************************************************************************
7  * *
8  * This program is free software; you can redistribute it and/or modify *
9  * it under the terms of the GNU General Public License as published by *
10  * the Free Software Foundation; either version 2 of the License, or *
11  * (at your option) any later version. *
12  * *
13  ***************************************************************************/
14 #ifndef POLICY_H
15 #define POLICY_H
16 
17 #include <cmath>
18 #include <cstdlib>
19 #include <cstdio>
20 #include <learning/ANN.h>
21 #include <learning/Distribution.h>
22 
143 
145 {
146 protected:
147 
149  int n_states;
150  int n_actions;
151  real** Q;
152  real** e;
156  int ps;
157  int pa;
158  real r;
161  bool smax;
162  bool pursuit;
163  real** P;
169  int n_samples;
174  bool confidence;
180  real** vQ;
181  int confMax(real* Qs, real* vQs, real p = 1.0);
182  int confSample(real* Qs, real* vQs);
183  int softMax(real* Qs);
184  int eGreedy(real* Qs);
185  int argMax(real* Qs);
186 public:
187  DiscretePolicy (int n_states, int n_actions, real alpha=0.1, real gamma=0.8, real lambda=0.8, bool softmax = false, real randomness=0.1, real init_eval=0.0);
189  virtual ~DiscretePolicy();
191  virtual void setLearningRate (real alpha) {this->alpha = alpha;}
193  virtual real getTDError () {return tdError;}
195  virtual real getLastActionValue () {return Q[ps][pa];}
196  virtual int SelectAction(int s, real r, int forced_a=-1);
197  virtual void Reset();
198  virtual void loadFile (char* f);
199  virtual void saveFile (char* f);
200  virtual void setQLearning();
201  virtual void setELearning();
202  virtual void setSarsa();
203  virtual bool useConfidenceEstimates(bool confidence, real zeta=0.01, bool confidence_eligibility = false);
204  virtual void setForcedLearning(bool forced);
205  virtual void setRandomness (real epsilon);
206  virtual void setGamma (real gamma);
207  virtual void setPursuit (bool pursuit);
208  virtual void setReplacingTraces (bool replacing);
209  virtual void useSoftmax (bool softmax);
210  virtual void setConfidenceDistribution (enum ConfidenceDistribution cd);
211  virtual void useGibbsConfidence (bool gibbs);
212  virtual void useReliabilityEstimate (bool ri);
213  virtual void saveState (FILE* f);
214 };
215 
216 
217 #endif
bool forced_learning
Force agent to take supplied action.
Definition: policy.h:173
int softMax(real *Qs)
Softmax Gibbs sampling.
Definition: policy.cpp:783
virtual void setPursuit(bool pursuit)
Use Pursuit for action selection.
Definition: policy.cpp:618
bool confidence_uses_gibbs
Additional gibbs sampling for confidence.
Definition: policy.h:178
bool confidence
Confidence estimates option.
Definition: policy.h:174
real r
reward
Definition: policy.h:158
real tdError
temporal difference error
Definition: policy.h:160
real * sample
sampling output
Definition: policy.h:154
real ** P
pursuit action probabilities
Definition: policy.h:163
virtual ~DiscretePolicy()
Kill the agent and free everything.
Definition: policy.cpp:155
Definition: policy.h:140
int max_el_state
max state ID to search for eligibility
Definition: policy.h:171
real * eval
evaluation of current action
Definition: policy.h:153
LearningMethod
Types of learning methods.
Definition: policy.h:140
int ps
previous state
Definition: policy.h:156
virtual void setSarsa()
Set the algorithm to SARSA mode.
Definition: policy.cpp:611
int eGreedy(real *Qs)
e-greedy sampling
Definition: policy.cpp:802
virtual void saveFile(char *f)
Save policy to a file.
Definition: policy.cpp:550
bool pursuit
pursuit option
Definition: policy.h:162
virtual void Reset()
Use at the end of every episode, after agent has entered the absorbing state.
Definition: policy.cpp:474
int n_actions
number of actions
Definition: policy.h:150
real expected_r
Expected reward.
Definition: policy.h:167
real ** vQ
variance estimate for Q
Definition: policy.h:180
real pQ
previous Q
Definition: policy.h:155
virtual real getTDError()
Get the temporal difference error of the previous action.
Definition: policy.h:193
bool replacing_traces
Replacing instead of accumulating traces.
Definition: policy.h:172
virtual void setELearning()
Set the algorithm to ELearning mode.
Definition: policy.cpp:604
int n_samples
number of samples for above expected r and V
Definition: policy.h:169
virtual void setLearningRate(real alpha)
Set the learning rate.
Definition: policy.h:191
real alpha
learning rate
Definition: policy.h:166
virtual void useSoftmax(bool softmax)
Set action selection to softmax.
Definition: policy.cpp:662
static Point p[4]
Definition: Convex.cpp:54
virtual void setForcedLearning(bool forced)
Set forced learning (force-feed actions)
Definition: policy.cpp:639
virtual void loadFile(char *f)
Load policy from a file.
Definition: policy.cpp:484
enum ConfidenceDistribution confidence_distribution
Distribution to use for confidence sampling.
Definition: policy.h:177
int n_states
number of states
Definition: policy.h:149
virtual real getLastActionValue()
Get the value of the last action taken.
Definition: policy.h:195
enum LearningMethod learning_method
learning method to use.
Definition: policy.h:148
virtual void useGibbsConfidence(bool gibbs)
Add Gibbs sampling for confidences.
Definition: policy.cpp:704
virtual void setGamma(real gamma)
Set the gamma of the sum to be maximised.
Definition: policy.cpp:656
bool smax
softmax option
Definition: policy.h:161
ConfidenceDistribution
Types of confidence distributions.
Definition: policy.h:142
int confMax(real *Qs, real *vQs, real p=1.0)
Confidence-based Gibbs sampling.
Definition: policy.cpp:715
virtual void setConfidenceDistribution(enum ConfidenceDistribution cd)
Set the distribution for direct action sampling.
Definition: policy.cpp:684
bool confidence_eligibility
Apply eligibility traces to confidence.
Definition: policy.h:175
virtual void setQLearning()
Set the algorithm to QLearning mode.
Definition: policy.cpp:598
int argMax(real *Qs)
Get ID of maximum action.
Definition: policy.cpp:816
real lambda
Eligibility trace decay.
Definition: policy.h:165
virtual void setRandomness(real epsilon)
Set randomness for action selection. Does not affect confidence mode.
Definition: policy.cpp:645
virtual void useReliabilityEstimate(bool ri)
Use the reliability estimate method for action selection.
Definition: policy.cpp:673
real gamma
Future discount parameter.
Definition: policy.h:164
int pa
previous action
Definition: policy.h:157
virtual bool useConfidenceEstimates(bool confidence, real zeta=0.01, bool confidence_eligibility=false)
Set to use confidence estimates for action selection, with variance smoothing zeta.
Definition: policy.cpp:580
real temp
scratch
Definition: policy.h:159
int confSample(real *Qs, real *vQs)
Directly sample from action value distribution.
Definition: policy.cpp:749
real ** e
eligibility trace
Definition: policy.h:152
virtual int SelectAction(int s, real r, int forced_a=-1)
Select an action a, given state s and reward from previous action.
Definition: policy.cpp:283
real expected_V
Expected state return.
Definition: policy.h:168
Discrete policies with reinforcement learning.
Definition: policy.h:144
real ** Q
state-action evaluation
Definition: policy.h:151
A neural network implementation.
float real
Definition: real.h:13
virtual void saveState(FILE *f)
Save the current evaluations in text format to a file.
Definition: policy.cpp:128
int min_el_state
min state ID to search for eligibility
Definition: policy.h:170
DiscretePolicy(int n_states, int n_actions, real alpha=0.1, real gamma=0.8, real lambda=0.8, bool softmax=false, real randomness=0.1, real init_eval=0.0)
Create a new discrete policy.
Definition: policy.cpp:42
bool reliability_estimate
reliability estimates option
Definition: policy.h:176
virtual void setReplacingTraces(bool replacing)
Use replacing instead of accumulating eligibility traces.
Definition: policy.cpp:629
real zeta
Confidence smoothing.
Definition: policy.h:179