TORCS  1.3.9
The Open Racing Car Simulator
ann_policy.h
Go to the documentation of this file.
1 // -*- Mode: c++ -*-
2 // copyright (c) 2004 by Christos Dimitrakakis <dimitrak@idiap.ch>
3 // $Id$
4 
5 
6 /***************************************************************************
7  * *
8  * This program is free software; you can redistribute it and/or modify *
9  * it under the terms of the GNU General Public License as published by *
10  * the Free Software Foundation; either version 2 of the License, or *
11  * (at your option) any later version. *
12  * *
13  ***************************************************************************/
14 
15 #ifndef ANN_POLICY_H
16 #define ANN_POLICY_H
17 
18 #include <learning/policy.h>
35 class ANN_Policy : public DiscretePolicy
36 {
37 protected:
38  ANN* J;
39  ANN** Ja;
40  real* ps;
41  real* JQs;
44  bool eligibility;
46 public:
48  ANN_Policy (int n_states, int n_actions, int n_hidden = 0, real alpha=0.1, real gamma=0.8, real lambda=0.8, bool eligibility = false, bool softmax = false, real randomness=0.1, real init_eval=0.0, bool separate_actions = false);
49  virtual ~ANN_Policy();
52  virtual int SelectAction(real* s, real r, int forced_a=-1);
54  virtual void Reset();
56  virtual real getLastActionValue () {return J_ps_pa;}
59  real sum = 0.0;
60  int i;
61  for (i=0; i<n_actions; i++) {
62  sum += eval[i];
63  }
64  for (i=0; i<n_actions; i++) {
65  eval[i] = eval[i]/sum;
66  }
67  return eval;
68  }
69  virtual bool useConfidenceEstimates(bool confidence, real zeta=0.01);
70 };
71 
72 
73 #endif
bool confidence
Confidence estimates option.
Definition: policy.h:174
real r
reward
Definition: policy.h:158
virtual bool useConfidenceEstimates(bool confidence, real zeta=0.01)
Set to use confidence estimates for action selection, with variance smoothing zeta.
Definition: ann_policy.cpp:188
virtual void Reset()
Reset eligibility traces.
Definition: ann_policy.cpp:175
virtual ~ANN_Policy()
Definition: ann_policy.cpp:60
real * eval
evaluation of current action
Definition: policy.h:153
virtual int SelectAction(real *s, real r, int forced_a=-1)
Select an action, given a vector of real numbers which represents the state.
Definition: ann_policy.cpp:75
ANN * J
Evaluation network.
Definition: ann_policy.h:38
int n_actions
number of actions
Definition: policy.h:150
real * ps
Previous state vector.
Definition: ann_policy.h:40
ANN_Policy(int n_states, int n_actions, int n_hidden=0, real alpha=0.1, real gamma=0.8, real lambda=0.8, bool eligibility=false, bool softmax=false, real randomness=0.1, real init_eval=0.0, bool separate_actions=false)
Make a new policy.
Definition: ann_policy.cpp:17
bool eligibility
eligibility option
Definition: ann_policy.h:44
real alpha
learning rate
Definition: policy.h:166
A type of discrete action policy using a neural network for function approximation.
Definition: ann_policy.h:35
int n_states
number of states
Definition: policy.h:149
real lambda
Eligibility trace decay.
Definition: policy.h:165
real * JQs
Placeholder for evaluation vector (separate_actions)
Definition: ann_policy.h:41
real gamma
Future discount parameter.
Definition: policy.h:164
virtual real getLastActionValue()
Return the last action value.
Definition: ann_policy.h:56
bool separate_actions
Single/separate evaluation option.
Definition: ann_policy.h:45
real J_ps_pa
Evaluation of last action.
Definition: ann_policy.h:42
Discrete policies with reinforcement learning.
Definition: policy.h:144
real * delta_vector
Scratch vector for TD error.
Definition: ann_policy.h:43
float real
Definition: real.h:13
ANN management structure.
Definition: ANN.h:89
ANN ** Ja
Evaluation networks (for separate_actions case)
Definition: ann_policy.h:39
virtual real * getActionProbabilities()
Definition: ann_policy.h:58
real zeta
Confidence smoothing.
Definition: policy.h:179