17 ANN_Policy::ANN_Policy (
int n_states,
int n_actions,
int n_hidden,
real alpha,
real gamma,
real lambda,
bool eligibility,
bool softmax,
real randomness,
real init_eval,
bool separate_actions) :
DiscretePolicy (n_states, n_actions, alpha, gamma, lambda, softmax, randomness, init_eval)
22 message (
"Using eligibility traces");
103 int argmax =
argMax (Q_s);
117 fprintf (stderr,
"Action %d out of bounds\n", a);
130 fprintf (stderr,
"Unknown learning method\n");
199 logmsg (
"#+[CONDIFENCE]\n");
201 logmsg (
"#-[CONDIFENCE]\n");
bool forced_learning
Force agent to take supplied action.
int softMax(real *Qs)
Softmax Gibbs sampling.
bool confidence
Confidence estimates option.
int ANN_Init(ANN *ann)
Initialise neural network.
real ANN_Input(ANN *ann, real *x)
Give an input vector to the neural network.
real tdError
temporal difference error
bool eligibility_traces
use eligibility traces
virtual bool useConfidenceEstimates(bool confidence, real zeta=0.01)
Set to use confidence estimates for action selection, with variance smoothing zeta.
virtual void Reset()
Reset eligibility traces.
int eGreedy(real *Qs)
e-greedy sampling
virtual int SelectAction(real *s, real r, int forced_a=-1)
Select an action, given a vector of real numbers which represents the state.
ANN * J
Evaluation network.
void ANN_SetLearningRate(ANN *ann, real a)
Set the learning rate to a.
int n_actions
number of actions
real * ps
Previous state vector.
ANN_Policy(int n_states, int n_actions, int n_hidden=0, real alpha=0.1, real gamma=0.8, real lambda=0.8, bool eligibility=false, bool softmax=false, real randomness=0.1, real init_eval=0.0, bool separate_actions=false)
Make a new policy.
bool eligibility
eligibility option
void ANN_SetZeta(ANN *ann, real zeta)
Set zeta, the variance smoothing parameter.
int DeleteANN(ANN *ann)
Delete a neural network.
void ANN_Reset(ANN *ann)
Resets the eligibility traces and batch updates.
real ANN_StochasticInput(ANN *ann, real *x)
Stochastically generate an output, depending on parameter distributions.
real * ANN_GetOutput(ANN *ann)
Get the output for the current input.
int ANN_AddHiddenLayer(ANN *ann, int n_nodes)
Add a hidden layer with n_nodes.
int n_states
number of states
enum LearningMethod learning_method
learning method to use
ANN * NewANN(int n_inputs, int n_outputs)
Create a new ANN.
int argMax(real *Qs)
Get ID of maximum action.
real lambda
Eligibility trace decay.
real * JQs
Placeholder for evaluation vector (separate_actions)
real gamma
Future discount parameter.
void ANN_SetBatchMode(ANN *ann, bool batch)
Set batch updates.
bool separate_actions
Single/separate evaluation option.
void ANN_SetLambda(ANN *ann, real lambda)
Set lambda, eligibility decay.
real J_ps_pa
Evaluation of last action.
real ANN_Delta_Train(ANN *ann, real *delta, real TD)
Minimise a custom cost function.
Discrete policies with reinforcement learning.
real * delta_vector
Scratch vector for TD error.
void ANN_SetOutputsToLinear(ANN *ann)
Set outputs to linear.
ANN management structure.
ANN ** Ja
Evaluation networks (for separate_actions case)
void message(const char *msg,...)
Prints a message.
real zeta
Confidence smoothing.