12 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP
13 #define MLPACK_METHODS_RL_ENVIRONMENT_REWARD_CLIPPING_HPP
29 template <
typename EnvironmentType>
34 using State =
typename EnvironmentType::State;
37 using Action =
typename EnvironmentType::Action;
48 const double minReward = -1.0,
49 const double maxReward = 1.0) :
50 environment(environment),
64 return environment.InitialSample();
76 return environment.IsTerminal(state);
93 double unclippedReward = environment.Sample(state, action, nextState);
109 return Sample(state, action, nextState);
129 EnvironmentType environment;
EnvironmentType & Environment()
Modify the environment.
RewardClipping(EnvironmentType &environment, const double minReward=-1.0, const double maxReward=1.0)
Constructor for creating a RewardClipping instance.
The core includes that mlpack expects; standard C++ includes and Armadillo.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Environment.
State InitialSample()
The InitialSample method is called to initialize the starting state; it forwards to the wrapped environment's InitialSample().
Miscellaneous math clamping routines.
double MaxReward() const
Get the maximum reward value.
EnvironmentType & Environment() const
Get the environment.
typename EnvironmentType::State State
Convenient typedef for state.
typename EnvironmentType::Action Action
Convenient typedef for action.
double & MaxReward()
Modify the maximum reward value.
double & MinReward()
Modify the minimum reward value.
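Interface for clipping the reward to some value between the specified maximum and minimum value (clipping clamps the reward returned by the wrapped environment to the range between the minimum and maximum reward).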
double MinReward() const
Get the minimum reward value.
double Sample(const State &state, const Action &action)
Dynamics of Environment.
bool IsTerminal(const State &state) const
Checks whether the given state is a terminal state.
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.