13 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP
14 #define MLPACK_METHODS_RL_ENVIRONMENT_ACROBOT_HPP
48 State(
const arma::colvec& data) : data(data)
52 arma::colvec&
Data() {
return data; }
55 double Theta1()
const {
return data[0]; }
60 double Theta2()
const {
return data[1]; }
75 const arma::colvec&
Encode()
const {
return data; }
123 const double gravity = 9.81,
124 const double linkLength1 = 1.0,
125 const double linkLength2 = 1.0,
126 const double linkMass1 = 1.0,
127 const double linkMass2 = 1.0,
128 const double linkCom1 = 0.5,
129 const double linkCom2 = 0.5,
130 const double linkMoi = 1.0,
131 const double maxVel1 = 4 *
M_PI,
132 const double maxVel2 = 9 *
M_PI,
133 const double dt = 0.2,
134 const double doneReward = 0) :
137 linkLength1(linkLength1),
138 linkLength2(linkLength2),
139 linkMass1(linkMass1),
140 linkMass2(linkMass2),
147 doneReward(doneReward),
168 arma::colvec currentState = {state.
Theta1(), state.
Theta2(),
171 arma::colvec currentNextState =
Rk4(currentState,
Torque(action));
187 if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
207 return Sample(state, action, nextState);
216 return State((arma::randu<arma::colvec>(4) - 0.5) / 5.0);
227 if (maxSteps != 0 && stepsPerformed >= maxSteps)
229 Log::Info <<
"Episode terminated due to the maximum number of steps"
233 else if (-std::cos(state.
Theta1()) - std::cos(state.
Theta1() +
236 Log::Info <<
"Episode terminated due to agent succeeding.";
249 arma::colvec
Dsdt(arma::colvec state,
const double torque)
const
251 const double m1 = linkMass1;
252 const double m2 = linkMass2;
253 const double l1 = linkLength1;
254 const double lc1 = linkCom1;
255 const double lc2 = linkCom2;
256 const double I1 = linkMoi;
257 const double I2 = linkMoi;
258 const double g = gravity;
259 const double a = torque;
260 const double theta1 = state[0];
261 const double theta2 = state[1];
263 arma::colvec values(4);
264 values[0] = state[2];
265 values[1] = state[3];
267 const double d1 = m1 * std::pow(lc1, 2) + m2 * (std::pow(l1, 2) +
268 std::pow(lc2, 2) + 2 * l1 * lc2 * std::cos(theta2)) + I1 + I2;
270 const double d2 = m2 * (std::pow(lc2, 2) + l1 * lc2 * std::cos(theta2)) +
273 const double phi2 = m2 * lc2 * g * std::cos(theta1 + theta2 -
M_PI / 2.);
275 const double phi1 = - m2 * l1 * lc2 * std::pow(values[1], 2) *
276 std::sin(theta2) - 2 * m2 * l1 * lc2 * values[1] * values[0] *
277 std::sin(theta2) + (m1 * lc1 + m2 * l1) * g *
278 std::cos(theta1 -
M_PI / 2) + phi2;
280 values[3] = (a + d2 / d1 * phi1 - m2 * l1 * lc2 * std::pow(values[0], 2) *
281 std::sin(theta2) - phi2) / (m2 * std::pow(lc2, 2) + I2 -
282 std::pow(d2, 2) / d1);
284 values[2] = -(d2 * values[3] + phi1) / d1;
299 const double minimum,
300 const double maximum)
const
302 const double diff = maximum - minimum;
306 value = value - diff;
308 else if (value < minimum)
310 value = value + diff;
335 arma::colvec
Rk4(
const arma::colvec state,
const double torque)
const
337 arma::colvec k1 =
Dsdt(state, torque);
338 arma::colvec k2 =
Dsdt(state + dt * k1 / 2, torque);
339 arma::colvec k3 =
Dsdt(state + dt * k2 / 2, torque);
340 arma::colvec k4 =
Dsdt(state + dt * k3, torque);
341 arma::colvec nextState = state + dt * (k1 + 2 * k2 + 2 * k3 + k4) / 6;
395 size_t stepsPerformed;
arma::colvec & Data()
Modify the state representation.
Acrobot(const size_t maxSteps=500, const double gravity=9.81, const double linkLength1=1.0, const double linkLength2=1.0, const double linkMass1=1.0, const double linkMass2=1.0, const double linkCom1=0.5, const double linkCom2=0.5, const double linkMoi=1.0, const double maxVel1=4 *M_PI, const double maxVel2=9 *M_PI, const double dt=0.2, const double doneReward=0)
Construct a Acrobot instance using the given constants.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of the Acrobot System.
double & AngularVelocity1()
Modify the angular velocity (one).
double AngularVelocity1() const
Get value of Angular velocity (one).
bool IsTerminal(const State &state) const
This function checks if the acrobot has reached the terminal state.
const arma::colvec & Encode() const
Encode the state to a column vector.
size_t MaxSteps() const
Get the maximum number of steps allowed.
double & Theta1()
Modify value of theta (one).
double AngularVelocity2() const
Get value of Angular velocity (two).
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if –verbose is specified, prefixed with [INFO ].
double Theta2() const
Get value of theta (two).
double Theta1() const
Get value of theta (one).
double & Theta2()
Modify value of theta (two).
double Wrap(double value, const double minimum, const double maximum) const
Wrap funtion is required to truncate the angle value from -180 to 180.
double & AngularVelocity2()
Modify the angular velocity (two).
static constexpr size_t dimension
Dimension of the encoded state.
Implementation of Acrobot game.
size_t & MaxSteps()
Set the maximum number of steps allowed.
double Sample(const State &state, const Action &action)
Dynamics of the Acrobot System.
Include all of the base components required to write mlpack methods, and the main mlpack Doxygen docu...
double Random()
Generates a uniform random number between 0 and 1.
State()
Construct a state instance.
State(const arma::colvec &data)
Construct a state instance from given data.
State InitialSample()
This function does random initialization of state space.
size_t StepsPerformed() const
Get the number of steps performed.
arma::colvec Rk4(const arma::colvec state, const double torque) const
This function calls the RK4 iterative method to estimate the next state based on given ordinary diffe...
double Torque(const Action &action) const
This function calculates the torque for a particular action.
arma::colvec Dsdt(arma::colvec state, const double torque) const
This is the ordinary differential equations required for estimation of nextState through RK4 method...
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.