16 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
17 #define MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
52 State(
const arma::colvec& data): data(data)
// Modify the internal representation of the state.
// Returns a mutable reference to the `data` column vector, so writes through
// the returned reference change this state directly.
56 arma::colvec&
Data() {
return data; }
// Get the theta.
// Read-only accessor: returns element 0 of `data` by value.
59 double Theta()
const {
return data[0]; }
// Modify the value of theta.
// Returns a mutable reference to element 0 of `data`, so assigning to the
// result updates the stored state in place.
61 double&
Theta() {
return data[0]; }
// Encode the state to a column vector.
// Returns a const reference to `data` itself; no copy or transformation is
// made here.
69 const arma::colvec&
Encode()
const {
return data; }
104 const double maxTorque = 2.0,
105 const double dt = 0.05,
106 const double angleThreshold =
M_PI / 12,
107 const double doneReward = 0.0,
108 const size_t maxSteps = 0) :
109 maxAngularVelocity(maxAngularVelocity),
110 maxTorque(maxTorque),
112 angleThreshold(angleThreshold),
113 doneReward(doneReward),
135 double theta = state.
Theta();
139 const double gravity = 10.0;
140 const double mass = 1.0;
141 const double length = 1.0;
148 std::pow(angularVelocity, 2) + 0.001 * std::pow(torque, 2);
151 double newAngularVelocity = angularVelocity + (-3.0 * gravity / (2 *
152 length) * std::sin(theta +
M_PI) + 3.0 / std::pow(mass * length, 2) *
155 -maxAngularVelocity, maxAngularVelocity);
156 nextState.
Theta() = theta + newAngularVelocity * dt;
162 if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
182 return Sample(state, action, nextState);
219 if (maxSteps != 0 && stepsPerformed >= maxSteps)
221 Log::Info <<
"Episode terminated due to the maximum number of steps"
225 else if (state.
Theta() >
M_PI - angleThreshold ||
228 Log::Info <<
"Episode terminated due to agent succeeding.";
244 double maxAngularVelocity;
253 double angleThreshold;
262 size_t stepsPerformed;
size_t MaxSteps() const
Get the maximum number of steps allowed.
double & Theta()
Modify the value of theta.
Implementation of Pendulum task.
double Theta() const
Get the theta.
size_t StepsPerformed() const
Get the number of steps performed.
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Pendulum.
The core includes that mlpack expects; standard C++ includes and Armadillo.
Implementation of action of Pendulum.
arma::colvec & Data()
Modify the internal representation of the state.
bool IsTerminal(const State &state) const
This function checks if the pendulum has reached a terminal state.
State(const arma::colvec &data)
Construct a state based on the given data.
Pendulum(const double maxAngularVelocity=8, const double maxTorque=2.0, const double dt=0.05, const double angleThreshold=M_PI/12, const double doneReward=0.0, const size_t maxSteps=0)
Construct a Pendulum instance using the given values.
Miscellaneous math clamping routines.
double AngleNormalize(double theta) const
This function calculates the normalized angle for a particular theta.
State()
Construct a state instance.
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if --verbose is specified, prefixed with [INFO ].
double & AngularVelocity()
Modify the value of angular velocity.
static constexpr size_t dimension
Dimension of the encoded state.
size_t & MaxSteps()
Set the maximum number of steps allowed.
const arma::colvec & Encode() const
Encode the state to a column vector.
Implementation of state of Pendulum.
double Sample(const State &state, const Action &action)
Dynamics of Pendulum.
double Random()
Generates a uniform random number between 0 and 1.
State InitialSample()
Initial theta is randomly generated within [-pi, pi].
double AngularVelocity() const
Get the angular velocity.
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.