mlpack  3.1.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
pendulum.hpp
Go to the documentation of this file.
1 
16 #ifndef MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
17 #define MLPACK_METHODS_RL_ENVIRONMENT_PENDULUM_HPP
18 
19 #include <mlpack/prereqs.hpp>
21 
22 namespace mlpack {
23 namespace rl {
24 
31 class Pendulum
32 {
33  public:
38  class State
39  {
40  public:
44  State() : data(dimension, arma::fill::zeros)
45  { /* Nothing to do here. */ }
46 
52  State(const arma::colvec& data): data(data)
53  { /* Nothing to do here. */ }
54 
56  arma::colvec& Data() { return data; }
57 
59  double Theta() const { return data[0]; }
61  double& Theta() { return data[0]; }
62 
64  double AngularVelocity() const { return data[1]; }
66  double& AngularVelocity() { return data[1]; }
67 
69  const arma::colvec& Encode() const { return data; }
70 
72  static constexpr size_t dimension = 2;
73 
74  private:
76  arma::colvec data;
77  };
78 
/**
 * Implementation of action of Pendulum. The single entry of `action` is the
 * torque request that Sample() clamps and applies to the pendulum.
 */
struct Action
{
  // Requested torque. Zero-initialized so that a default-constructed Action
  // never carries an indeterminate value into Sample().
  double action[1] = { 0.0 };
  // Storing degree of freedom
  const int size = 1;
};
90 
103  Pendulum(const double maxAngularVelocity = 8,
104  const double maxTorque = 2.0,
105  const double dt = 0.05,
106  const double angleThreshold = M_PI / 12,
107  const double doneReward = 0.0,
108  const size_t maxSteps = 0) :
109  maxAngularVelocity(maxAngularVelocity),
110  maxTorque(maxTorque),
111  dt(dt),
112  angleThreshold(angleThreshold),
113  doneReward(doneReward),
114  maxSteps(maxSteps),
115  stepsPerformed(0)
116  { /* Nothing to do here */ }
117 
127  double Sample(const State& state,
128  const Action& action,
129  State& nextState)
130  {
131  // Update the number of steps performed.
132  stepsPerformed++;
133 
134  // Get current state.
135  double theta = state.Theta();
136  double angularVelocity = state.AngularVelocity();
137 
138  // Define constants which specify our pendulum.
139  const double gravity = 10.0;
140  const double mass = 1.0;
141  const double length = 1.0;
142 
143  // Get action and clip the values between max and min limits.
144  double torque = math::ClampRange(action.action[0], -maxTorque, maxTorque);
145 
146  // Calculate costs of taking this action in the current state.
147  double costs = std::pow(AngleNormalize(theta), 2) + 0.1 *
148  std::pow(angularVelocity, 2) + 0.001 * std::pow(torque, 2);
149 
150  // Calculate new state values and assign to the next state.
151  double newAngularVelocity = angularVelocity + (-3.0 * gravity / (2 *
152  length) * std::sin(theta + M_PI) + 3.0 / std::pow(mass * length, 2) *
153  torque) * dt;
154  nextState.AngularVelocity() = math::ClampRange(newAngularVelocity,
155  -maxAngularVelocity, maxAngularVelocity);
156  nextState.Theta() = theta + newAngularVelocity * dt;
157 
158  // Check if the episode has terminated
159  bool done = IsTerminal(nextState);
160 
161  // Do not reward the agent if time ran out.
162  if (done && maxSteps != 0 && stepsPerformed >= maxSteps)
163  return 0;
164  else if (done)
165  return doneReward;
166 
167  // Return the reward of taking the action in current state.
168  // The reward is simply the negative of cost incurred for the action.
169  return -costs;
170  }
171 
179  double Sample(const State& state, const Action& action)
180  {
181  State nextState;
182  return Sample(state, action, nextState);
183  }
184 
192  {
193  State state;
194  state.Theta() = math::Random(-M_PI + angleThreshold, M_PI - angleThreshold);
195  state.AngularVelocity() = math::Random(-1.0, 1.0);
196  stepsPerformed = 0;
197  return state;
198  }
199 
205  double AngleNormalize(double theta) const
206  {
207  // Scale angle within [-pi, pi).
208  return double(fmod(theta + M_PI, 2 * M_PI) - M_PI);
209  }
210 
217  bool IsTerminal(const State& state) const
218  {
219  if (maxSteps != 0 && stepsPerformed >= maxSteps)
220  {
221  Log::Info << "Episode terminated due to the maximum number of steps"
222  "being taken.";
223  return true;
224  }
225  else if (state.Theta() > M_PI - angleThreshold ||
226  state.Theta() < -M_PI + angleThreshold)
227  {
228  Log::Info << "Episode terminated due to agent succeeding.";
229  return true;
230  }
231  return false;
232  }
233 
235  size_t StepsPerformed() const { return stepsPerformed; }
236 
238  size_t MaxSteps() const { return maxSteps; }
240  size_t& MaxSteps() { return maxSteps; }
241 
242  private:
// Bound on the angular velocity: Sample() clamps the next state's angular
// velocity to [-maxAngularVelocity, maxAngularVelocity].
244  double maxAngularVelocity;
245 
// Bound on the torque: Sample() clamps the action to [-maxTorque, maxTorque].
247  double maxTorque;
248 
// Time step used by Sample() to advance the angular velocity and theta.
250  double dt;
251 
// Margin from +/- pi: IsTerminal() treats theta outside
// [-pi + angleThreshold, pi - angleThreshold] as terminal, and
// InitialSample() draws the starting theta from inside that range.
253  double angleThreshold;
254 
// Reward returned by Sample() when the episode ends for a reason other than
// the step limit.
256  double doneReward;
257 
// Maximum number of steps per episode; 0 means no limit is enforced.
259  size_t maxSteps;
260 
// Steps taken so far: incremented by Sample(), reset by InitialSample().
262  size_t stepsPerformed;
263 };
264 
265 } // namespace rl
266 } // namespace mlpack
267 
268 #endif
size_t MaxSteps() const
Get the maximum number of steps allowed.
Definition: pendulum.hpp:238
double & Theta()
Modify the value of theta.
Definition: pendulum.hpp:61
Implementation of Pendulum task.
Definition: pendulum.hpp:31
double Theta() const
Get the theta.
Definition: pendulum.hpp:59
size_t StepsPerformed() const
Get the number of steps performed.
Definition: pendulum.hpp:235
double Sample(const State &state, const Action &action, State &nextState)
Dynamics of Pendulum.
Definition: pendulum.hpp:127
The core includes that mlpack expects; standard C++ includes and Armadillo.
Implementation of action of Pendulum.
Definition: pendulum.hpp:84
arma::colvec & Data()
Modify the internal representation of the state.
Definition: pendulum.hpp:56
bool IsTerminal(const State &state) const
This function checks if the pendulum has reached a terminal state.
Definition: pendulum.hpp:217
State(const arma::colvec &data)
Construct a state based on the given data.
Definition: pendulum.hpp:52
#define M_PI
Definition: prereqs.hpp:39
Pendulum(const double maxAngularVelocity=8, const double maxTorque=2.0, const double dt=0.05, const double angleThreshold=M_PI/12, const double doneReward=0.0, const size_t maxSteps=0)
Construct a Pendulum instance using the given values.
Definition: pendulum.hpp:103
Miscellaneous math clamping routines.
double AngleNormalize(double theta) const
This function calculates the normalized angle for a particular theta.
Definition: pendulum.hpp:205
State()
Construct a state instance.
Definition: pendulum.hpp:44
static MLPACK_EXPORT util::PrefixedOutStream Info
Prints informational messages if --verbose is specified, prefixed with [INFO ].
Definition: log.hpp:84
double & AngularVelocity()
Modify the value of angular velocity.
Definition: pendulum.hpp:66
static constexpr size_t dimension
Dimension of the encoded state.
Definition: pendulum.hpp:72
size_t & MaxSteps()
Set the maximum number of steps allowed.
Definition: pendulum.hpp:240
const arma::colvec & Encode() const
Encode the state to a column vector.
Definition: pendulum.hpp:69
Implementation of state of Pendulum.
Definition: pendulum.hpp:38
double Sample(const State &state, const Action &action)
Dynamics of Pendulum.
Definition: pendulum.hpp:179
double Random()
Generates a uniform random number between 0 and 1.
Definition: random.hpp:78
State InitialSample()
Initial theta is randomly generated within [-pi + angleThreshold, pi - angleThreshold], and the initial angular velocity within [-1, 1].
Definition: pendulum.hpp:191
double AngularVelocity() const
Get the angular velocity.
Definition: pendulum.hpp:64
double ClampRange(double value, const double rangeMin, const double rangeMax)
Clamp a number between a particular range.
Definition: clamp.hpp:53