Differential Dynamic Programming (DDP) solver. More...
#include <crocoddyl/core/solvers/ddp.hpp>
Public Member Functions | |
EIGEN_MAKE_ALIGNED_OPERATOR_NEW | SolverDDP (boost::shared_ptr< ShootingProblem > problem) |
Initialize the DDP solver. More... | |
virtual void | allocateData () |
Allocate all the internal data needed for the solver. | |
virtual void | backwardPass () |
Run the backward pass (Riccati sweep) More... | |
virtual double | calcDiff () |
Update the Jacobian and Hessian of the optimal control problem. More... | |
virtual void | computeDirection (const bool &recalc=true) |
Compute the search direction \((\delta\mathbf{x},\delta\mathbf{u})\) for the current guess \((\mathbf{x}_s,\mathbf{u}_s)\). More... | |
virtual void | computeGains (const std::size_t &t) |
Compute the feedforward and feedback terms using a Cholesky decomposition. More... | |
void | decreaseRegularization () |
Decrease the state and control regularization values by the factor regfactor_. | |
virtual const Eigen::Vector2d & | expectedImprovement () |
Return the expected improvement from a given current search direction. More... | |
virtual void | forwardPass (const double &stepLength) |
Run the forward pass or rollout. More... | |
const std::vector< double > & | get_alphas () const |
Return the set of step lengths used by the line-search procedure. | |
const std::vector< Eigen::VectorXd > & | get_fs () const |
Return the gaps \(\mathbf{\bar{f}}_{s}\). | |
const std::vector< Eigen::MatrixXd > & | get_K () const |
Return the feedback gains \(\mathbf{K}_{s}\). | |
const std::vector< Eigen::VectorXd > & | get_k () const |
Return the feedforward gains \(\mathbf{k}_{s}\). | |
const std::vector< Eigen::VectorXd > & | get_Qu () const |
Return the Jacobian of the Hamiltonian function \(\mathbf{Q}_{\mathbf{u}_s}\). | |
const std::vector< Eigen::MatrixXd > & | get_Quu () const |
Return the Hessian of the Hamiltonian function \(\mathbf{Q}_{\mathbf{uu}_s}\). | |
const std::vector< Eigen::VectorXd > & | get_Qx () const |
Return the Jacobian of the Hamiltonian function \(\mathbf{Q}_{\mathbf{x}_s}\). | |
const std::vector< Eigen::MatrixXd > & | get_Qxu () const |
Return the Hessian of the Hamiltonian function \(\mathbf{Q}_{\mathbf{xu}_s}\). | |
const std::vector< Eigen::MatrixXd > & | get_Qxx () const |
Return the Hessian of the Hamiltonian function \(\mathbf{Q}_{\mathbf{xx}_s}\). | |
const double & | get_regfactor () const |
Return the factor used to decrease or increase the regularization. | |
const double & | get_regmax () const |
Return the maximum regularization value. | |
const double & | get_regmin () const |
Return the minimum regularization value. | |
const double & | get_th_grad () const |
Return the tolerance of the expected gradient used for testing the step. | |
const double & | get_th_stepdec () const |
Return the step-length threshold used to decrease regularization. | |
const double & | get_th_stepinc () const |
Return the step-length threshold used to increase regularization. | |
const std::vector< Eigen::VectorXd > & | get_Vx () const |
Return the gradient of the Value function \(V_{\mathbf{x}_s}\). | |
const std::vector< Eigen::MatrixXd > & | get_Vxx () const |
Return the Hessian of the Value function \(V_{\mathbf{xx}_s}\). | |
void | increaseRegularization () |
Increase the state and control regularization values by the factor regfactor_. | |
void | set_alphas (const std::vector< double > &alphas) |
Modify the set of step lengths used by the line-search procedure. | |
void | set_regfactor (const double ®_factor) |
Modify the factor used to decrease or increase the regularization. | |
void | set_regmax (const double ®max) |
Modify the maximum regularization value. | |
void | set_regmin (const double ®min) |
Modify the minimum regularization value. | |
void | set_th_grad (const double &th_grad) |
Modify the tolerance of the expected gradient used for testing the step. | |
void | set_th_stepdec (const double &th_step) |
Modify the step-length threshold used to decrease regularization. | |
void | set_th_stepinc (const double &th_step) |
Modify the step-length threshold used to increase regularization. | |
virtual bool | solve (const std::vector< Eigen::VectorXd > &init_xs=DEFAULT_VECTOR, const std::vector< Eigen::VectorXd > &init_us=DEFAULT_VECTOR, const std::size_t &maxiter=100, const bool &is_feasible=false, const double ®Init=1e-9) |
Compute the optimal trajectory \(\mathbf{x}^*_s,\mathbf{u}^*_s\) as lists of \(T+1\) and \(T\) terms. More... | |
virtual double | stoppingCriteria () |
Return a positive value that quantifies the algorithm termination. More... | |
virtual double | tryStep (const double &steplength=1) |
Try a predefined step length and compute its cost improvement. More... | |
Public Member Functions inherited from SolverAbstract | |
EIGEN_MAKE_ALIGNED_OPERATOR_NEW | SolverAbstract (boost::shared_ptr< ShootingProblem > problem) |
Initialize the solver. More... | |
const double & | get_cost () const |
Return the total cost. | |
const Eigen::Vector2d & | get_d () const |
Return the LQ approximation of the expected improvement. | |
const double & | get_dV () const |
Return the cost reduction. | |
const double & | get_dVexp () const |
Return the expected cost reduction. | |
const bool & | get_is_feasible () const |
Return the feasibility status of the \((\mathbf{x}_s,\mathbf{u}_s)\) trajectory. | |
const std::size_t & | get_iter () const |
Return the number of iterations performed by the solver. | |
const boost::shared_ptr< ShootingProblem > & | get_problem () const |
Return the shooting problem. | |
const double & | get_steplength () const |
Return the step length. | |
const double & | get_stop () const |
Return the value computed by stoppingCriteria() | |
const double & | get_th_acceptstep () const |
Return the threshold used for accepting a step. | |
const double & | get_th_stop () const |
Return the tolerance for stopping the algorithm. | |
const double & | get_ureg () const |
Return the control regularization value. | |
const std::vector< Eigen::VectorXd > & | get_us () const |
Return the control trajectory \(\mathbf{u}_s\). | |
const double & | get_xreg () const |
Return the state regularization value. | |
const std::vector< Eigen::VectorXd > & | get_xs () const |
Return the state trajectory \(\mathbf{x}_s\). | |
const std::vector< boost::shared_ptr< CallbackAbstract > > & | getCallbacks () const |
"Return the list of callback functions using for diagnostic | |
void | set_th_acceptstep (const double &th_acceptstep) |
Modify the threshold used for accepting a step. | |
void | set_th_stop (const double &th_stop) |
Modify the tolerance for stopping the algorithm. | |
void | set_ureg (const double &ureg) |
Modify the control regularization value. | |
void | set_us (const std::vector< Eigen::VectorXd > &us) |
Modify the control trajectory \(\mathbf{u}_s\). | |
void | set_xreg (const double &xreg) |
Modify the state regularization value. | |
void | set_xs (const std::vector< Eigen::VectorXd > &xs) |
Modify the state trajectory \(\mathbf{x}_s\). | |
void | setCallbacks (const std::vector< boost::shared_ptr< CallbackAbstract > > &callbacks) |
Set a list of callback functions used for diagnostic. More... | |
void | setCandidate (const std::vector< Eigen::VectorXd > &xs_warm=DEFAULT_VECTOR, const std::vector< Eigen::VectorXd > &us_warm=DEFAULT_VECTOR, const bool &is_feasible=false) |
Set the solver candidate warm-point values \((\mathbf{x}_s,\mathbf{u}_s)\). More... | |
Protected Attributes | |
std::vector< double > | alphas_ |
Set of step lengths used by the line-search procedure. | |
double | cost_try_ |
Total cost computed by line-search procedure. | |
std::vector< Eigen::VectorXd > | dx_ |
State error during the roll-out/forward pass. | |
std::vector< Eigen::VectorXd > | fs_ |
Gaps/defects between shooting nodes. | |
Eigen::VectorXd | fTVxx_p_ |
Workspace term \(V_{\mathbf{xx}_{k+1}}\mathbf{\bar{f}}_{k+1}\) used in the backward pass. | |
std::vector< Eigen::MatrixXd > | FuTVxx_p_ |
Workspace term \(\mathbf{f}_{\mathbf{u}}^\top V_{\mathbf{xx}}\) used in the backward pass. | |
Eigen::MatrixXd | FxTVxx_p_ |
Workspace term \(\mathbf{f}_{\mathbf{x}}^\top V_{\mathbf{xx}}\) used in the backward pass. | |
std::vector< Eigen::MatrixXd > | K_ |
Feedback gains. | |
std::vector< Eigen::VectorXd > | k_ |
Feed-forward terms. | |
std::vector< Eigen::VectorXd > | Qu_ |
Gradient of the Hamiltonian. | |
std::vector< Eigen::MatrixXd > | Quu_ |
Hessian of the Hamiltonian. | |
std::vector< Eigen::LLT< Eigen::MatrixXd > > | Quu_llt_ |
Cholesky LLT solver. | |
std::vector< Eigen::VectorXd > | Quuk_ |
Workspace term \(\mathbf{Q}_{\mathbf{uu}_k}\mathbf{k}_k\) used in the backward pass. | |
std::vector< Eigen::VectorXd > | Qx_ |
Gradient of the Hamiltonian. | |
std::vector< Eigen::MatrixXd > | Qxu_ |
Hessian of the Hamiltonian. | |
std::vector< Eigen::MatrixXd > | Qxx_ |
Hessian of the Hamiltonian. | |
double | regfactor_ |
Factor used to decrease or increase the regularization. | |
double | regmax_ |
Maximum allowed regularization value. | |
double | regmin_ |
Minimum allowed regularization value. | |
double | th_grad_ |
Tolerance of the expected gradient used for testing the step. | |
double | th_stepdec_ |
Step-length threshold used to decrease regularization. | |
double | th_stepinc_ |
Step-length threshold used to increase regularization. | |
std::vector< Eigen::VectorXd > | us_try_ |
Control trajectory computed by line-search procedure. | |
std::vector< Eigen::VectorXd > | Vx_ |
Gradient of the Value function. | |
std::vector< Eigen::MatrixXd > | Vxx_ |
Hessian of the Value function. | |
bool | was_feasible_ |
Label that indicates whether the previous iterate was feasible. | |
Eigen::VectorXd | xnext_ |
Next state. | |
std::vector< Eigen::VectorXd > | xs_try_ |
State trajectory computed by line-search procedure. | |
Protected Attributes inherited from SolverAbstract |
std::vector< boost::shared_ptr< CallbackAbstract > > | callbacks_ |
Callback functions. | |
double | cost_ |
Total cost. | |
Eigen::Vector2d | d_ |
LQ approximation of the expected improvement. | |
double | dV_ |
Cost reduction obtained by tryStep() | |
double | dVexp_ |
Expected cost reduction. | |
bool | is_feasible_ |
Label that indicates whether the current iterate is feasible. | |
std::size_t | iter_ |
Number of iterations performed by the solver. | |
boost::shared_ptr< ShootingProblem > | problem_ |
Optimal control problem. | |
double | steplength_ |
Current applied step-length. | |
double | stop_ |
Value computed by stoppingCriteria() | |
double | th_acceptstep_ |
Threshold used for accepting step. | |
double | th_stop_ |
Tolerance for stopping the algorithm. | |
double | ureg_ |
Current control regularization value. | |
std::vector< Eigen::VectorXd > | us_ |
Control trajectory. | |
double | xreg_ |
Current state regularization value. | |
std::vector< Eigen::VectorXd > | xs_ |
State trajectory. | |
Differential Dynamic Programming (DDP) solver.
The DDP solver computes an optimal trajectory and control commands by iteratively running backwardPass() and forwardPass(). The backward pass locally updates the quadratic approximation of the problem and computes the descent direction. If the warm-start is infeasible, it computes the gaps \(\mathbf{\bar{f}}_s\) and runs a modified Riccati sweep:
\begin{eqnarray*} \mathbf{Q}_{\mathbf{x}_k} &=& \mathbf{l}_{\mathbf{x}_k} + \mathbf{f}^\top_{\mathbf{x}_k} (V_{\mathbf{x}_{k+1}} + V_{\mathbf{xx}_{k+1}}\mathbf{\bar{f}}_{k+1}),\\ \mathbf{Q}_{\mathbf{u}_k} &=& \mathbf{l}_{\mathbf{u}_k} + \mathbf{f}^\top_{\mathbf{u}_k} (V_{\mathbf{x}_{k+1}} + V_{\mathbf{xx}_{k+1}}\mathbf{\bar{f}}_{k+1}),\\ \mathbf{Q}_{\mathbf{xx}_k} &=& \mathbf{l}_{\mathbf{xx}_k} + \mathbf{f}^\top_{\mathbf{x}_k} V_{\mathbf{xx}_{k+1}} \mathbf{f}_{\mathbf{x}_k},\\ \mathbf{Q}_{\mathbf{xu}_k} &=& \mathbf{l}_{\mathbf{xu}_k} + \mathbf{f}^\top_{\mathbf{x}_k} V_{\mathbf{xx}_{k+1}} \mathbf{f}_{\mathbf{u}_k},\\ \mathbf{Q}_{\mathbf{uu}_k} &=& \mathbf{l}_{\mathbf{uu}_k} + \mathbf{f}^\top_{\mathbf{u}_k} V_{\mathbf{xx}_{k+1}} \mathbf{f}_{\mathbf{u}_k}. \end{eqnarray*}
Then, the forward pass rolls out this new policy by integrating the system dynamics along the tuple of optimized control commands \(\mathbf{u}^*_s\), i.e.
\begin{eqnarray} \mathbf{\hat{x}}_0 &=& \mathbf{\tilde{x}}_0,\\ \mathbf{\hat{u}}_k &=& \mathbf{u}_k + \alpha\mathbf{k}_k + \mathbf{K}_k(\mathbf{\hat{x}}_k-\mathbf{x}_k),\\ \mathbf{\hat{x}}_{k+1} &=& \mathbf{f}_k(\mathbf{\hat{x}}_k,\mathbf{\hat{u}}_k). \end{eqnarray}
See also backwardPass() and forwardPass().
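A minimal usage sketch (assuming a shooting problem named problem built elsewhere from an initial state and a sequence of action models; identifiers other than the solver API are illustrative):

#include <crocoddyl/core/solvers/ddp.hpp>

// `problem` is a boost::shared_ptr<crocoddyl::ShootingProblem> assumed to be
// built elsewhere from an initial state and a list of action models.
crocoddyl::SolverDDP solver(problem);
solver.set_th_stop(1e-9);                // tolerance for stoppingCriteria()
const bool converged = solver.solve();   // cold start, at most 100 iterations
const std::vector<Eigen::VectorXd>& xs = solver.get_xs();  // optimal states
const std::vector<Eigen::VectorXd>& us = solver.get_us();  // optimal controls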
Compute the optimal trajectory \(\mathbf{x}^*_s,\mathbf{u}^*_s\) as lists of \(T+1\) and \(T\) terms.
From an initial guess init_xs, init_us (feasible or not), it iterates over computeDirection() and tryStep() until stoppingCriteria() falls below the threshold. This method also implements the globalization strategy used during the numerical optimization.
[in] | init_xs | initial guess for the state trajectory with \(T+1\) elements (default []) |
[in] | init_us | initial guess for the control trajectory with \(T\) elements (default []) |
[in] | maxiter | maximum allowed number of iterations (default 100) |
[in] | is_feasible | true if init_xs is obtained from integrating init_us (rollout) (default false) |
[in] | regInit | initial guess for the regularization value. Very low values are typically used with very good guess points (init_xs, init_us) |
Implements SolverAbstract.
Reimplemented in SolverFDDP.
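Continuing the sketch above, a warm-started call could look as follows (nu, the control dimension, is an assumed quantity; get_T() and get_x0() are the usual ShootingProblem accessors):

// Warm-start: xs_guess needs T+1 elements and us_guess needs T elements.
// Pass is_feasible=true only if xs_guess is the exact rollout of us_guess.
const std::size_t T = solver.get_problem()->get_T();
std::vector<Eigen::VectorXd> xs_guess(T + 1, solver.get_problem()->get_x0());
std::vector<Eigen::VectorXd> us_guess(T, Eigen::VectorXd::Zero(nu));
solver.solve(xs_guess, us_guess, 100, false, 1e-9);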
Compute the search direction \((\delta\mathbf{x},\delta\mathbf{u})\) for the current guess \((\mathbf{x}_s,\mathbf{u}_s)\).
You must first call setCandidate() to define the current guess: a state and control trajectory \((\mathbf{x}_s,\mathbf{u}_s)\) with \(T+1\) and \(T\) elements, respectively.
[in] | recalc | true for recalculating the derivatives at the current state and control |
Implements SolverAbstract.
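For example, a direction can be computed manually for a given candidate (a sketch, reusing the guess trajectories from the warm-start example above):

solver.setCandidate(xs_guess, us_guess, false);  // define the current guess
solver.computeDirection(true);  // recalc=true also recomputes the derivatives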
Try a predefined step length and compute its cost improvement.
It uses the search direction found by computeDirection() to try a given step length \(\alpha\); therefore, you need to run computeDirection() first. It returns the cost improvement achieved along the predefined step length.
[in] | stepLength | step length |
Implements SolverAbstract.
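A sketch of the backtracking idea used inside solve(), under the assumption that the acceptance rule simply compares the actual improvement against the expected one (the real rule also handles infeasible iterates and the gradient tolerance):

solver.computeDirection(true);  // search direction must be computed first
const Eigen::Vector2d& d = solver.expectedImprovement();
for (double alpha : solver.get_alphas()) {
  const double dV = solver.tryStep(alpha);                   // actual improvement
  const double dVexp = alpha * (d[0] + 0.5 * alpha * d[1]);  // LQ prediction
  if (dV > solver.get_th_acceptstep() * dVexp) {
    break;  // accept this step length
  }
}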
Return a positive value that quantifies the algorithm termination.
This value typically represents the gradient norm, which indicates that a local minimum has been reached. The function is used to evaluate the convergence of the algorithm. Strictly speaking, the stopping criterion depends on the search direction (computed by computeDirection()), but it could also depend on the chosen step length, as tested by tryStep().
Implements SolverAbstract.
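For instance, convergence can be checked manually (a sketch):

solver.computeDirection(true);
if (solver.stoppingCriteria() < solver.get_th_stop()) {
  // the gradient-based measure is below tolerance: the solver has converged
}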
Return the expected improvement from a given current search direction.
To compute the expected improvement, you first need to compute the search direction by running computeDirection().
Implements SolverAbstract.
Reimplemented in SolverFDDP.
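For a step length \(\alpha\), the two returned terms \(d_1, d_2\) define the LQ model of the expected cost reduction used by the line search; assuming the standard DDP model, it reads

\begin{equation} \Delta V_{exp}(\alpha) = \alpha\left(d_1 + \frac{1}{2}\alpha d_2\right). \end{equation}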
Update the Jacobian and Hessian of the optimal control problem.
These derivatives are computed around the guess state and control trajectories. These trajectories can be set using setCandidate().
Run the backward pass (Riccati sweep)
It assumes that the Jacobians and Hessians of the optimal control problem have been computed (i.e., by calcDiff()). The backward pass handles an infeasible guess through a modified Riccati sweep:
\begin{eqnarray*} \mathbf{Q}_{\mathbf{x}_k} &=& \mathbf{l}_{\mathbf{x}_k} + \mathbf{f}^\top_{\mathbf{x}_k} (V_{\mathbf{x}_{k+1}} + V_{\mathbf{xx}_{k+1}}\mathbf{\bar{f}}_{k+1}),\\ \mathbf{Q}_{\mathbf{u}_k} &=& \mathbf{l}_{\mathbf{u}_k} + \mathbf{f}^\top_{\mathbf{u}_k} (V_{\mathbf{x}_{k+1}} + V_{\mathbf{xx}_{k+1}}\mathbf{\bar{f}}_{k+1}),\\ \mathbf{Q}_{\mathbf{xx}_k} &=& \mathbf{l}_{\mathbf{xx}_k} + \mathbf{f}^\top_{\mathbf{x}_k} V_{\mathbf{xx}_{k+1}} \mathbf{f}_{\mathbf{x}_k},\\ \mathbf{Q}_{\mathbf{xu}_k} &=& \mathbf{l}_{\mathbf{xu}_k} + \mathbf{f}^\top_{\mathbf{x}_k} V_{\mathbf{xx}_{k+1}} \mathbf{f}_{\mathbf{u}_k},\\ \mathbf{Q}_{\mathbf{uu}_k} &=& \mathbf{l}_{\mathbf{uu}_k} + \mathbf{f}^\top_{\mathbf{u}_k} V_{\mathbf{xx}_{k+1}} \mathbf{f}_{\mathbf{u}_k}, \end{eqnarray*}
where \(\mathbf{l}_{\mathbf{x}_k}\), \(\mathbf{l}_{\mathbf{u}_k}\), \(\mathbf{f}_{\mathbf{x}_k}\) and \(\mathbf{f}_{\mathbf{u}_k}\) are the Jacobians of the cost function and dynamics, \(\mathbf{l}_{\mathbf{xx}_k}\), \(\mathbf{l}_{\mathbf{xu}_k}\) and \(\mathbf{l}_{\mathbf{uu}_k}\) are the Hessians of the cost function, \(V_{\mathbf{x}_{k+1}}\) and \(V_{\mathbf{xx}_{k+1}}\) define the linear-quadratic approximation of the Value function, and \(\mathbf{\bar{f}}_{k+1}\) describes the gaps of the dynamics.
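A minimal Eigen sketch of one node of this recursion (illustrative only: variable names are assumptions, and the actual implementation adds regularization and reuses preallocated workspace):

#include <Eigen/Dense>

// One node of the (Gauss-Newton) Riccati recursion with gaps, mirroring the
// equations above. Inputs: cost derivatives (lx, lu, lxx, lxu, luu) and
// dynamics Jacobians (fx, fu) at node k, the gap fbar at node k+1, and the
// Value-function terms (Vx_next, Vxx_next) at node k+1.
void riccatiStep(const Eigen::VectorXd& lx, const Eigen::VectorXd& lu,
                 const Eigen::MatrixXd& lxx, const Eigen::MatrixXd& lxu,
                 const Eigen::MatrixXd& luu, const Eigen::MatrixXd& fx,
                 const Eigen::MatrixXd& fu, const Eigen::VectorXd& fbar,
                 const Eigen::VectorXd& Vx_next, const Eigen::MatrixXd& Vxx_next,
                 Eigen::VectorXd& Qx, Eigen::VectorXd& Qu, Eigen::MatrixXd& Qxx,
                 Eigen::MatrixXd& Qxu, Eigen::MatrixXd& Quu) {
  const Eigen::VectorXd Vx_gap = Vx_next + Vxx_next * fbar;  // gap-corrected Vx
  Qx = lx + fx.transpose() * Vx_gap;
  Qu = lu + fu.transpose() * Vx_gap;
  Qxx = lxx + fx.transpose() * Vxx_next * fx;
  Qxu = lxu + fx.transpose() * Vxx_next * fu;
  Quu = luu + fu.transpose() * Vxx_next * fu;
}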
Run the forward pass or rollout.
It rolls out the action model using the policy (feedforward terms and feedback gains) computed by backwardPass():
\begin{eqnarray} \mathbf{\hat{x}}_0 &=& \mathbf{\tilde{x}}_0,\\ \mathbf{\hat{u}}_k &=& \mathbf{u}_k + \alpha\mathbf{k}_k + \mathbf{K}_k(\mathbf{\hat{x}}_k-\mathbf{x}_k),\\ \mathbf{\hat{x}}_{k+1} &=& \mathbf{f}_k(\mathbf{\hat{x}}_k,\mathbf{\hat{u}}_k). \end{eqnarray}
We can define different step lengths \(\alpha\).
[in] | stepLength | applied step length ( \(0\leq\alpha\leq1\)) |
Reimplemented in SolverFDDP, SolverBoxDDP, and SolverBoxFDDP.
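A minimal sketch of this rollout (illustrative: f stands in for the node dynamics, which the real solver evaluates through the problem's action models, and the state difference is taken in vector space for simplicity):

#include <functional>
#include <vector>
#include <Eigen/Dense>

// Rollout of the feedback policy above. xs_try and us_try are assumed
// preallocated with T+1 and T elements, respectively.
void forwardPassSketch(
    const std::function<Eigen::VectorXd(const Eigen::VectorXd&,
                                        const Eigen::VectorXd&)>& f,
    const std::vector<Eigen::VectorXd>& xs,  // reference state trajectory
    const std::vector<Eigen::VectorXd>& us,  // reference control trajectory
    const std::vector<Eigen::VectorXd>& k,   // feedforward terms
    const std::vector<Eigen::MatrixXd>& K,   // feedback gains
    double alpha,                            // step length in [0, 1]
    std::vector<Eigen::VectorXd>& xs_try, std::vector<Eigen::VectorXd>& us_try) {
  const std::size_t T = us.size();
  xs_try[0] = xs[0];  // the initial state is fixed
  for (std::size_t t = 0; t < T; ++t) {
    us_try[t] = us[t] + alpha * k[t] + K[t] * (xs_try[t] - xs[t]);
    xs_try[t + 1] = f(xs_try[t], us_try[t]);
  }
}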
Compute the feedforward and feedback terms using a Cholesky decomposition.
To compute the feedforward \(\mathbf{k}_k\) and feedback \(\mathbf{K}_k\) terms, we use a Cholesky decomposition to solve the linear systems involving \(\mathbf{Q}_{\mathbf{uu}_k}\), rather than forming \(\mathbf{Q}_{\mathbf{uu}_k}^{-1}\) explicitly:
\begin{eqnarray} \mathbf{k}_k &=& \mathbf{Q}_{\mathbf{uu}_k}^{-1}\mathbf{Q}_{\mathbf{u}_k},\\ \mathbf{K}_k &=& \mathbf{Q}_{\mathbf{uu}_k}^{-1}\mathbf{Q}_{\mathbf{ux}_k}. \end{eqnarray}
Note that if the Cholesky decomposition fails, then we restart the backward pass and increase the state and control regularization values.
Reimplemented in SolverBoxDDP, and SolverBoxFDDP.
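A minimal Eigen sketch of this computation (illustrative; the solver stores one Eigen::LLT object per node in Quu_llt_ and applies regularization before factorizing):

#include <Eigen/Dense>

// Gains via a Cholesky (LLT) factorization of Quu; regularization and the
// restart-on-failure logic described above are omitted for brevity.
bool computeGainsSketch(const Eigen::MatrixXd& Quu, const Eigen::VectorXd& Qu,
                        const Eigen::MatrixXd& Qux, Eigen::VectorXd& k,
                        Eigen::MatrixXd& K) {
  const Eigen::LLT<Eigen::MatrixXd> llt(Quu);  // requires Quu positive definite
  if (llt.info() != Eigen::Success) {
    return false;  // factorization failed: caller increases regularization
  }
  k = llt.solve(Qu);   // feedforward term  k = Quu^{-1} Qu
  K = llt.solve(Qux);  // feedback gains    K = Quu^{-1} Qux
  return true;
}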