基于Msnhnet实现最优化问题(中)一(无约束优化问题)

1. 阻尼牛顿法

牛顿法最突出的优点是收敛速度快，具有局部二阶收敛性，但是，基本牛顿法初始点需要足够“靠近”极小点，否则，有可能导致算法不收敛。

这样就引入了阻尼牛顿法，阻尼牛顿法最核心的一点在于可以修改每次迭代的步长，通过沿着牛顿法确定的方向一维搜索最优的步长，最终选择使得函数值最小的步长。

补充：一维搜索非精确搜索方法。

Goldstein准则(控制步长太小)
Wolfe准则

满足Wolfe准则的点为,和区间的点.

#include <Msnhnet/math/MsnhMatrixS.h>  
#include <Msnhnet/cv/MsnhCVGui.h>  
#include <iostream>  
  
using namespace Msnhnet;  
  
class DampedNewton  
{  
public:  
    DampedNewton(int maxIter, double eps, double rho, double tau):_maxIter(maxIter),_eps(eps),_rho(rho),_tau(tau){}  
  
  
    void setMaxIter(int maxIter)  
    {  
        _maxIter = maxIter;  
    }  
  
    virtual int solve(MatSDS &startPoint) = 0;  
  
    void setEps(double eps)  
    {  
        _eps = eps;  
    }  
  
    void setRho(double rho)  
    {  
        _rho = rho;  
    }  
  
    void setTau(double tau)  
    {  
        _tau = tau;  
    }  
  
    //正定性判定  
    bool isPosMat(const MatSDS &H)  
    {  
        MatSDS eigen = H.eigen()[0];  
        for (int i = 0; i < eigen.mWidth; ++i)  
        {  
            if(eigen[i]<=0)  
            {  
                return false;  
            }  
        }  
  
        return true;  
    }  
  
    const std::vector<Vec2F32> &getXStep() const  
    {  
        return _xStep;  
    }  
  
protected:  
    int _maxIter = 100;  
    double _eps = 0.00001;  
    double _rho = 0.2;  
    double _tau = 0.9;  
    std::vector<Vec2F32> _xStep;  
protected:  
    virtual MatSDS calGradient(const MatSDS& point) = 0;  
    virtual MatSDS calHessian(const MatSDS& point) = 0;  
    virtual bool calDk(const MatSDS& point, MatSDS &dk) = 0;  
    virtual MatSDS function(const MatSDS& point) = 0;  
};  
  
  
class DampedNewtonProblem1:public DampedNewton  
{  
public:  
    DampedNewtonProblem1(int maxIter, double eps, double rho, double tau):DampedNewton(maxIter, eps, rho, tau){}  
  
    MatSDS calGradient(const MatSDS &point) override  
    {  
        MatSDS J(1,2);  
        double x1 = point(0,0);  
        double x2 = point(0,1);  
  
        J(0,0) = 6*x1 - 2*x1*x2;  
        J(0,1) = 6*x2 - x1*x1;  
  
        return J;  
    }  
  
    MatSDS calHessian(const MatSDS &point) override  
    {  
        MatSDS H(2,2);  
        double x1 = point(0,0);  
        double x2 = point(0,1);  
  
        H(0,0) = 6 - 2*x2;  
        H(0,1) = -2*x1;  
        H(1,0) = -2*x1;  
        H(1,1) = 6;  
  
        return H;  
    }  
  
  
    bool calDk(const MatSDS& point, MatSDS &dk) override  
    {  
        MatSDS J = calGradient(point);  
        MatSDS H = calHessian(point);  
        if(!isPosMat(H))  
        {  
            return false;  
        }  
        dk = -1*H.invert()*J;  
        return true;  
    }  
  
    MatSDS function(const MatSDS &point) override  
    {  
        MatSDS f(1,1);  
        double x1 = point(0,0);  
        double x2 = point(0,1);  
  
        f(0,0) = 3*x1*x1 + 3*x2*x2 - x1*x1*x2;  
  
        return f;  
    }  
  
    int solve(MatSDS &startPoint) override  
    {  
        MatSDS x = startPoint;  
        for (int i = 0; i < _maxIter; ++i)  
        {  
  
            _xStep.push_back({(float)x[0],(float)x[1]});  
            MatSDS dk;  
  
            bool ok = calDk(x, dk);  
  
            if(!ok)  
            {  
                return -2;  
            }  
  
            double alpha = 1;  
  
            //Armijo准则  
            for (int i = 0; i < 100; ++i)  
            {  
                MatSDS left  = function(x + alpha*dk);  
                MatSDS right = function(x) + this->_rho*alpha*calGradient(x).transpose()*dk;  
  
                if(left(0,0) <= right(0,0))  
                {  
                    break;  
                }  
  
                alpha = alpha * _tau;  
            }  
  
            std::cout<<std::left<<"Iter(s): "<<std::setw(4)<<i<<", Loss: "<<std::setw(12)<<dk.L2()<<" Result: "<<function(x)[0]<<std::endl;  
  
            x = x + alpha*dk;  
  
            if(dk.LInf() < _eps)  
            {  
                startPoint = x;  
                return i+1;  
            }  
        }  
  
        return -1;  
    }  
};  
  
  
  
int main()  
{  
    DampedNewtonProblem1 function(100, 0.001, 0.4, 0.8);  
    MatSDS startPoint(1,2,{1.5,1.5});  
  
    try  
    {  
        int res = function.solve(startPoint);  
        if(res == -1)  
        {  
            std::cout<<"求解失败"<<std::endl;  
        }  
        else if(res == -2)  
        {  
            std::cout<<"Hessian 矩阵非正定, 求解失败"<<std::endl;  
        }  
        else  
        {  
            std::cout<<"求解成功! 迭代次数: "<<res<<std::endl;  
            std::cout<<"最小值点："<<res<<std::endl;  
            startPoint.print();  
  
            std::cout<<"此时方程的值为："<<std::endl;  
            function.function(startPoint).print();  
  
#ifdef WIN32  
        Gui::setFont("c:/windows/fonts/MSYH.TTC",16);  
#endif  
        std::cout<<"按\"esc\"退出!"<<std::endl;  
        Gui::plotLine(u8"阻尼牛顿法迭代X中间值","x",function.getXStep());  
        Gui::wait();  
  
        }  
  
    }  
    catch(Exception ex)  
    {  
        std::cout<<ex.what();  
    }  
}

结果:对于初始点 (1.5,1.5) ,迭代4次即可完成

阻尼牛顿法,4次完成迭代

阻尼牛顿法X中间点可视化

结果:对于初始点 (0,3) ,同样是Hessian矩阵不正定.

阻尼牛顿法求解失败

2. 牛顿Levenberg-Marquardt法

步骤

举例

#include <Msnhnet/math/MsnhMatrixS.h>  
#include <Msnhnet/cv/MsnhCVGui.h>  
#include <iostream>  
  
using namespace Msnhnet;  
  
class NewtonLM  
{  
public:  
    NewtonLM(int maxIter, double eps, double vk, double rho, double tau):_maxIter(maxIter),_eps(eps),_vk(vk),_rho(rho),_tau(tau){}  
  
  
    void setMaxIter(int maxIter)  
    {  
        _maxIter = maxIter;  
    }  
  
    virtual int solve(MatSDS &startPoint) = 0;  
  
    void setEps(double eps)  
    {  
        _eps = eps;  
    }  
  
    void setRho(double rho)  
    {  
        _rho = rho;  
    }  
  
    void setTau(double tau)  
    {  
        _tau = tau;  
    }  
  
    //正定性判定  
    bool isPosMat(const MatSDS &H)  
    {  
         MatSDS eigen = H.eigen()[0];  
         for (int i = 0; i < eigen.mWidth; ++i)  
         {  
            if(eigen[i]<=0)  
            {  
                return false;  
            }  
         }  
  
         return true;  
    }  
  
    const std::vector<Vec2F32> &getXStep() const  
    {  
        return _xStep;  
    }  
  
protected:  
    int _maxIter = 100;  
    double _eps = 0.00001;  
    double _vk  = 3;  
    double _rho = 0.2;  
    double _tau = 0.9;  
    std::vector<Vec2F32> _xStep;  
protected:  
    virtual MatSDS calGradient(const MatSDS& point) = 0;  
    virtual MatSDS calHessian(const MatSDS& point) = 0;  
    virtual MatSDS calDk(const MatSDS& point) = 0;  
    virtual MatSDS function(const MatSDS& point) = 0;  
};  
  
  
class NewtonLMProblem1:public NewtonLM  
{  
public:  
    NewtonLMProblem1(int maxIter, double eps, double vk, double rho, double tau):NewtonLM(maxIter, eps, vk,rho,tau){}  
  
    MatSDS calGradient(const MatSDS &point) override  
    {  
        MatSDS J(1,2);  
        double x1 = point(0,0);  
        double x2 = point(0,1);  
  
        J(0,0) = 6*x1 - 2*x1*x2;  
        J(0,1) = 6*x2 - x1*x1;  
  
        return J;  
    }  
  
  
    MatSDS calHessian(const MatSDS &point) override  
    {  
        MatSDS H(2,2);  
        double x1 = point(0,0);  
        double x2 = point(0,1);  
  
        H(0,0) = 6 - 2*x2;  
        H(0,1) = -2*x1;  
        H(1,0) = -2*x1;  
        H(1,1) = 6;  
  
        return H;  
    }  
  
  
    MatSDS calDk(const MatSDS& point) override  
    {  
        MatSDS J = calGradient(point);  
        MatSDS H = calHessian(point);  
  
        MatSDS I = MatSDS::eye(H.mWidth);  
  
  
        MatSDS Hp  = H + _vk*I;  
  
        if(!isPosMat(Hp))  
        {  
            H = H + 2*_vk*I;  
        }  
        else  
        {  
            H = Hp;  
        }  
  
        return -1*H.invert()*J;  
    }  
  
  
    MatSDS function(const MatSDS &point) override  
    {  
        MatSDS f(1,1);  
        double x1 = point(0,0);  
        double x2 = point(0,1);  
  
        f(0,0) = 3*x1*x1 + 3*x2*x2 - x1*x1*x2;  
  
        return f;  
    }  
  
    int solve(MatSDS &startPoint) override  
    {  
        MatSDS x = startPoint;  
        for (int i = 0; i < _maxIter; ++i)  
        {  
            _xStep.push_back({(float)x[0],(float)x[1]});  
            //这里就不用检查正定了  
            MatSDS dk = calDk(x);  
  
            double alpha = 1;  
  
            //Armijo准则  
            for (int i = 0; i < 100; ++i)  
            {  
                MatSDS left  = function(x + alpha*dk);  
                MatSDS right = function(x) + this->_rho*alpha*calGradient(x).transpose()*dk;  
  
                if(left(0,0) <= right(0,0))  
                {  
                    break;  
                }  
  
                alpha = alpha * _tau;  
            }  
  
            std::cout<<std::left<<"Iter(s): "<<std::setw(4)<<i<<", Loss: "<<std::setw(12)<<dk.L2()<<" Result: "<<function(x)[0]<<std::endl;  
  
            x = x + alpha*dk;  
  
            if(dk.LInf() < _eps)  
            {  
                startPoint = x;  
                return i+1;  
            }  
        }  
  
        return -1;  
    }  
};  
  
  
  
int main()  
{  
    NewtonLMProblem1 function(1000, 0.001,3, 0.4, 0.8);  
    MatSDS startPoint(1,2,{0,3});  
  
    try  
    {  
        int res = function.solve(startPoint);  
        if(res < 0)  
        {  
            std::cout<<"求解失败"<<std::endl;  
        }  
        else  
        {  
            std::cout<<"求解成功! 迭代次数: "<<res<<std::endl;  
            std::cout<<"最小值点："<<res<<std::endl;  
            startPoint.print();  
  
            std::cout<<"此时方程的值为："<<std::endl;  
            function.function(startPoint).print();  
  
#ifdef WIN32  
        Gui::setFont("c:/windows/fonts/MSYH.TTC",16);  
#endif  
        std::cout<<"按\"esc\"退出!"<<std::endl;  
        Gui::plotLine(u8"牛顿LM法迭代X中间值","x",function.getXStep());  
        Gui::wait();  
        }  
  
    }  
    catch(Exception ex)  
    {  
        std::cout<<ex.what();  
    }  
}

结果: 对于初始点 (0,3) ,迭代8次即可完成,解决了Newton法Hessian矩阵不正定的问题.

3.拟牛顿法

举例

#include <Msnhnet/math/MsnhMatrixS.h>  
#include <Msnhnet/cv/MsnhCVGui.h>  
#include <iostream>  
  
using namespace Msnhnet;  
  
class NewtonLM  
{  
public:  
    NewtonLM(int maxIter, double eps, double vk, double rho, double tau):_maxIter(maxIter),_eps(eps),_vk(vk),_rho(rho),_tau(tau){}  
  
  
    void setMaxIter(int maxIter)  
    {  
        _maxIter = maxIter;  
    }  
  
    virtual int solve(MatSDS &startPoint) = 0;  
  
    void setEps(double eps)  
    {  
        _eps = eps;  
    }  
  
    void setRho(double rho)  
    {  
        _rho = rho;  
    }  
  
    void setTau(double tau)  
    {  
        _tau = tau;  
    }  
  
    //正定性判定  
    bool isPosMat(const MatSDS &H)  
    {  
         MatSDS eigen = H.eigen()[0];  
         for (int i = 0; i < eigen.mWidth; ++i)  
         {  
            if(eigen[i]<=0)  
            {  
                return false;  
            }  
         }  
  
         return true;  
    }  
  
    const std::vector<Vec2F32> &getXStep() const  
    {  
        return _xStep;  
    }  
  
protected:  
    int _maxIter = 100;  
    double _eps = 0.00001;  
    double _vk  = 3;  
    double _rho = 0.2;  
    double _tau = 0.9;  
    std::vector<Vec2F32> _xStep;  
protected:  
    virtual MatSDS calGradient(const MatSDS& point) = 0;  
    virtual MatSDS calHessian(const MatSDS& point) = 0;  
    virtual MatSDS calDk(const MatSDS& point) = 0;  
    virtual MatSDS function(const MatSDS& point) = 0;  
};  
  
  
class NewtonLMProblem1:public NewtonLM  
{  
public:  
    NewtonLMProblem1(int maxIter, double eps, double vk, double rho, double tau):NewtonLM(maxIter, eps, vk,rho,tau){}  
  
    MatSDS calGradient(const MatSDS &point) override  
    {  
        MatSDS J(1,2);  
        double x1 = point(0,0);  
        double x2 = point(0,1);  
  
        J(0,0) = 6*x1 - 2*x1*x2;  
        J(0,1) = 6*x2 - x1*x1;  
  
        return J;  
    }  
  
  
    MatSDS calHessian(const MatSDS &point) override  
    {  
        MatSDS H(2,2);  
        double x1 = point(0,0);  
        double x2 = point(0,1);  
  
        H(0,0) = 6 - 2*x2;  
        H(0,1) = -2*x1;  
        H(1,0) = -2*x1;  
        H(1,1) = 6;  
  
        return H;  
    }  
  
  
    MatSDS calDk(const MatSDS& point) override  
    {  
        MatSDS J = calGradient(point);  
        MatSDS H = calHessian(point);  
  
        MatSDS I = MatSDS::eye(H.mWidth);  
  
  
        MatSDS Hp  = H + _vk*I;  
  
        if(!isPosMat(Hp))  
        {  
            H = H + 2*_vk*I;  
        }  
        else  
        {  
            H = Hp;  
        }  
  
        return -1*H.invert()*J;  
    }  
  
  
    MatSDS function(const MatSDS &point) override  
    {  
        MatSDS f(1,1);  
        double x1 = point(0,0);  
        double x2 = point(0,1);  
  
        f(0,0) = 3*x1*x1 + 3*x2*x2 - x1*x1*x2;  
  
        return f;  
    }  
  
    int solve(MatSDS &startPoint) override  
    {  
        MatSDS x = startPoint;  
        for (int i = 0; i < _maxIter; ++i)  
        {  
            _xStep.push_back({(float)x[0],(float)x[1]});  
            //这里就不用检查正定了  
            MatSDS dk = calDk(x);  
  
            double alpha = 1;  
  
            //Armijo准则  
            for (int i = 0; i < 100; ++i)  
            {  
                MatSDS left  = function(x + alpha*dk);  
                MatSDS right = function(x) + this->_rho*alpha*calGradient(x).transpose()*dk;  
  
                if(left(0,0) <= right(0,0))  
                {  
                    break;  
                }  
  
                alpha = alpha * _tau;  
            }  
  
            std::cout<<std::left<<"Iter(s): "<<std::setw(4)<<i<<", Loss: "<<std::setw(12)<<dk.L2()<<" Result: "<<function(x)[0]<<std::endl;  
  
            x = x + alpha*dk;  
  
            if(dk.LInf() < _eps)  
            {  
                startPoint = x;  
                return i+1;  
            }  
        }  
  
        return -1;  
    }  
};  
  
  
  
int main()  
{  
    NewtonLMProblem1 function(1000, 0.001,3, 0.4, 0.8);  
    MatSDS startPoint(1,2,{0,3});  
  
    try  
    {  
        int res = function.solve(startPoint);  
        if(res < 0)  
        {  
            std::cout<<"求解失败"<<std::endl;  
        }  
        else  
        {  
            std::cout<<"求解成功! 迭代次数: "<<res<<std::endl;  
            std::cout<<"最小值点："<<res<<std::endl;  
            startPoint.print();  
  
            std::cout<<"此时方程的值为："<<std::endl;  
            function.function(startPoint).print();  
  
#ifdef WIN32  
        Gui::setFont("c:/windows/fonts/MSYH.TTC",16);  
#endif  
        std::cout<<"按\"esc\"退出!"<<std::endl;  
        Gui::plotLine(u8"牛顿LM法迭代X中间值","x",function.getXStep());  
        Gui::wait();  
        }  
  
    }  
    catch(Exception ex)  
    {  
        std::cout<<ex.what();  
    }  
}

结果:对于初始点 (4,3) ,迭代9次即可完成,此点Newton法,DFP法都无解,BFGS方法有解,一般来说BFGS效果比较好.

拟牛顿法9次完成求解

拟牛顿法X中间值可视化

4. 源码

https://github.com/msnh2012/n...https://github.com/msnh2012/numerical-optimizaiton)

5. 依赖包

https://github.com/msnh2012/M...https://github.com/msnh2012/Msnhnet)

6. 参考文献

Numerical Optimization. Jorge Nocedal Stephen J. Wrigh
Methods for non-linear least squares problems. K. Madsen, H.B. Nielsen, O. Tingleff.
Practical Optimization\_ Algorithms and Engineering Applications. Andreas Antoniou Wu-Sheng Lu
最优化理论与算法. 陈宝林
数值最优化方法. 高立

网盘资料下载：链接：https://pan.baidu.com/s/1hpFw... 提取码：b6gq

7. 最后

欢迎关注我和BBuf及公众号的小伙伴们一块维护的一个深度学习框架Msnhnet: https://github.com/msnh2012/M... Msnhnet除了是一个深度网络推理库之外，还是一个小型矩阵库，包含了矩阵常规操作，LU分解，Cholesky分解，SVD分解。

END

原文：GiantPandaCV
作者：msnh2012

推荐阅读

更多嵌入式AI技术干货请关注嵌入式AI专栏。