Alglib Подгонка данных с помощью minlmoptimize не сводит к минимуму результаты. Полный C# включен

У меня проблемы с реализацией оптимизатора lm в библиотеке alglib. Я не уверен, почему параметры почти не меняются, хотя он все еще получает код выхода 4. Я не смог определить, что я делаю неправильно с документацией для alglib. Ниже приводится полный исходный код, который я использую:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Threading.Tasks;

namespace FBkineticsFitter
{
    class Program
    {

        public static int Main(string[] args)
        {
            /*
             * This code finds the parameters ka, kd, and Bmax from the minimization of the residuals using "V" mode of the Levenberg-Marquardt optimizer (alglib library).
             * This optimizer is used because the equation is non-linear and this particular version of the optimizer does not require the ab inito calculation of partial
             * derivatives, a jacobian matrix, or other parameter-space definitions, so it's implementation is simple.
             * 
             * The equations being solved represent a model of a protein-protein interaction where protein in solution is interacting with immobilized protein on a sensor
             * in a 1:1 stoichiometery. Mass transport limit is not taken into account. The detials of this equation are described in:
             *      R.B.M. Schasfoort and Anna J. Tudos Handbook of Surface Plasmon Resonance, 2008, Chapter 5, ISBN: 978-0-85404-267-8
             *
             *          Y=((ka*Cpro*Bmax)/(ka*Cpro+kd))*(1-exp(-1*X*(ka*Cpro+kd)))  ; this equation describes the association phase
             *          
             *          Y=Req*exp(-1*X*kd) ; this equation describes the dissociation phase
             * 
             * The data are fit globally such that Bmax and Req parameters are linked and kd parameters are linked during simultaneous optimization for the most robust fit
             *          
             *  Y= signal
             *  X= time
             *  ka= association constant
             *  kd= dissociation constant
             *  Bmax= maximum binding capacity at equilibrium
             *  Req=(Cpro/(Cpro+kobs))*Bmax :. in this case Req=Bmax because Cpro=0 during the dissociation step
             *  Cpro= concentration of protein in solution
             *  
             *      additional calculations:
             *          kobs=ka*Cpro
             *          kD=kd/ka
            */
            GetRawDataXY(@"C:\Results.txt");
            double epsg = .0000001;
            double epsf = 0;
            double epsx = 0;
            int maxits = 0;
            alglib.minlmstate state;
            alglib.minlmreport rep;

            alglib.minlmcreatev(2, GlobalVariables.param, 0.0001, out state);
            alglib.minlmsetcond(state, epsg, epsf, epsx, maxits);
            alglib.minlmoptimize(state, Calc_residuals, null, null);
            alglib.minlmresults(state, out GlobalVariables.param, out rep);

            System.Console.WriteLine("{0}", rep.terminationtype); ////1=relative function improvement is no more than EpsF. 2=relative step is no more than EpsX. 4=gradient norm is no more than EpsG. 5=MaxIts steps was taken. 7=stopping conditions are too stringent,further improvement is impossible, we return best X found so far. 8= terminated by user
            System.Console.WriteLine("{0}", alglib.ap.format(GlobalVariables.param, 20));
            System.Console.ReadLine();
            return 0;
        }

        public static void Calc_residuals(double[] param, double[] fi, object obj)
        {
            /*calculate the difference of the model and the raw data at each X (I.E. residuals)
             * the sum of the square of the residuals is returned to the optimized function to be minimized*/
            fi[0] = 0;
            fi[1] = 0;

            for (int i = 0; i < GlobalVariables.rawXYdata[0].Count();i++ )
            {
                if (GlobalVariables.rawXYdata[1][i] <= GlobalVariables.breakpoint)
                {
                    fi[0] += System.Math.Pow((kaEQN(GlobalVariables.rawXYdata[0][i]) - GlobalVariables.rawXYdata[1][i]), 2);
                }
                else
                {
                    fi[1] += System.Math.Pow((kdEQN(GlobalVariables.rawXYdata[0][i]) - GlobalVariables.rawXYdata[1][i]), 2);
                }
            }

        }

        public static double kdEQN(double x)
        {
            /*Calculate kd Y value based on the incremented parameters*/
            return GlobalVariables.param[2] * Math.Exp(-1 * x * GlobalVariables.param[1]);
        }

        public static double kaEQN(double x)
        {
            /*Calculate ka Y value based on the incremented parameters*/
            return ((GlobalVariables.param[0] * GlobalVariables.Cpro * GlobalVariables.param[2]) / (GlobalVariables.param[0] * GlobalVariables.Cpro + GlobalVariables.param[1])) * (1 - Math.Exp(-1 * x * (GlobalVariables.param[0] * GlobalVariables.Cpro + GlobalVariables.param[1])));
        }

        public static void GetRawDataXY(string filename)
        {
            /*Read in Raw data From tab delim txt*/
            string[] elements = { "x", "y" };
            int count = 0;
            GlobalVariables.rawXYdata[0] = new double[1798];
            GlobalVariables.rawXYdata[1] = new double[1798];

            using (StreamReader sr = new StreamReader(filename))
            {
                while (sr.Peek() >= 0)
                {
                    elements = sr.ReadLine().Split('\t');
                    GlobalVariables.rawXYdata[0][count] = Convert.ToDouble(elements[0]);
                    GlobalVariables.rawXYdata[1][count] = Convert.ToDouble(elements[1]);
                    count++;
                }
            }
        }

        public class GlobalVariables
        {
            public static double[] param = new double[] { 1, .02, 0.13 }; ////ka,kd,Bmax  these are initial guesses for the algorithm
            public static double[][] rawXYdata = new double[2][];
            public static double Cpro = 100E-9;
            public static double kD = 0;
            public static double breakpoint = 180;
        }
    }
}

1 ответ

По словам Сергея Бочканова, проблема в следующем:

"Вам следует использовать массив param[], предоставленный вам оптимизатором. Он создает свою внутреннюю копию вашего параметра и обновляет эту копию, а не ваш массив параметров.

С точки зрения оптимизатора, он имеет функцию, которая никогда не меняется, когда он изменяет свою внутреннюю копию параметра. Итак, он заканчивается сразу после первой итерации."

Вот обновленный и рабочий пример кода:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Threading.Tasks;

namespace FBkineticsFitter
{
    class Program
    {

        public static int Main(string[] args)
        {
            /*
             * This code finds the parameters ka, kd, and Bmax from the minimization of the residuals using "V" mode of the Levenberg-Marquardt optimizer (alglib library).
             * This optimizer is used because the equation is non-linear and this particular version of the optimizer does not require the ab inito calculation of partial
             * derivatives, a jacobian matrix, or other parameter-space definitions, so it's implementation is simple.
             * 
             * The equations being solved represent a model of a protein-protein interaction where protein in solution is interacting with immobilized protein on a sensor
             * in a 1:1 stoichiometery. Mass transport limit is not taken into account. The detials of this equation are described in:
             *      R.B.M. Schasfoort and Anna J. Tudos Handbook of Surface Plasmon Resonance, 2008, Chapter 5, ISBN: 978-0-85404-267-8
             *
             *          Y=((Cpro*Rmax)/(Cpro+kd))*(1-exp(-1*X*(ka*Cpro+kd)))  ; this equation describes the association phase
             *          
             *          Y=Req*exp(-1*X*kd)+NS ; this equation describes the dissociation phase
             *          
             * According to ForteBio's Application Notes #14 the amplitudes of the data can be correctly accounted for by modifying the above equations as follows:
             * 
             *          Y=(Rmax*(1/(1+(kd/(ka*Cpro))))*(1-exp(((-1*Cpro)+kd)*X))    ; this equation describes the association phase
             *          
             *          Y=Y0*(exp(-1*kd*(X-X0)))    ; this equation describes the dissociation phase
             * 
             * 
             * 
             * The data are fit simultaneously such that all fitting parameters are linked during optimization for the most robust fit
             *          
             *  Y= signal
             *  X= time
             *  ka= association constant  [fitting parameter 0]
             *  kd= dissociation constant [fitting parameter 1]
             *  Rmax= maximum binding capacity at equilibrium  [fitting parameter 2]
             *  KD=kd/ka 
             *  kobs=ka*Cpro+kd
             *  Req=(Cpro/(Cpro+KD))*Rmax
             *  Cpro= concentration of protein in solution
             *  NS= non-specific binding at time=infinity (constant correction for end point of fit) [this is taken into account in the amplitude corrected formula: Y0=Ylast]
             *  Y0= the initial value of Y for the first point of the dissociation curve (I.E. the last point of the association phase)
             *  X0= the initial value of X for the first point of the dissociation phase
             *          
            */

            GetRawDataXY(@"C:\Results.txt");
            double epsg = .00001;
            double epsf = 0;
            double epsx = 0;
            int maxits = 10000;
            alglib.minlmstate state;
            alglib.minlmreport rep;
            double[] param = new double[] { 1000000, .0100, 0.20};////ka,kd,Rmax  these are initial guesses for the algorithm and should be mid range for the expected data.,  The last parameter Rmax should be guessed as the maximum Y-value of Ka
            double[] scaling= new double[] { 1E6,1,1};

            alglib.minlmcreatev(2, param, 0.001, out state);
            alglib.minlmsetcond(state, epsg, epsf, epsx, maxits);
            alglib.minlmsetgradientcheck(state, 1);
            alglib.minlmsetscale(state, scaling);
            alglib.minlmoptimize(state, Calc_residuals, null, V.rawXYdata);
            alglib.minlmresults(state, out param, out rep);

            System.Console.WriteLine("{0}", rep.terminationtype); ////1=relative function improvement is no more than EpsF. 2=relative step is no more than EpsX. 4=gradient norm is no more than EpsG. 5=MaxIts steps was taken. 7=stopping conditions are too stringent,further improvement is impossible, we return best X found so far. 8= terminated by user
            System.Console.WriteLine("{0}", alglib.ap.format(param, 25));
            System.Console.ReadLine();
            return 0;
        }

        public static void Calc_residuals(double[] param, double[] fi, object obj)
        {
            /*calculate the difference of the model and the raw data at each X (I.E. residuals)
             * the sum of the square of the residuals is returned to the optimized function to be minimized*/
            CalcVariables(param);

            fi[0] = 0;
            fi[1] = 0;

            for (int i = 0; i < V.rawXYdata[0].Count(); i++)
            {
                if (V.rawXYdata[0][i] <= V.breakpoint)
                {
                    fi[0] += System.Math.Pow((kaEQN(V.rawXYdata[0][i], param) - V.rawXYdata[1][i]), 2);
                }
                else
                {
                    if (!V.breakpointreached) 
                    { 
                        V.breakpointreached = true;
                        V.X_0 = V.rawXYdata[0][i];
                        V.Y_0 = V.rawXYdata[1][i];
                    }

                    fi[1] += System.Math.Pow((kdEQN(V.rawXYdata[0][i], param) - V.rawXYdata[1][i]), 2);
                }
            }

            if (param[0] <= 0 || param[1] <=0 || param[2] <= 0)////Exponentiates the error if the parameters go negative to favor positive non-zero values
            {
                fi[0] = Math.Pow(fi[0], 2);
                fi[1] = Math.Pow(fi[1], 2);
            }

            System.Console.WriteLine("{0}"+"  "+V.Cpro+"  -->"+fi[0], alglib.ap.format(param, 5));
            Console.WriteLine((kdEQN(V.rawXYdata[0][114], param)));
        }

        public static double kdEQN(double X, double[] param)
        {
            /*Calculate kd Y value based on the incremented parameters*/
            return (V.Rmax * (1 / (1 + (V.kd / (V.ka * V.Cpro)))) * (1 - Math.Exp((-1 * V.ka * V.Cpro) * V.X_0))) * Math.Exp((-1 * V.kd) * (X - V.X_0));
        }

        public static double kaEQN(double X, double[] param)
        {
            /*Calculate ka Y value based on the incremented parameters*/
            return ((V.Cpro * V.Rmax) / (V.Cpro + V.kd)) * (1 - Math.Exp(-1 * X * ((V.ka * V.Cpro) + V.kd)));
        }

        public static void GetRawDataXY(string filename)
        {
            /*Read in Raw data From tab delim txt*/
            string[] elements = { "x", "y" };
            int count = 0;
            V.rawXYdata[0] = new double[226];
            V.rawXYdata[1] = new double[226];

            using (StreamReader sr = new StreamReader(filename))
            {
                while (sr.Peek() >= 0)
                {
                    elements = sr.ReadLine().Split('\t');
                    V.rawXYdata[0][count] = Convert.ToDouble(elements[0]);
                    V.rawXYdata[1][count] = Convert.ToDouble(elements[1]);
                    count++;
                }
            }
        }

        public class V
        {
            /*Global Variables*/
            public static double[][] rawXYdata = new double[2][];
            public static double Cpro = 100E-9;
            public static bool breakpointreached = false;
            public static double X_0 = 0;
            public static double Y_0 = 0;
            public static double ka = 0;
            public static double kd = 0;
            public static double Rmax = 0;
            public static double KD = 0;
            public static double Kobs = 0;
            public static double Req = 0;
            public static double breakpoint = 180;
        }

        public static void CalcVariables(double[] param)
        {
            V.ka = param[0];
            V.kd = param[1];
            V.Rmax = param[2];
            V.KD = param[1] / param[0];
            V.Kobs = param[0] * V.Cpro + param[1];
            V.Req = (V.Cpro / (V.Cpro + param[0] * V.Cpro + param[1])) * param[2];
        }
    }
}
Другие вопросы по тегам