Alglib Подгонка данных с помощью minlmoptimize не сводит к минимуму результаты. Полный C# включен
У меня проблемы с реализацией оптимизатора lm в библиотеке alglib. Я не уверен, почему параметры почти не меняются, хотя он все еще получает код выхода 4. Я не смог определить, что я делаю неправильно с документацией для alglib. Ниже приводится полный исходный код, который я использую:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Threading.Tasks;
namespace FBkineticsFitter
{
class Program
{
public static int Main(string[] args)
{
/*
* This code finds the parameters ka, kd, and Bmax from the minimization of the residuals using "V" mode of the Levenberg-Marquardt optimizer (alglib library).
* This optimizer is used because the equation is non-linear and this particular version of the optimizer does not require the ab inito calculation of partial
* derivatives, a jacobian matrix, or other parameter-space definitions, so it's implementation is simple.
*
* The equations being solved represent a model of a protein-protein interaction where protein in solution is interacting with immobilized protein on a sensor
* in a 1:1 stoichiometery. Mass transport limit is not taken into account. The detials of this equation are described in:
* R.B.M. Schasfoort and Anna J. Tudos Handbook of Surface Plasmon Resonance, 2008, Chapter 5, ISBN: 978-0-85404-267-8
*
* Y=((ka*Cpro*Bmax)/(ka*Cpro+kd))*(1-exp(-1*X*(ka*Cpro+kd))) ; this equation describes the association phase
*
* Y=Req*exp(-1*X*kd) ; this equation describes the dissociation phase
*
* The data are fit globally such that Bmax and Req parameters are linked and kd parameters are linked during simultaneous optimization for the most robust fit
*
* Y= signal
* X= time
* ka= association constant
* kd= dissociation constant
* Bmax= maximum binding capacity at equilibrium
* Req=(Cpro/(Cpro+kobs))*Bmax :. in this case Req=Bmax because Cpro=0 during the dissociation step
* Cpro= concentration of protein in solution
*
* additional calculations:
* kobs=ka*Cpro
* kD=kd/ka
*/
GetRawDataXY(@"C:\Results.txt");
double epsg = .0000001;
double epsf = 0;
double epsx = 0;
int maxits = 0;
alglib.minlmstate state;
alglib.minlmreport rep;
alglib.minlmcreatev(2, GlobalVariables.param, 0.0001, out state);
alglib.minlmsetcond(state, epsg, epsf, epsx, maxits);
alglib.minlmoptimize(state, Calc_residuals, null, null);
alglib.minlmresults(state, out GlobalVariables.param, out rep);
System.Console.WriteLine("{0}", rep.terminationtype); ////1=relative function improvement is no more than EpsF. 2=relative step is no more than EpsX. 4=gradient norm is no more than EpsG. 5=MaxIts steps was taken. 7=stopping conditions are too stringent,further improvement is impossible, we return best X found so far. 8= terminated by user
System.Console.WriteLine("{0}", alglib.ap.format(GlobalVariables.param, 20));
System.Console.ReadLine();
return 0;
}
public static void Calc_residuals(double[] param, double[] fi, object obj)
{
/*calculate the difference of the model and the raw data at each X (I.E. residuals)
* the sum of the square of the residuals is returned to the optimized function to be minimized*/
fi[0] = 0;
fi[1] = 0;
for (int i = 0; i < GlobalVariables.rawXYdata[0].Count();i++ )
{
if (GlobalVariables.rawXYdata[1][i] <= GlobalVariables.breakpoint)
{
fi[0] += System.Math.Pow((kaEQN(GlobalVariables.rawXYdata[0][i]) - GlobalVariables.rawXYdata[1][i]), 2);
}
else
{
fi[1] += System.Math.Pow((kdEQN(GlobalVariables.rawXYdata[0][i]) - GlobalVariables.rawXYdata[1][i]), 2);
}
}
}
public static double kdEQN(double x)
{
/*Calculate kd Y value based on the incremented parameters*/
return GlobalVariables.param[2] * Math.Exp(-1 * x * GlobalVariables.param[1]);
}
public static double kaEQN(double x)
{
/*Calculate ka Y value based on the incremented parameters*/
return ((GlobalVariables.param[0] * GlobalVariables.Cpro * GlobalVariables.param[2]) / (GlobalVariables.param[0] * GlobalVariables.Cpro + GlobalVariables.param[1])) * (1 - Math.Exp(-1 * x * (GlobalVariables.param[0] * GlobalVariables.Cpro + GlobalVariables.param[1])));
}
public static void GetRawDataXY(string filename)
{
/*Read in Raw data From tab delim txt*/
string[] elements = { "x", "y" };
int count = 0;
GlobalVariables.rawXYdata[0] = new double[1798];
GlobalVariables.rawXYdata[1] = new double[1798];
using (StreamReader sr = new StreamReader(filename))
{
while (sr.Peek() >= 0)
{
elements = sr.ReadLine().Split('\t');
GlobalVariables.rawXYdata[0][count] = Convert.ToDouble(elements[0]);
GlobalVariables.rawXYdata[1][count] = Convert.ToDouble(elements[1]);
count++;
}
}
}
public class GlobalVariables
{
public static double[] param = new double[] { 1, .02, 0.13 }; ////ka,kd,Bmax these are initial guesses for the algorithm
public static double[][] rawXYdata = new double[2][];
public static double Cpro = 100E-9;
public static double kD = 0;
public static double breakpoint = 180;
}
}
}
1 ответ
По словам Сергея Бочканова, проблема в следующем:
"Вам следует использовать массив param[], предоставленный вам оптимизатором. Он создает свою внутреннюю копию вашего параметра и обновляет эту копию, а не ваш массив параметров.
С точки зрения оптимизатора, он имеет функцию, которая никогда не меняется, когда он изменяет свою внутреннюю копию параметра. Итак, он заканчивается сразу после первой итерации."
Вот обновленный и рабочий пример кода:
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.IO;
using System.Threading.Tasks;
namespace FBkineticsFitter
{
class Program
{
public static int Main(string[] args)
{
/*
* This code finds the parameters ka, kd, and Bmax from the minimization of the residuals using "V" mode of the Levenberg-Marquardt optimizer (alglib library).
* This optimizer is used because the equation is non-linear and this particular version of the optimizer does not require the ab inito calculation of partial
* derivatives, a jacobian matrix, or other parameter-space definitions, so it's implementation is simple.
*
* The equations being solved represent a model of a protein-protein interaction where protein in solution is interacting with immobilized protein on a sensor
* in a 1:1 stoichiometery. Mass transport limit is not taken into account. The detials of this equation are described in:
* R.B.M. Schasfoort and Anna J. Tudos Handbook of Surface Plasmon Resonance, 2008, Chapter 5, ISBN: 978-0-85404-267-8
*
* Y=((Cpro*Rmax)/(Cpro+kd))*(1-exp(-1*X*(ka*Cpro+kd))) ; this equation describes the association phase
*
* Y=Req*exp(-1*X*kd)+NS ; this equation describes the dissociation phase
*
* According to ForteBio's Application Notes #14 the amplitudes of the data can be correctly accounted for by modifying the above equations as follows:
*
* Y=(Rmax*(1/(1+(kd/(ka*Cpro))))*(1-exp(((-1*Cpro)+kd)*X)) ; this equation describes the association phase
*
* Y=Y0*(exp(-1*kd*(X-X0))) ; this equation describes the dissociation phase
*
*
*
* The data are fit simultaneously such that all fitting parameters are linked during optimization for the most robust fit
*
* Y= signal
* X= time
* ka= association constant [fitting parameter 0]
* kd= dissociation constant [fitting parameter 1]
* Rmax= maximum binding capacity at equilibrium [fitting parameter 2]
* KD=kd/ka
* kobs=ka*Cpro+kd
* Req=(Cpro/(Cpro+KD))*Rmax
* Cpro= concentration of protein in solution
* NS= non-specific binding at time=infinity (constant correction for end point of fit) [this is taken into account in the amplitude corrected formula: Y0=Ylast]
* Y0= the initial value of Y for the first point of the dissociation curve (I.E. the last point of the association phase)
* X0= the initial value of X for the first point of the dissociation phase
*
*/
GetRawDataXY(@"C:\Results.txt");
double epsg = .00001;
double epsf = 0;
double epsx = 0;
int maxits = 10000;
alglib.minlmstate state;
alglib.minlmreport rep;
double[] param = new double[] { 1000000, .0100, 0.20};////ka,kd,Rmax these are initial guesses for the algorithm and should be mid range for the expected data., The last parameter Rmax should be guessed as the maximum Y-value of Ka
double[] scaling= new double[] { 1E6,1,1};
alglib.minlmcreatev(2, param, 0.001, out state);
alglib.minlmsetcond(state, epsg, epsf, epsx, maxits);
alglib.minlmsetgradientcheck(state, 1);
alglib.minlmsetscale(state, scaling);
alglib.minlmoptimize(state, Calc_residuals, null, V.rawXYdata);
alglib.minlmresults(state, out param, out rep);
System.Console.WriteLine("{0}", rep.terminationtype); ////1=relative function improvement is no more than EpsF. 2=relative step is no more than EpsX. 4=gradient norm is no more than EpsG. 5=MaxIts steps was taken. 7=stopping conditions are too stringent,further improvement is impossible, we return best X found so far. 8= terminated by user
System.Console.WriteLine("{0}", alglib.ap.format(param, 25));
System.Console.ReadLine();
return 0;
}
public static void Calc_residuals(double[] param, double[] fi, object obj)
{
/*calculate the difference of the model and the raw data at each X (I.E. residuals)
* the sum of the square of the residuals is returned to the optimized function to be minimized*/
CalcVariables(param);
fi[0] = 0;
fi[1] = 0;
for (int i = 0; i < V.rawXYdata[0].Count(); i++)
{
if (V.rawXYdata[0][i] <= V.breakpoint)
{
fi[0] += System.Math.Pow((kaEQN(V.rawXYdata[0][i], param) - V.rawXYdata[1][i]), 2);
}
else
{
if (!V.breakpointreached)
{
V.breakpointreached = true;
V.X_0 = V.rawXYdata[0][i];
V.Y_0 = V.rawXYdata[1][i];
}
fi[1] += System.Math.Pow((kdEQN(V.rawXYdata[0][i], param) - V.rawXYdata[1][i]), 2);
}
}
if (param[0] <= 0 || param[1] <=0 || param[2] <= 0)////Exponentiates the error if the parameters go negative to favor positive non-zero values
{
fi[0] = Math.Pow(fi[0], 2);
fi[1] = Math.Pow(fi[1], 2);
}
System.Console.WriteLine("{0}"+" "+V.Cpro+" -->"+fi[0], alglib.ap.format(param, 5));
Console.WriteLine((kdEQN(V.rawXYdata[0][114], param)));
}
public static double kdEQN(double X, double[] param)
{
/*Calculate kd Y value based on the incremented parameters*/
return (V.Rmax * (1 / (1 + (V.kd / (V.ka * V.Cpro)))) * (1 - Math.Exp((-1 * V.ka * V.Cpro) * V.X_0))) * Math.Exp((-1 * V.kd) * (X - V.X_0));
}
public static double kaEQN(double X, double[] param)
{
/*Calculate ka Y value based on the incremented parameters*/
return ((V.Cpro * V.Rmax) / (V.Cpro + V.kd)) * (1 - Math.Exp(-1 * X * ((V.ka * V.Cpro) + V.kd)));
}
public static void GetRawDataXY(string filename)
{
/*Read in Raw data From tab delim txt*/
string[] elements = { "x", "y" };
int count = 0;
V.rawXYdata[0] = new double[226];
V.rawXYdata[1] = new double[226];
using (StreamReader sr = new StreamReader(filename))
{
while (sr.Peek() >= 0)
{
elements = sr.ReadLine().Split('\t');
V.rawXYdata[0][count] = Convert.ToDouble(elements[0]);
V.rawXYdata[1][count] = Convert.ToDouble(elements[1]);
count++;
}
}
}
public class V
{
/*Global Variables*/
public static double[][] rawXYdata = new double[2][];
public static double Cpro = 100E-9;
public static bool breakpointreached = false;
public static double X_0 = 0;
public static double Y_0 = 0;
public static double ka = 0;
public static double kd = 0;
public static double Rmax = 0;
public static double KD = 0;
public static double Kobs = 0;
public static double Req = 0;
public static double breakpoint = 180;
}
public static void CalcVariables(double[] param)
{
V.ka = param[0];
V.kd = param[1];
V.Rmax = param[2];
V.KD = param[1] / param[0];
V.Kobs = param[0] * V.Cpro + param[1];
V.Req = (V.Cpro / (V.Cpro + param[0] * V.Cpro + param[1])) * param[2];
}
}
}