Как работает Matlab Lognfit?

Есть ли в lognfit данные перед установкой? Если да, то как он определяет количество ячеек, ширину ячеек и выбирает точки данных для ячеек, которые устанавливаются? Если нет, как это подходит?

1 ответ

Решение

Общая стратегия обратного проектирования алгоритма заключается в использовании взгляда на .m файлы, связанные с этими функциями.

Возможно, я неправильно читаю комментарии, но думаю, что это подходит для наименьших квадратов без биннинга. Вы можете уместить guassian без бинни правильно?

Например, lognfit.m кажется необычной оберткой для normfit.m

function [parmhat, parmci] = lognfit(x,alpha,censoring,freq,options)
%LOGNFIT Parameter estimates and confidence intervals for lognormal data.
%   PARMHAT = LOGNFIT(X) returns a vector of maximum likelihood estimates 
%   PARMHAT(1) = MU and PARMHAT(2) = SIGMA of parameters for a lognormal 
%   distribution fitting X.  MU and SIGMA are the mean and standard 
%   deviation, respectively, of the associated normal distribution.
%
%   [PARMHAT,PARMCI] = LOGNFIT(X) returns 95% confidence intervals for the
%   parameter estimates.
%
%   [PARMHAT,PARMCI] = LOGNFIT(X,ALPHA) returns 100(1-ALPHA) percent
%   confidence intervals for the parameter estimates.
%
%   [...] = LOGNFIT(X,ALPHA,CENSORING) accepts a boolean vector of the same
%   size as X that is 1 for observations that are right-censored and 0 for
%   observations that are observed exactly.
%
%   [...] = LOGNFIT(X,ALPHA,CENSORING,FREQ) accepts a frequency vector of
%   the same size as X.  FREQ typically contains integer frequencies for
%   the corresponding elements in X, but may contain any non-integer
%   non-negative values.
%
%   [...] = LOGNFIT(X,ALPHA,CENSORING,FREQ,OPTIONS) specifies control
%   parameters for the iterative algorithm used to compute ML estimates
%   when there is censoring.  This argument can be created by a call to
%   STATSET.  See STATSET('lognfit') for parameter names and default values.
%
%   Pass in [] for ALPHA, CENSORING, or FREQ to use their default values.
%
%   With no censoring, SIGMAHAT is the square root of the unbiased estimate
%   of the variance of log(X).  With censoring, SIGMAHAT is the maximum
%   likelihood estimate.
%
%   See also LOGNCDF, LOGNINV, LOGNLIKE, LOGNPDF, LOGNRND, LOGNSTAT, MLE,
%            STATSET.

%   References:
%      [1] Evans, M., Hastings, N., and Peacock, B. (1993) Statistical
%          Distributions, 2nd ed., Wiley, 170pp.
%      [2] Lawless, J.F. (1982) Statistical Models and Methods for Lifetime
%          Data, Wiley, New York, 580pp.
%      [3} Meeker, W.Q. and L.A. Escobar (1998) Statistical Methods for
%          Reliability Data, Wiley, New York, 680pp.

%   Copyright 1993-2011 The MathWorks, Inc.


% Illegal data return an error.
if ~isvector(x)
    error(message('stats:lognfit:VectorRequired'));
elseif any(x <= 0)
    error(message('stats:lognfit:PositiveDataRequired'));
end

if nargin < 2 || isempty(alpha)
    alpha = 0.05;
end
if nargin < 3 || isempty(censoring)
    censoring = [];
elseif ~isempty(censoring) && ~isequal(size(x), size(censoring))
    error(message('stats:lognfit:InputSizeMismatchCensoring'));
end
if nargin < 4 || isempty(freq)
    freq = [];
elseif ~isempty(freq) && ~isequal(size(x), size(freq))
    error(message('stats:lognfit:InputSizeMismatchFreq'));
end
if nargin < 5 || isempty(options)
    options = [];
end

% Fit a normal distribution to the logged data.  The parameterizations of
% the normal and lognormal are identical.
%
% Get parameter estimates only.
if nargout <= 1
    [muhat,sigmahat] = normfit(log(x),alpha,censoring,freq,options);
    parmhat = [muhat sigmahat];

% Get parameter estimates and CIs.
else
    [muhat,sigmahat,muci,sigmaci] = normfit(log(x),alpha,censoring,freq,options);
    parmhat = [muhat sigmahat];
    parmci = [muci sigmaci];
end

Также для справки normfit.m

function [muhat, sigmahat, muci, sigmaci] = normfit(x,alpha,censoring,freq,options)
%NORMFIT Parameter estimates and confidence intervals for normal data.
%   [MUHAT,SIGMAHAT] = NORMFIT(X) returns estimates of the parameters of
%   the normal distribution given the data in X.  MUHAT is an estimate of
%   the mean, and SIGMAHAT is an estimate of the standard deviation.
%
%   [MUHAT,SIGMAHAT,MUCI,SIGMACI] = NORMFIT(X) returns 95% confidence
%   intervals for the parameter estimates.
%
%   [MUHAT,SIGMAHAT,MUCI,SIGMACI] = NORMFIT(X,ALPHA) returns 100(1-ALPHA)
%   percent confidence intervals for the parameter estimates.
%
%   [...] = NORMFIT(X,ALPHA,CENSORING) accepts a boolean vector of the same
%   size as X that is 1 for observations that are right-censored and 0 for
%   observations that are observed exactly.
%
%   [...] = NORMFIT(X,ALPHA,CENSORING,FREQ) accepts a frequency vector of the
%   same size as X.  FREQ typically contains integer frequencies for the
%   corresponding elements in X, but may contain any non-integer
%   non-negative values.
%
%   [...] = NORMFIT(X,ALPHA,CENSORING,FREQ,OPTIONS) specifies control
%   parameters for the iterative algorithm used to compute ML estimates
%   when there is censoring.  This argument can be created by a call to
%   STATSET.  See STATSET('normfit') for parameter names and default values.
%
%   Pass in [] for ALPHA, CENSORING, or FREQ to use their default values.
%
%   With no censoring, SIGMAHAT is computed using the square root of the
%   unbiased estimator of the variance.  With censoring, SIGMAHAT is the
%   maximum likelihood estimate.
%
%   See also NORMCDF, NORMINV, NORMLIKE, NORMPDF, NORMRND, NORMSTAT, MLE, STATSET.

%   References:
%      [1] Evans, M., Hastings, N., and Peacock, B. (1993) Statistical
%          Distributions, 2nd ed., Wiley, 170pp.
%      [2] Lawless, J.F. (1982) Statistical Models and Methods for Lifetime
%          Data, Wiley, New York, 580pp.
%      [3} Meeker, W.Q. and L.A. Escobar (1998) Statistical Methods for
%          Reliability Data, Wiley, New York, 680pp.

%   To compute weighted maximum likelihood estimates (WMLEs) for mu and
%   sigma, you can provide weights, normalized to sum to LENGTH(X), in FREQ
%   instead of frequencies.  In this case, NORMFIT computes the WMLE for
%   mu.  However, when there is no censoring, the estimate computed for
%   sigma is not exactly the WMLE.  To compute the WMLE, multiply the value
%   returned in SIGMAHAT by (SUM(FREQ) - 1)/SUM(FREQ).  This correction is
%   needed because NORMFIT normally computes SIGMAHAT using an unbiased
%   variance estimator when there is no censoring in the data.  When there
%   is censoring, the correction is not needed, since NORMFIT does not use
%   the unbiased variance estimator in that case.

%   Copyright 1993-2015 The MathWorks, Inc.


% Illegal data return an error.
if ~isvector(x)
    if nargin < 3
        % Accept matrix data under the 2-arg syntax.  censoring and freq
        % will be scalar zero and one.
        [n,ncols] = size(x); % all columns have same number of data
    else
        error(message('stats:normfit:InvalidData'));
    end
else
    n = numel(x); % a scalar -- all columns have same number of data
    ncols = 1;
end

if nargin < 2 || isempty(alpha)
    alpha = 0.05;
end
if nargin < 3 || isempty(censoring)
    censoring = 0; % make this a scalar, will expand when needed
elseif ~isempty(censoring) && ~isequal(size(x), size(censoring))
    error(message('stats:normfit:InputSizeMismatchCensoring'));
end
if nargin < 4 || isempty(freq)
    freq = 1; % make this a scalar, will expand when needed
elseif isequal(size(x), size(freq))
    n = sum(freq);
    zerowgts = find(freq == 0);
    if numel(zerowgts) > 0
        x(zerowgts) = [];
        if numel(censoring)==numel(freq), censoring(zerowgts) = []; end
        freq(zerowgts) = [];
    end
else
    error(message('stats:normfit:InputSizeMismatchFreq'));
end
if nargin < 5 || isempty(options)
    options = [];
end

ncen = sum(freq.*censoring); % a scalar in all cases
nunc = n - ncen; % a scalar in all cases
sumx = sum(freq.*x);

% Weed out cases which cannot really be fit, no data or all censored.  When
% all observations are censored, the likelihood surface is at its maximum
% (zero) for any mu > max(x) at the boundary sigma==0.
if n == 0 || nunc == 0
    muhat = NaN(1,ncols,'like',x);
    sigmahat = NaN(1,ncols,'like',x);
    muci = NaN(2,ncols,'like',x);
    sigmaci = NaN(2,ncols,'like',x);
    return

% No censoring, find the parameter estimates explicitly.
elseif ncen == 0
    muhat = sumx ./ n;
    if n > 1
        if numel(muhat) == 1 % vector data
            xc = x - muhat;
        else % matrix data
            xc = x - repmat(muhat,[n 1]);
        end
        sigmahat = sqrt(sum(conj(xc).*xc.*freq) ./ (n-1));
    else
        sigmahat = zeros(1,ncols,'like',x);
    end

    if nargout > 2
        if n > 1
            parmhat = [muhat; sigmahat];
            ci = statnormci(parmhat,[],alpha,x,[],freq);
            muci = ci(:,:,1);
            sigmaci = ci(:,:,2);
        else
            muci = [-Inf; Inf]*ones(1,ncols,'like',x);
            sigmaci = [0; Inf]*ones(1,ncols,'like',x);
        end
    end
    return
end
% Past this point, guaranteed to have only one vector of data, with censoring.

% Not much can be done with Infs, either censored or uncensored.
if ~isfinite(sumx)
    muhat = sumx;
    sigmahat = NaN('like',x);
    muci = NaN(2,1,'like',x);
    sigmaci = NaN(2,1,'like',x);
    return
end

% When all uncensored observations are equal and greater than all the
% censored observations, the likelihood surface becomes infinite at the
% boundary sigma==0.  Return something reasonable anyway.
xunc = x(censoring==0);
rangexUnc = range(xunc);
if rangexUnc < realmin(internal.stats.typeof(x))
    if xunc(1) == max(x)
        muhat = xunc(1);
        sigmahat = zeros('like',x);
        if nunc > 1
            muci = [muhat; muhat];
            sigmaci = zeros(2,1,'like',x);
        else
            muci = cast([-Inf; Inf],'like',x);
            sigmaci = cast([0; Inf],'like', x);
        end
        return
    end
end
% Otherwise the data are ok to fit, go on.

% First, get a rough estimate for parameters using the "least squares" method
% as a starting value...
if rangexUnc > 0
    if numel(freq) == numel(x)
        [p,q] = ecdf(x, 'censoring',censoring, 'frequency',freq);
    else
        [p,q] = ecdf(x, 'censoring',censoring);
    end
    pmid = (p(1:(end-1))+p(2:end)) / 2;
    linefit = polyfit(-sqrt(2)*erfcinv(2*pmid), q(2:end), 1);
    parmhat = linefit([2 1]);

% ...unless there's only one uncensored value.
else
    parmhat = [xunc(1) 1];
end

% Optimize the parameters as doubles, regardless of input data type
parmhat = cast(parmhat,'like',1);

% The default options include turning statsfminbx's display off.  This
% function gives its own warning/error messages, and the caller can
% turn display on to get the text output from statsfminbx if desired.
options = statset(statset('normfit'), options);
tolBnd = options.TolBnd;
options = optimset(options);
dfltOptions = struct('DerivativeCheck','off', 'HessMult',[], ...
    'HessPattern',ones(2,2), 'PrecondBandWidth',Inf, ...
    'TypicalX',ones(2,1), 'MaxPCGIter',1, 'TolPCG',0.1);

% Maximize the log-likelihood with respect to mu and sigma.
funfcn = {'fungrad' 'normfit' @negloglike [] []};
[parmhat, ~, ~, err, output] = ...
         statsfminbx(funfcn, parmhat, [-Inf; tolBnd], [Inf; Inf], ...
                     options, dfltOptions, 1, x, censoring, freq);
if (err == 0)
    % statsfminbx may print its own output text; in any case give something
    % more statistical here, controllable via warning IDs.
    if output.funcCount >= options.MaxFunEvals
        warning(message('stats:normfit:EvalLimit'));
    else
        warning(message('stats:normfit:IterLimit'));
    end
elseif (err < 0)
    error(message('stats:normfit:NoSolution'));
end

% Make sure the outputs match the input data type
muhat = cast(parmhat(1),'like',x);
sigmahat = cast(parmhat(2),'like',x);

if nargout > 2
    parmhat = parmhat(:);
    if numel(freq) == numel(x)
        [~, acov] = normlike(parmhat, x, censoring, freq);
    else
        [~, acov] = normlike(parmhat, x, censoring);
    end
    ci = statnormci(parmhat,acov,alpha,x,censoring,freq);
    muci = ci(:,:,1);
    sigmaci = ci(:,:,2);
end


function [nll,ngrad] = negloglike(parms, x, censoring, freq)
% (Negative) log-likelihood function and gradient for normal distribution.
%
% Note that both outputs are the same type as the PARMS input
mu = parms(1);
sigma = parms(2);
cens = (censoring == 1);

z = (x - mu) ./ sigma;
zcen = z(cens);

Scen = .5*erfc(zcen/sqrt(2));
classX = internal.stats.typeof(x);
if all(Scen < realmin(classX))
    nll = cast(realmax(classX),'like',parms);
    ngrad = [nll nll];
    return
end
L = -.5.*z.*z - log(sigma);
L(cens) = log(Scen);
nll = -sum(freq .* L);
nll = cast(nll,'like',parms);

if nargout > 1
    dlogScen = exp(-.5*zcen.*zcen) ./ (sqrt(2*pi) .* Scen);
    dL1 = z ./ sigma;
    dL1(cens) = dlogScen ./ sigma;
    dL2 = (z.*z - 1) ./ sigma;
    dL2(cens) = zcen .* dlogScen ./ sigma;
    ngrad = cast([-sum(freq .* dL1) -sum(freq .* dL2)],'like',parms);
end
Другие вопросы по тегам