Уменьшение логарифмического правдоподобия в моделях смесей EM-алгоритм на R
Я работаю над моделью смеси, используя алгоритм EM. Модель Mixture составлена из моделей точечных процессов Пуассона, но моя главная задача в созданной функции ppmMixEngine заключается в том, что вычисляемая вероятность журнала уменьшается на каждой итерации. Важным фактом здесь является то, что у нас есть известные и неизвестные ярлыки, которые мы хотим классифицировать. Кажется, что здесь есть проблема с кодированием, но я не могу это понять. У кого-нибудь есть идея или предложение?
Заранее спасибо!
Код:
library(spatstat)
library(lattice)
########------------------------------------------------------------------------
# Set up initial values
#-------------------------------------------------------------------------------
# 1 # Set up data.ppp, cov.list, ppmform and quads
# Generate XY grid
set.seed(100)
XY = expand.grid(seq(0, 100, 1), seq(0, 100, 1))
X = XY[,1]
Y = XY[,2]
# Generate 2 covariates for PPM
v1 = (X - 30)^2 + (Y - 70)^2 - 0.5*X*Y
v2 = (X - 70)^2 + (Y - 60)^2 + 0.9*X*Y
v1 = -1*scale(v1)
v2 = -1*scale(v2)
vmat = as.matrix(data.frame(1, v1, v1^2, v2, v2^2))
# Generate true PPM coefficients based on linear and quadratic terms for 2 covariates
sp1_coef = c(-6, 4, -1, 2, -0.6)
sp1_int = exp(vmat %*% sp1_coef)
sp2_coef = c(-4.5, 1.8, -1, 1.5, -0.9)
sp2_int = exp(vmat %*% sp2_coef)
sp3_coef = c(-3.5, -0.5, -0.8, 1, -0.8)
sp3_int = exp(vmat %*% sp3_coef)
levelplot(sp1_int ~ X + Y)
levelplot(sp2_int ~ X + Y)
levelplot(sp3_int ~ X + Y)
# Create pixel images of intensity surfaces for spatstat
sp1_int_im = as.im(data.frame(x = X, y = Y, z = sp1_int))
sp2_int_im = as.im(data.frame(x = X, y = Y, z = sp2_int))
sp3_int_im = as.im(data.frame(x = X, y = Y, z = sp3_int))
# Simulate point patterns
sp1_sim = rpoispp(sp1_int_im)
sp2_sim = rpoispp(sp2_int_im)
sp3_sim = rpoispp(sp3_int_im)
plot(sp1_sim, cex = 0.6)
plot(sp2_sim, add = TRUE, col = "red", cex = 0.6)
plot(sp3_sim, add = TRUE, col = "blue", cex = 0.6)
pct_hidden = 0.5
# Randomly subset some (about 100*pct_hidden %) points from which we hide the labels
win = owin(xrange = c(-0.5, 100.5), yrange = c(-0.5, 100.5))
sp1_hide = sample(1:sp1_sim$n, floor(pct_hidden*sp1_sim$n))
sp1_sub = sp1_sim[-sp1_hide]
train1 = ppp(x = sp1_sub$x, y = sp1_sub$y, window = win)
sp1_test = sp1_sim[sp1_hide]
sp2_hide = sample(1:sp2_sim$n, floor(pct_hidden*sp2_sim$n))
sp2_sub = sp2_sim[-sp2_hide]
sp2_test = sp2_sim[sp2_hide]
train2 = ppp(x = sp2_sub$x, y = sp2_sub$y, window = win)
sp3_hide = sample(1:sp3_sim$n, floor(pct_hidden*sp3_sim$n))
sp3_sub = sp3_sim[-sp3_hide]
sp3_test = sp3_sim[sp3_hide]
train3 = ppp(x = sp3_sub$x, y = sp3_sub$y, window = win)
all_test = ppp(x = c(sp1_test$x, sp2_test$x, sp3_test$x),
y = c(sp1_test$y, sp2_test$y, sp3_test$y), window = win,
marks = c(rep("Hidden 1", sp1_test$n), rep("Hidden 2", sp2_test$n), rep("Hidden 3", sp3_test$n)))
test_labels = c(rep(1, sp1_test$n), rep(2, sp2_test$n), rep(3, sp3_test$n))
quads = ppp(X, Y, window = win)
datappp = ppp(x = c(sp1_sub$x, sp2_sub$x, sp3_sub$x, all_test$x),
y = c(sp1_sub$y, sp2_sub$y, sp3_sub$y, all_test$y),
marks = c(rep("Sp1", sp1_sub$n), rep("Sp2", sp2_sub$n),
rep("Sp3", sp3_sub$n), rep("Unknown", all_test$n)),
window = win)
# Set up covariate list for spatstat
cov.list = list()
for (v in 1:4)
{
v.v = as.im(data.frame(x = X, y = Y, z = vmat[,(v + 1)]))
cov.list[[v]] = v.v
}
names(cov.list) = c("V1", "V1sq", "V2", "V2sq")
# set up formula
ppmform = as.formula(paste("~", paste(names(cov.list), collapse = "+")))
###########################################################################---------------
# Create the ppmMixEngine function
#----------------------------------
makeMask = function(Qppp)
{
q_df = data.frame(x = Qppp$x, y = Qppp$y)
ux = sort(unique(q_df$x))
uy = sort(unique(q_df$y))
nx = length(ux)
ny = length(uy)
col.ref = match(q_df$x, ux)
row.ref = match(q_df$y, uy)
all.vec = rep(0, max(row.ref)*max(col.ref))
vec.ref = (col.ref - 1)*max(row.ref) + row.ref
all.vec[vec.ref] = 1
mask.out = matrix(all.vec, max(row.ref), max(col.ref), dimnames = list(uy, ux))
mask.out
}
ppmMixEngine = function(datappp, quads, ppmform, cov.list, verbose = TRUE, tol = 0.001, maxit = 50)
{
# Set up initial marks for the unknown observations (hidden ones) based on nearest neighbour distances
datamarks = marks(datappp)
uniquemarks = unique(datamarks)
unknown = datamarks == "Unknown"
nclust = length(unique(datamarks)) - 1
splitppps = split(datappp, as.factor(marks(datappp)))
for (i in 1:(nclust + 1))
{
assign(paste("ppp_", names(splitppps)[i], sep = ""),
splitppps[[i]])
}
nndists = nndist(datappp, by = as.factor(marks(datappp)))
nndists = nndists[,-which(colnames(nndists) == "Unknown")]
weight_num = apply(nndists, 1, min)/nndists
# for the marks where the observations component is known we keep 1 as their weight
initweights = weight_num/apply(weight_num, 1, sum)
initweights[datamarks != "Unknown"] = rep(0, nclust)
for (i in 1:nclust)
{
rowfill = which(datamarks == colnames(nndists)[i])
initweights[rowfill, i] = 1
}
iterweights = initweights
itermarks = datamarks
itermarks = colnames(nndists)[apply(initweights, 1, which.max)]
p = table(itermarks)/sum(table(itermarks))
iterppp = datappp
marks(iterppp) = as.factor(itermarks)
Qmask = makeMask(quads)
Q = quadscheme(data = iterppp, dummy = quads, method = "grid", ntile = c(dim(Qmask)[1], dim(Qmask)[2]), npix = c(dim(Qmask)[1], dim(Qmask)[2]))
formchr = as.character(ppmform)[2]
formsplit = strsplit(formchr, "\\+")
markform = as.formula(paste("~", paste(paste(formsplit[[1]], "* marks"), collapse = " + ")))
fit1 = ppm(Q, trend = markform, covariates = cov.list)
loglik.old <- 0.
loglik.new <- 1.
#
# Iterator starts here,
#
is_known = which(datamarks != "Unknown")
niter <- 0
while(abs(loglik.new - loglik.old)/(1 + abs(loglik.new)) > tol) {
if(niter >= maxit) {
warning(paste("E-M algorithm failed to converge in",
maxit, ngettext(maxit, "iteration", "iterations")),
call.=FALSE)
break
}
niter <- niter + 1
# E-step to update weights
predint = matrix(NA, sum(unknown), nclust) # predint will contain the predicted intensities at points with hidden labels
for (i in 1:nclust)
{
ppp_i = ppp_Unknown
marks(ppp_i) = as.factor(colnames(nndists)[i])
predint[,i] = predict(fit1, locations = ppp_i)
}
p_mat = t(matrix(p, ncol(predint), nrow(predint)))
predint_p = predint*p_mat
iterweights[unknown,] = predint_p/apply(predint_p, 1, sum)
# end E-step
# M-step
p = apply(iterweights, 2, sum)/sum(apply(iterweights, 2, sum)) # update p
itermarks = datamarks
itermarks = colnames(nndists)[apply(iterweights, 1, which.max)] # update marks for locations with hidden labels
iterppp = datappp
marks(iterppp) = as.factor(itermarks)
Q = quadscheme(data = iterppp, dummy = quads, method = "grid", ntile = c(dim(Qmask)[1], dim(Qmask)[2]), npix = c(dim(Qmask)[1], dim(Qmask)[2]))
fit1 = ppm(Q, trend = markform, covariates = cov.list) # new ppm with new coefficients
# end M-step
# evaluate marginal loglikelihood
loglik.old = loglik.new
allp_mat = t(matrix(p, ncol(predint), nrow(iterweights)))
loglik.new <- sum(log(apply(allp_mat * iterweights, 2, sum)))
if(verbose)
cat(paste("Iteration", niter, "\tlogLik =", loglik.new,
"\tp =", signif(p,4), "\n"))
}
if(verbose) {
cat("\nEstimated parameters:\n")
cat(paste("p [cluster] =", signif(p, 5), "\n"))
cat(paste("\nloglik.new:\n", signif(loglik.new), "\n"))
}
return(list(z = round(p),
probs = p,
niter = niter, maxit = maxit,
converged = (niter >= maxit),
New_weights = round(iterweights, digits = 3)
))
}
#################
# # Test the function - from ppmMixEngine
#------------------------------------------------------------------------------
Testmixfunction = ppmMixEngine(datappp = datappp, quads = quads,
ppmform = ppmform, cov.list = cov.list,
verbose=TRUE, tol = 0.00001, maxit = 50)