Title: | Simultaneous Semi-Parametric Estimation of Clustering and Regression |
---|---|
Description: | Parameter estimation of regression models with fixed group effects, when the group variable is missing while group-related variables are available. Parametric and semi-parametric approaches described in Marbac et al. (2020) <arXiv:2012.14159> are implemented. |
Authors: | Matthieu Marbac [aut, cre, cph], Mohammed Sedki [aut], Christophe Biernacki [aut], Vincent Vandewalle [aut] |
Maintainer: | Matthieu Marbac <[email protected]> |
License: | GPL (>= 2) |
Version: | 1.1.0 |
Built: | 2024-11-21 06:36:00 UTC |
Source: | CRAN |
Parameter estimation of regression models with fixed group effects, when the group variable is missing while group-related variables are available.
Package: | ClusPred |
Type: | Package |
Version: | 1.1.0 |
Date: | 2021-12-01 |
License: | GPL-3 |
LazyLoad: | yes |
Simultaneous semi-parametric estimation of clustering and regression, Matthieu Marbac and Mohammed Sedki and Christophe Biernacki and Vincent Vandewalle (2020) <arXiv:2012.14159>.
Estimation of the group-variable Z based on covariates X and estimation of the parameters of the regression of Y on (U, Z)
cluspred( y, x, u = NULL, K = 2, model.reg = "mean", tau = 0.5, simultaneous = TRUE, np = TRUE, nbinit = 20, nbCPU = 1, tol = 0.01, band = (length(y)^(-1/5)), seed = 134 )
cluspred( y, x, u = NULL, K = 2, model.reg = "mean", tau = 0.5, simultaneous = TRUE, np = TRUE, nbinit = 20, nbCPU = 1, tol = 0.01, band = (length(y)^(-1/5)), seed = 134 )
y |
numeric vector of the target variable |
x |
matrix used for clustering (can contain numerical variables and factors) |
u |
matrix of the covariates used for regression (can contain numerical variables and factors) |
K |
number of clusters |
model.reg |
indicates the type of the loss ("mean", "quantile", "expectile", "logcosh", "huber"). Only the losses "mean" and "quantile" are implemented if simultaneous=FALSE or np=FALSE |
tau |
specifies the level for the loss (quantile, expectile or huber) |
simultaneous |
boolean indicating whether the clustering and the regression are performed simultaneously (TRUE) or not (FALSE) |
np |
boolean indicating whether a nonparametric model is used (TRUE) or not (FALSE) |
nbinit |
number of random initializations |
nbCPU |
number of CPUs (only used on Linux) |
tol |
tolerance used in the stopping rule |
band |
bandwidth selection |
seed |
value of the seed (used for drawing the starting points) |
cluspred returns a list containing the model parameters (param), the posterior probabilities of cluster memberships (tik), the partition (zhat) and the loglikelihood (loglike) or, in the nonparametric case, the smoothed loglikelihood (logSmoothlike).
Simultaneous semi-parametric estimation of clustering and regression, Matthieu Marbac and Mohammed Sedki and Christophe Biernacki and Vincent Vandewalle (2020) <arXiv:2012.14159>.
require(ClusPred) # data loading data(simdata) # mean regression with two latent groups in parametric framework and two covariates res <- cluspred(simdata$y, simdata$x, simdata$u, K=2, np=FALSE, nbCPU = 1, nbinit = 10) # coefficient of the regression res$param$beta # proportions of the latent groups res$param$pi # posterior probability of the group memberships head(res$tik) # partition res$zhat # loglikelihood res$loglike # prediction (for possible new observations) pred <- predictboth(simdata$x, simdata$u, res, np = FALSE) # predicted cluster membreships pred$zhat # predicted value of the target variable pred$yhat # median regression with two latent groups in nonparametric framework and two covariates res <- cluspred(simdata$y, simdata$x, simdata$u, K=2, model.reg = "quantile", tau = 0.5, nbinit = 10) # coefficient of the regression res$param$beta # proportions of the latent groups res$param$pi # posterior probability of the group memberships head(res$tik) # partition res$zhat # smoothed loglikelihood res$logSmoothlike # prediction (for possible new observations) pred <- predictboth(simdata$x, simdata$u, res, np = TRUE) # predicted cluster membreships pred$zhat # predicted value of the target variable pred$yhat
require(ClusPred) # data loading data(simdata) # mean regression with two latent groups in parametric framework and two covariates res <- cluspred(simdata$y, simdata$x, simdata$u, K=2, np=FALSE, nbCPU = 1, nbinit = 10) # coefficient of the regression res$param$beta # proportions of the latent groups res$param$pi # posterior probability of the group memberships head(res$tik) # partition res$zhat # loglikelihood res$loglike # prediction (for possible new observations) pred <- predictboth(simdata$x, simdata$u, res, np = FALSE) # predicted cluster membreships pred$zhat # predicted value of the target variable pred$yhat # median regression with two latent groups in nonparametric framework and two covariates res <- cluspred(simdata$y, simdata$x, simdata$u, K=2, model.reg = "quantile", tau = 0.5, nbinit = 10) # coefficient of the regression res$param$beta # proportions of the latent groups res$param$pi # posterior probability of the group memberships head(res$tik) # partition res$zhat # smoothed loglikelihood res$logSmoothlike # prediction (for possible new observations) pred <- predictboth(simdata$x, simdata$u, res, np = TRUE) # predicted cluster membreships pred$zhat # predicted value of the target variable pred$yhat
Prediction for new observations
predictboth(x, u = NULL, result, np = FALSE)
predictboth(x, u = NULL, result, np = FALSE)
x |
covariates used for clustering |
u |
covariates of the regression (can be null) |
result |
results provided by function cluspred |
np |
boolean indicating whether nonparametric estimation is used (TRUE) or not (FALSE) |
predictboth returns a list containing the predicted cluster membership (zhat) and the predicted value of the target variable (yhat).
require(ClusPred) # data loading data(simdata) # mean regression with two latent groups in parametric framework and two covariates res <- cluspred(simdata$y, simdata$x, simdata$u, K=2, np=FALSE, nbCPU = 1, nbinit = 10) # coefficient of the regression res$param$beta # proportions of the latent groups res$param$pi # posterior probability of the group memberships head(res$tik) # partition res$zhat # loglikelihood res$loglike # prediction (for possible new observations) pred <- predictboth(simdata$x, simdata$u, res, np = FALSE) # predicted cluster membreships pred$zhat # predicted value of the target variable pred$yhat # median regression with two latent groups in nonparametric framework and two covariates res <- cluspred(simdata$y, simdata$x, simdata$u, K=2, model.reg = "quantile", tau = 0.5, nbinit = 10) # coefficient of the regression res$param$beta # proportions of the latent groups res$param$pi # posterior probability of the group memberships head(res$tik) # partition res$zhat # smoothed loglikelihood res$logSmoothlike # prediction (for possible new observations) pred <- predictboth(simdata$x, simdata$u, res, np = TRUE) # predicted cluster membreships pred$zhat # predicted value of the target variable pred$yhat
require(ClusPred) # data loading data(simdata) # mean regression with two latent groups in parametric framework and two covariates res <- cluspred(simdata$y, simdata$x, simdata$u, K=2, np=FALSE, nbCPU = 1, nbinit = 10) # coefficient of the regression res$param$beta # proportions of the latent groups res$param$pi # posterior probability of the group memberships head(res$tik) # partition res$zhat # loglikelihood res$loglike # prediction (for possible new observations) pred <- predictboth(simdata$x, simdata$u, res, np = FALSE) # predicted cluster membreships pred$zhat # predicted value of the target variable pred$yhat # median regression with two latent groups in nonparametric framework and two covariates res <- cluspred(simdata$y, simdata$x, simdata$u, K=2, model.reg = "quantile", tau = 0.5, nbinit = 10) # coefficient of the regression res$param$beta # proportions of the latent groups res$param$pi # posterior probability of the group memberships head(res$tik) # partition res$zhat # smoothed loglikelihood res$logSmoothlike # prediction (for possible new observations) pred <- predictboth(simdata$x, simdata$u, res, np = TRUE) # predicted cluster membreships pred$zhat # predicted value of the target variable pred$yhat
Simulated data used for the package examples.
data(simdata)
data(simdata)