Title: | Soft Maximin Estimation for Large Scale Array-Tensor Models |
---|---|
Description: | Efficient design matrix free procedure for solving a soft maximin problem for large scale array-tensor structured models, see Lund, Mogensen and Hansen (2019) <arXiv:1805.02407>. Currently Lasso and SCAD penalized estimation is implemented. |
Authors: | Adam Lund |
Maintainer: | Adam Lund <[email protected]> |
License: | GPL (>= 2) |
Version: | 1.0.3 |
Built: | 2024-11-16 06:39:57 UTC |
Source: | CRAN |
Given new covariate data this function computes the linear predictors
based on the estimated model coefficients in an object produced by the function softmaximin
. Note that the
new data can be supplied in two different formats: i) as a matrix of size
K x p (p
is the number of model
coefficients and
K is the number of new data points) or ii) as a list of two or three matrices each of
size k_i x p_i
(k_i
is the number of new marginal data points in the
ith dimension).
## S3 method for class 'SMMA' predict(object, x = NULL, X = NULL, ...)
## S3 method for class 'SMMA' predict(object, x = NULL, X = NULL, ...)
object |
An object of class SMMA, produced with |
x |
a matrix of size K x p containing the new data points as rows |
X |
a list containing the data matrices each of size k_i x p_i |
... |
ignored |
A list of length nlambda
containing the linear predictors for each model. If
new covariate data is supplied in one matrix
x
each
item is a vector of length K. If the data is supplied as a list of
matrices each of size k_i x p_i
, each item is an array of size
k_1 x ... x k_d
, with d in {1, 2, 3}.
Adam Lund
## Size of example
n1 <- 65; n2 <- 26; n3 <- 13
p1 <- 13; p2 <- 5; p3 <- 4

## Marginal design matrices (Kronecker components)
X1 <- matrix(rnorm(n1 * p1, 0, 0.5), n1, p1)
X2 <- matrix(rnorm(n2 * p2, 0, 0.5), n2, p2)
X3 <- matrix(rnorm(n3 * p3, 0, 0.5), n3, p3)
X <- list(X1, X2, X3)

## Sparse signal shared by the two groups
component <- rbinom(p1 * p2 * p3, 1, 0.1)
Beta1 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
Beta2 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))

## Group means via the rotated H-transform and noisy responses
mu1 <- RH(X3, RH(X2, RH(X1, Beta1)))
mu2 <- RH(X3, RH(X2, RH(X1, Beta2)))
Y1 <- array(rnorm(n1 * n2 * n3, mu1), dim = c(n1, n2, n3))
Y2 <- array(rnorm(n1 * n2 * n3, mu2), dim = c(n1, n2, n3))
Y <- array(NA, c(dim(Y1), 2))
Y[,,, 1] <- Y1
Y[,,, 2] <- Y2

fit <- softmaximin(X, Y, zeta = 10, penalty = "lasso", alg = "npg")

## New data in matrix form
x <- matrix(rnorm(p1 * p2 * p3), nrow = 1)
predict(fit, x = x)[[15]]

## New data in tensor component form
X1 <- matrix(rnorm(p1), nrow = 1)
X2 <- matrix(rnorm(p2), nrow = 1)
X3 <- matrix(rnorm(p3), nrow = 1)
predict(fit, X = list(X1, X2, X3))[[15]]
## Size of example
n1 <- 65; n2 <- 26; n3 <- 13
p1 <- 13; p2 <- 5; p3 <- 4

## Marginal design matrices (Kronecker components)
X1 <- matrix(rnorm(n1 * p1, 0, 0.5), n1, p1)
X2 <- matrix(rnorm(n2 * p2, 0, 0.5), n2, p2)
X3 <- matrix(rnorm(n3 * p3, 0, 0.5), n3, p3)
X <- list(X1, X2, X3)

## Sparse signal shared by the two groups
component <- rbinom(p1 * p2 * p3, 1, 0.1)
Beta1 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
Beta2 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))

## Group means via the rotated H-transform and noisy responses
mu1 <- RH(X3, RH(X2, RH(X1, Beta1)))
mu2 <- RH(X3, RH(X2, RH(X1, Beta2)))
Y1 <- array(rnorm(n1 * n2 * n3, mu1), dim = c(n1, n2, n3))
Y2 <- array(rnorm(n1 * n2 * n3, mu2), dim = c(n1, n2, n3))
Y <- array(NA, c(dim(Y1), 2))
Y[,,, 1] <- Y1
Y[,,, 2] <- Y2

fit <- softmaximin(X, Y, zeta = 10, penalty = "lasso", alg = "npg")

## New data in matrix form
x <- matrix(rnorm(p1 * p2 * p3), nrow = 1)
predict(fit, x = x)[[15]]

## New data in tensor component form
X1 <- matrix(rnorm(p1), nrow = 1)
X2 <- matrix(rnorm(p2), nrow = 1)
X3 <- matrix(rnorm(p3), nrow = 1)
predict(fit, X = list(X1, X2, X3))[[15]]
This function will print some information about the SMMA object.
## S3 method for class 'SMMA' print(x, ...)
## S3 method for class 'SMMA' print(x, ...)
x |
a SMMA object |
... |
ignored |
Adam Lund
## Size of example
n1 <- 65; n2 <- 26; n3 <- 13
p1 <- 13; p2 <- 5; p3 <- 4

## Marginal design matrices (Kronecker components)
X1 <- matrix(rnorm(n1 * p1, 0, 0.5), n1, p1)
X2 <- matrix(rnorm(n2 * p2, 0, 0.5), n2, p2)
X3 <- matrix(rnorm(n3 * p3, 0, 0.5), n3, p3)
X <- list(X1, X2, X3)

## Sparse signal shared by the two groups
component <- rbinom(p1 * p2 * p3, 1, 0.1)
Beta1 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
Beta2 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))

## Group means via the rotated H-transform and noisy responses
mu1 <- RH(X3, RH(X2, RH(X1, Beta1)))
mu2 <- RH(X3, RH(X2, RH(X1, Beta2)))
Y1 <- array(rnorm(n1 * n2 * n3, mu1), dim = c(n1, n2, n3))
Y2 <- array(rnorm(n1 * n2 * n3, mu2), dim = c(n1, n2, n3))
Y <- array(NA, c(dim(Y1), 2))
Y[,,, 1] <- Y1
Y[,,, 2] <- Y2

fit <- softmaximin(X, Y, zeta = 10, penalty = "lasso", alg = "npg")

## Auto-print the fitted SMMA object
fit
## Size of example
n1 <- 65; n2 <- 26; n3 <- 13
p1 <- 13; p2 <- 5; p3 <- 4

## Marginal design matrices (Kronecker components)
X1 <- matrix(rnorm(n1 * p1, 0, 0.5), n1, p1)
X2 <- matrix(rnorm(n2 * p2, 0, 0.5), n2, p2)
X3 <- matrix(rnorm(n3 * p3, 0, 0.5), n3, p3)
X <- list(X1, X2, X3)

## Sparse signal shared by the two groups
component <- rbinom(p1 * p2 * p3, 1, 0.1)
Beta1 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
Beta2 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))

## Group means via the rotated H-transform and noisy responses
mu1 <- RH(X3, RH(X2, RH(X1, Beta1)))
mu2 <- RH(X3, RH(X2, RH(X1, Beta2)))
Y1 <- array(rnorm(n1 * n2 * n3, mu1), dim = c(n1, n2, n3))
Y2 <- array(rnorm(n1 * n2 * n3, mu2), dim = c(n1, n2, n3))
Y <- array(NA, c(dim(Y1), 2))
Y[,,, 1] <- Y1
Y[,,, 2] <- Y2

fit <- softmaximin(X, Y, zeta = 10, penalty = "lasso", alg = "npg")

## Auto-print the fitted SMMA object
fit
This function is an implementation of the rotated H-operator found in
Currie et al 2006. It forms the basis of the GLAM arithmetic.
RH(M, A)
RH(M, A)
M |
an n x p_1 matrix |
A |
a 3d array of size p_1 x p_2 x p_3 |
For details see Currie et al 2006. Note that this particular implementation is not used in the routines underlying the optimization procedure.
A 3d array of size p_2 x p_3 x n.
Adam Lund
Currie, I. D., M. Durban, and P. H. C. Eilers (2006). Generalized linear array models with applications to multidimensional smoothing. Journal of the Royal Statistical Society. Series B. 68, 259-280. url = http://dx.doi.org/10.1111/j.1467-9868.2006.00543.x.
## Size of example
n1 <- 65; n2 <- 26; n3 <- 13
p1 <- 13; p2 <- 5; p3 <- 4

## Marginal design matrices (Kronecker components)
X1 <- matrix(rnorm(n1 * p1), n1, p1)
X2 <- matrix(rnorm(n2 * p2), n2, p2)
X3 <- matrix(rnorm(n3 * p3), n3, p3)
Beta <- array(rnorm(p1 * p2 * p3, 0, 1), c(p1, p2, p3))

## Nested rotated H-transforms agree with the full Kronecker product
max(abs(c(RH(X3, RH(X2, RH(X1, Beta)))) -
          kronecker(X3, kronecker(X2, X1)) %*% c(Beta)))
## Size of example
n1 <- 65; n2 <- 26; n3 <- 13
p1 <- 13; p2 <- 5; p3 <- 4

## Marginal design matrices (Kronecker components)
X1 <- matrix(rnorm(n1 * p1), n1, p1)
X2 <- matrix(rnorm(n2 * p2), n2, p2)
X3 <- matrix(rnorm(n3 * p3), n3, p3)
Beta <- array(rnorm(p1 * p2 * p3, 0, 1), c(p1, p2, p3))

## Nested rotated H-transforms agree with the full Kronecker product
max(abs(c(RH(X3, RH(X2, RH(X1, Beta)))) -
          kronecker(X3, kronecker(X2, X1)) %*% c(Beta)))
Efficient design matrix free procedure for solving a soft maximin problem for large scale array-tensor structured models, see Lund et al., 2020. Currently Lasso and SCAD penalized estimation is implemented.
softmaximin(X, Y, zeta, penalty = c("lasso", "scad"), alg = c("npg", "fista"), nlambda = 30, lambda.min.ratio = 1e-04, lambda = NULL, penalty.factor = NULL, reltol = 1e-05, maxiter = 15000, steps = 1, btmax = 100, c = 0.0001, tau = 2, M = 4, nu = 1, Lmin = 0, log = TRUE)
softmaximin(X, Y, zeta, penalty = c("lasso", "scad"), alg = c("npg", "fista"), nlambda = 30, lambda.min.ratio = 1e-04, lambda = NULL, penalty.factor = NULL, reltol = 1e-05, maxiter = 15000, steps = 1, btmax = 100, c = 0.0001, tau = 2, M = 4, nu = 1, Lmin = 0, log = TRUE)
X |
list containing the Kronecker components (1, 2 or 3) of the Kronecker design matrix.
These are matrices of sizes |
Y |
array of size |
zeta |
strictly positive float controlling the softmaximin approximation accuracy. |
penalty |
string specifying the penalty type. Possible values are |
alg |
string specifying the optimization algorithm. Possible values are |
nlambda |
positive integer giving the number of |
lambda.min.ratio |
strictly positive float giving the smallest value for |
lambda |
sequence of strictly positive floats used as penalty parameters. |
penalty.factor |
array of size |
reltol |
strictly positive float giving the convergence tolerance for the inner loop. |
maxiter |
positive integer giving the maximum number of iterations allowed for each |
steps |
strictly positive integer giving the number of steps used in the multi-step adaptive lasso algorithm for non-convex penalties.
Automatically set to 1 when |
btmax |
strictly positive integer giving the maximum number of backtracking steps allowed in each iteration. Default is |
c |
strictly positive float used in the NPG algorithm. Default is |
tau |
strictly positive float used to control the stepsize for NPG. Default is |
M |
positive integer giving the look back for the NPG. Default is |
nu |
strictly positive float used to control the stepsize. A value less than 1 will decrease
the stepsize and a value larger than 1 will increase it. Default is |
Lmin |
non-negative float used by the NPG algorithm to control the stepsize. For the default |
log |
logical variable indicating whether to use log-loss. TRUE is default and yields the loss below. |
Following Lund et al., 2020 this package solves the optimization problem for a linear
model for heterogeneous -dimensional array data (
) organized in
known groups,
and with identical tensor structured design matrix
across all groups. Specifically
is the
number of observations in each group,
the
response array
for group
, and
a
design matrix, with tensor structure
For ,
are the marginal
design matrices (Kronecker components).
Using the GLAM framework the model equation for group
is expressed as
where rho is the so-called rotated
H-transform (see Currie et al., 2006),
for each
is a (random)
parameter array
and
is a
error array.
This package solves the penalized soft maximin problem from Lund et al., 2020, given by
for the setup described above. Note that
is the empirical explained variance from Meinshausen and Buhlmann, 2015. See Lund et al., 2020 for more details and references.
For , using only the marginal matrices
(for
there is only one marginal), the function
softmaximin
solves the soft maximin problem for a sequence of penalty parameters .
Two optimization algorithms are implemented, a non-monotone proximal gradient (NPG) algorithm and a fast iterative soft thresholding algorithm (FISTA). We note that this package also solves the problem above with the penalty given by the SCAD penalty, using the multiple step adaptive lasso procedure to loop over the proximal algorithm.
An object with S3 Class "SMMA".
spec |
A string indicating the array dimension (1, 2 or 3) and the penalty. |
coef |
A |
lambda |
A vector containing the sequence of penalty values used in the estimation procedure. |
Obj |
A matrix containing the objective values for each iteration and each model. |
df |
The number of nonzero coefficients for each value of |
dimcoef |
A vector giving the dimension of the model coefficient array |
dimobs |
A vector giving the dimension of the observation (response) array |
Iter |
A list with 4 items:
|
Adam Lund
Maintainer: Adam Lund, [email protected]
Lund, A., S. W. Mogensen and N. R. Hansen (2020). Soft Maximin Estimation for Heterogeneous Array Data. Preprint.
Meinshausen, N and P. Buhlmann (2015). Maximin effects in inhomogeneous large-scale data. The Annals of Statistics. 43, 4, 1801-1830. url = https://doi.org/10.1214/15-AOS1325.
Currie, I. D., M. Durban, and P. H. C. Eilers (2006). Generalized linear array models with applications to multidimensional smoothing. Journal of the Royal Statistical Society. Series B. 68, 259-280. url = http://dx.doi.org/10.1111/j.1467-9868.2006.00543.x.
## Size of example
n1 <- 65; n2 <- 26; n3 <- 13
p1 <- 13; p2 <- 5; p3 <- 4

## Marginal design matrices (Kronecker components)
X1 <- matrix(rnorm(n1 * p1), n1, p1)
X2 <- matrix(rnorm(n2 * p2), n2, p2)
X3 <- matrix(rnorm(n3 * p3), n3, p3)
X <- list(X1, X2, X3)

## Sparse signal common to all five groups
component <- rbinom(p1 * p2 * p3, 1, 0.1)

## Group 1: coefficients, mean array and noisy response
Beta1 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
mu1 <- RH(X3, RH(X2, RH(X1, Beta1)))
Y1 <- array(rnorm(n1 * n2 * n3), dim = c(n1, n2, n3)) + mu1

## Group 2
Beta2 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
mu2 <- RH(X3, RH(X2, RH(X1, Beta2)))
Y2 <- array(rnorm(n1 * n2 * n3), dim = c(n1, n2, n3)) + mu2

## Group 3
Beta3 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
mu3 <- RH(X3, RH(X2, RH(X1, Beta3)))
Y3 <- array(rnorm(n1 * n2 * n3), dim = c(n1, n2, n3)) + mu3

## Group 4
Beta4 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
mu4 <- RH(X3, RH(X2, RH(X1, Beta4)))
Y4 <- array(rnorm(n1 * n2 * n3), dim = c(n1, n2, n3)) + mu4

## Group 5
Beta5 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
mu5 <- RH(X3, RH(X2, RH(X1, Beta5)))
Y5 <- array(rnorm(n1 * n2 * n3), dim = c(n1, n2, n3)) + mu5

## Stack the groups along a fourth dimension
Y <- array(NA, c(dim(Y1), 5))
Y[,,, 1] <- Y1
Y[,,, 2] <- Y2
Y[,,, 3] <- Y3
Y[,,, 4] <- Y4
Y[,,, 5] <- Y5

fit <- softmaximin(X, Y, zeta = 10, penalty = "lasso", alg = "npg")
Betafit <- fit$coef

## Compare the estimated coefficients for model 15 to the true signal
modelno <- 15
m <- min(Betafit[, modelno], c(component))
M <- max(Betafit[, modelno], c(component))
plot(c(component), type = "l", ylim = c(m, M))
lines(Betafit[, modelno], col = "red")
## Size of example
n1 <- 65; n2 <- 26; n3 <- 13
p1 <- 13; p2 <- 5; p3 <- 4

## Marginal design matrices (Kronecker components)
X1 <- matrix(rnorm(n1 * p1), n1, p1)
X2 <- matrix(rnorm(n2 * p2), n2, p2)
X3 <- matrix(rnorm(n3 * p3), n3, p3)
X <- list(X1, X2, X3)

## Sparse signal common to all five groups
component <- rbinom(p1 * p2 * p3, 1, 0.1)

## Group 1: coefficients, mean array and noisy response
Beta1 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
mu1 <- RH(X3, RH(X2, RH(X1, Beta1)))
Y1 <- array(rnorm(n1 * n2 * n3), dim = c(n1, n2, n3)) + mu1

## Group 2
Beta2 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
mu2 <- RH(X3, RH(X2, RH(X1, Beta2)))
Y2 <- array(rnorm(n1 * n2 * n3), dim = c(n1, n2, n3)) + mu2

## Group 3
Beta3 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
mu3 <- RH(X3, RH(X2, RH(X1, Beta3)))
Y3 <- array(rnorm(n1 * n2 * n3), dim = c(n1, n2, n3)) + mu3

## Group 4
Beta4 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
mu4 <- RH(X3, RH(X2, RH(X1, Beta4)))
Y4 <- array(rnorm(n1 * n2 * n3), dim = c(n1, n2, n3)) + mu4

## Group 5
Beta5 <- array(rnorm(p1 * p2 * p3, 0, 0.1) + component, c(p1, p2, p3))
mu5 <- RH(X3, RH(X2, RH(X1, Beta5)))
Y5 <- array(rnorm(n1 * n2 * n3), dim = c(n1, n2, n3)) + mu5

## Stack the groups along a fourth dimension
Y <- array(NA, c(dim(Y1), 5))
Y[,,, 1] <- Y1
Y[,,, 2] <- Y2
Y[,,, 3] <- Y3
Y[,,, 4] <- Y4
Y[,,, 5] <- Y5

fit <- softmaximin(X, Y, zeta = 10, penalty = "lasso", alg = "npg")
Betafit <- fit$coef

## Compare the estimated coefficients for model 15 to the true signal
modelno <- 15
m <- min(Betafit[, modelno], c(component))
M <- max(Betafit[, modelno], c(component))
plot(c(component), type = "l", ylim = c(m, M))
lines(Betafit[, modelno], col = "red")