Title: | Structured Learning in Time-Dependent Cox Models |
---|---|
Description: | Efficient procedures for fitting and cross-validating the structurally-regularized time-dependent Cox models. |
Authors: | Yi Lian [aut, cre], Guanbo Wang [aut], Archer Y. Yang [aut], Mireille E. Schnitzer [aut], Robert W. Platt [aut], Rui Wang [aut], Marc Dorais [aut], Sylvie Perreault [aut], Julien Mairal [ctb], Yuansi Chen [ctb] |
Maintainer: | Yi Lian <[email protected]> |
License: | GPL (>= 3) |
Version: | 1.2.1 |
Built: | 2024-12-04 06:59:11 UTC |
Source: | CRAN |
Automatically generate objects used to describe the structure of the nested group lasso penalty. The output is then used by sox()
and sox_cv()
.
nested_structure(group_list)
nested_structure(group_list)
group_list |
A list containing the indices of the group members. |
A list of objects describing the group structure.
groups |
Required by |
own_variables |
Required by |
N_own_variables |
Required by |
group_weights |
Required by |
# Example: build the inputs describing a nested group lasso penalty.

# p = 9 Variables:
## 1: A1
## 2: A2
## 3: C1
## 4: C2
## 5: B
## 6: A1B
## 7: A2B
## 8: C1B
## 9: C2B

# G = 12 Nested groups (misspecified, for the demonstration of the software
# only.)
## g1: A1, A2, C1, C2, B, A1B, A2B, C1B, C2B
## g2: A1, A2, A1B, A2B
## g3: C1, C2, C1B, C2B
## g4: 1
## g5: 2
## ...
## g12: 9

# Each list element holds the variable indices belonging to one group.
nested.groups <- list(
  1:9,
  c(1, 2, 6, 7),
  c(3, 4, 8, 9),
  1, 2, 3, 4, 5, 6, 7, 8, 9
)

# Derive the structure objects, then inspect them.
pars.nested <- nested_structure(nested.groups)
str(pars.nested)
Automatically generate objects used to describe the structure of the overlapping group lasso penalty. The output is then used by sox()
and sox_cv()
.
overlap_structure(group_list)
overlap_structure(group_list)
group_list |
A list containing the indices of the group members. |
A list of objects describing the group structure.
groups |
Required by |
groups_var |
Required by |
group_weights |
Required by |
# Example: build the inputs describing an overlapping group lasso penalty.

# p = 9 Variables:
## 1: A1
## 2: A2
## 3: C1
## 4: C2
## 5: B
## 6: A1B
## 7: A2B
## 8: C1B
## 9: C2B

# G = 5 Overlapping groups:
## g1: A1, A2, A1B, A2B
## g2: B, A1B, A2B, C1B, C2B
## g3: A1B, A2B
## g4: C1, C2, C1B, C2B
## g5: C1B, C2B

# Each list element holds the variable indices belonging to one group.
overlapping.groups <- list(
  c(1, 2, 6, 7),
  c(5, 6, 7, 8, 9),
  c(6, 7),
  c(3, 4, 8, 9),
  c(8, 9)
)

# Derive the structure objects, then inspect them.
pars.overlapping <- overlap_structure(overlapping.groups)
str(pars.overlapping)
sox()
Plot the solution path generated by sox()
.
## S3 method for class 'sox' plot(x, type = "l", log = "x", ...)
## S3 method for class 'sox' plot(x, type = "l", log = "x", ...)
x |
Fitted |
type |
Graphical argument to be passed to |
log |
Graphical argument to be passed to |
... |
Further arguments of |
Produces a coefficient profile plot of the coefficient paths for a fitted sox
model.
# Example: plot the solution path of a fitted sox model.

# Predictor matrix taken from the demo dataset `sim`.
x <- as.matrix(
  sim[, c("A1", "A2", "C1", "C2", "B", "A1B", "A2B", "C1B", "C2B")]
)

# Log-spaced lambda sequence from 1 down to 1e-3 (20 values).
lam.seq <- exp(seq(log(1e0), log(1e-3), length.out = 20))

# Overlapping group structure (indices refer to the columns of x).
overlapping.groups <- list(
  c(1, 2, 6, 7),
  c(5, 6, 7, 8, 9),
  c(6, 7),
  c(3, 4, 8, 9),
  c(8, 9)
)
pars.overlapping <- overlap_structure(overlapping.groups)

# Fit along the lambda sequence and plot the coefficient paths.
fit.overlapping <- sox(
  x = x,
  ID = sim$Id,
  time = sim$Start,
  time2 = sim$Stop,
  event = sim$Event,
  penalty = "overlapping",
  lambda = lam.seq,
  group = pars.overlapping$groups,
  group_variable = pars.overlapping$groups_var,
  penalty_weights = pars.overlapping$group_weights,
  tol = 1e-4,
  maxit = 1e3,
  verbose = FALSE
)
plot(fit.overlapping)

# The full-data fit stored in a sox_cv result can be plotted the same way.
cv.overlapping <- sox_cv(
  x = x,
  ID = sim$Id,
  time = sim$Start,
  time2 = sim$Stop,
  event = sim$Event,
  penalty = "overlapping",
  lambda = lam.seq,
  group = pars.overlapping$groups,
  group_variable = pars.overlapping$groups_var,
  penalty_weights = pars.overlapping$group_weights,
  nfolds = 5,
  tol = 1e-4,
  maxit = 1e3,
  verbose = FALSE
)
plot(cv.overlapping$sox.fit)
sox_cv
Plot the solution path or cross-validation curves produced by sox_cv()
.
## S3 method for class 'sox_cv' plot(x, type = "cv-curve", ...)
## S3 method for class 'sox_cv' plot(x, type = "cv-curve", ...)
x |
The |
type |
Character string, " |
... |
Other graphical parameters to plot |
The "solution-path
" plot produces a coefficient profile plot of the coefficient paths for a fitted sox
model. The "cv-curve
" plot is the cvm
(red dot) for each lambda with its standard error (vertical bar). The two vertical dashed lines correspond to the lambda.min
and lambda.1se.
# Example: plot the cross-validation curve and the solution path from sox_cv().

# Predictor matrix taken from the demo dataset `sim`.
x <- as.matrix(
  sim[, c("A1", "A2", "C1", "C2", "B", "A1B", "A2B", "C1B", "C2B")]
)

# Log-spaced lambda sequence from 1 down to 1e-3 (20 values).
lam.seq <- exp(seq(log(1e0), log(1e-3), length.out = 20))

# Overlapping group structure (indices refer to the columns of x).
overlapping.groups <- list(
  c(1, 2, 6, 7),
  c(5, 6, 7, 8, 9),
  c(6, 7),
  c(3, 4, 8, 9),
  c(8, 9)
)
pars.overlapping <- overlap_structure(overlapping.groups)

cv.overlapping <- sox_cv(
  x = x,
  ID = sim$Id,
  time = sim$Start,
  time2 = sim$Stop,
  event = sim$Event,
  penalty = "overlapping",
  lambda = lam.seq,
  group = pars.overlapping$groups,
  group_variable = pars.overlapping$groups_var,
  penalty_weights = pars.overlapping$group_weights,
  nfolds = 5,
  tol = 1e-4,
  maxit = 1e3,
  verbose = FALSE
)

# Default plot type is "cv-curve"; request the coefficient paths explicitly.
plot(cv.overlapping)
plot(cv.overlapping, type = "solution-path")
sim
A simulated demo dataset sim
data(sim)
data(sim)
A simulated data frame that is used to illustrate the use of the sox package. The maximum follow-up time for each subject is set to be 5. The total number of subjects is 50.
The ID of each subject.
Whether the subject experiences the event during the time from Start
to Stop
. We use the function permalgorithm
in the R
package PermAlgo
to generate the Event.
Start time.
Stop time.
The total follow-up time for the subject.
A1, A2, C1, C2, B, A1B, A2B, C1B, C2B. The dataset contains 5 variables (9 columns after one-hot encoding). Variable A is a 3-level categorical variable, which results in 2 binary variables (A1 and A2); the same holds for variable C. B is a continuous variable. The interaction terms AB and CB are also two 3-level categorical variables. The code for generating the covariates is given below.
PermAlgo
# Code used to generate the covariates of the `sim` dataset.

# Generate B: a continuous covariate of length m whose value is held constant
# over runs of 5 to 10 consecutive time points.
gen_con <- function(m) {
  # Each draw covers at least 5 time points, so ceiling(m / 5) draws always
  # suffice, including when m is not a multiple of 5.
  X <- rnorm(ceiling(m / 5))
  XX <- NULL
  for (i in seq_along(X)) {
    # Stop consuming random numbers once m values are available.
    if (length(XX) < m) {
      X.rep <- rep(X[i], round(runif(1, 5, 10), 0))
      XX <- c(XX, X.rep)
    }
  }
  XX[seq_len(m)]
}

# Generate A and C: a 3-level categorical covariate of length m whose level is
# held constant over runs of 5 to 10 consecutive time points.
gen_cat <- function(m) {
  X <- sample.int(3, ceiling(m / 5), replace = TRUE)
  XX <- NULL
  for (i in seq_along(X)) {
    if (length(XX) < m) {
      X.rep <- rep(X[i], round(runif(1, 5, 10), 0))
      XX <- c(XX, X.rep)
    }
  }
  XX[seq_len(m)]
}

# Generate the covariate matrix (m rows, 9 columns) for one subject.
# Level 3 of A and of C is the reference level (both dummies equal 0).
gen_X <- function(m) {
  A <- gen_cat(m)
  B <- gen_con(m)
  C <- gen_cat(m)
  A1 <- ifelse(A == 1, 1, 0)
  A2 <- ifelse(A == 2, 1, 0)
  C1 <- ifelse(C == 1, 1, 0)
  C2 <- ifelse(C == 2, 1, 0)
  A1B <- A1 * B
  A2B <- A2 * B
  C1B <- C1 * B
  C2B <- C2 * B
  as.matrix(cbind(A1, A2, C1, C2, B, A1B, A2B, C1B, C2B))
}

# Generate the covariates for all n subjects, stacked row-wise.
# Returns NULL when n is 0.
gen_X_n <- function(m, n) {
  # Subjects are generated sequentially, so the random-number stream is
  # consumed in the same order as an explicit loop over subjects.
  do.call(rbind, lapply(seq_len(n), function(i) gen_X(m)))
}

n <- 50
m <- 5
covariates <- gen_X_n(m, n)

# generate outcomes
# library(PermAlgo)
# data <- permalgorithm(n, m, covariates,
#                       XmatNames = c("A1","A2","C1","C2","B","A1B","A2B","C1B","C2B"),
#                       #change according to scenario 1/2
#                       betas = c(rep(log(3),2),rep(0,2), log(4), rep(log(3),2),rep(0,2)),
#                       groupByD=FALSE )
# fit.original = coxph(Surv(Start, Stop, Event) ~ . ,data[,-c(1,3)])
Fit a (time-dependent) Cox model with overlapping (including nested) group lasso penalty. The regularization path is computed at a grid of values for the regularization parameter lambda.
sox( x, ID, time, time2, event, penalty, lambda, group, group_variable, own_variable, no_own_variable, penalty_weights, par_init, stepsize_init = 1, stepsize_shrink = 0.8, tol = 1e-05, maxit = 1000L, verbose = FALSE )
sox( x, ID, time, time2, event, penalty, lambda, group, group_variable, own_variable, no_own_variable, penalty_weights, par_init, stepsize_init = 1, stepsize_shrink = 0.8, tol = 1e-05, maxit = 1000L, verbose = FALSE )
x |
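Predictor matrix with dimension $nm \times p$, where $n$ is the number of subjects, $m$ is the maximum observation time, and $p$ is the number of predictors. |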
ID |
The ID of each subject; each subject has one ID (multiple rows in x). |
time |
Represents the start of each time interval. |
time2 |
Represents the stop of each time interval. |
event |
Indicator of event. |
penalty |
Character string, indicating whether " |
lambda |
Sequence of regularization coefficients |
group |
A |
group_variable |
A |
own_variable |
A non-decreasing integer vector of length |
no_own_variable |
An integer vector of length |
penalty_weights |
Optional, vector of length |
par_init |
Optional, vector of initial values of the optimization algorithm. Default initial value is zero for all |
stepsize_init |
Initial value of the stepsize of the optimization algorithm. Default is 1.0. |
stepsize_shrink |
Factor in |
tol |
Convergence criterion. Algorithm stops when the |
maxit |
Maximum number of iterations allowed. |
verbose |
Logical, whether progress is printed. |
The predictor matrix should be of dimension $nm \times p$. Each row records the values of covariates for one subject at one time, for example, the values at the day from
time
(Start) to time2
(Stop). An example dataset sim
is provided. The dataset has the format produced by the R
package PermAlgo.
The specification of the arguments group
, group_variable
, own_variable
and no_own_variable
for the grouping structure can be found in https://thoth.inrialpes.fr/people/mairal/spams/doc-R/html/doc_spams006.html#sec26 and https://thoth.inrialpes.fr/people/mairal/spams/doc-R/html/doc_spams006.html#sec27.
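In the Examples below, $p = 9$ and $G = 5$, and the group structure is:
$g_1 = \{A1, A2, A1B, A2B\}$, $g_2 = \{B, A1B, A2B, C1B, C2B\}$, $g_3 = \{A1B, A2B\}$, $g_4 = \{C1, C2, C1B, C2B\}$, $g_5 = \{C1B, C2B\}$,
where $g_3$ is a subset of $g_1$ and $g_2$, and $g_5$ is a subset of $g_2$ and $g_4$.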
A list with the following three elements.
lambdas |
The user-specified regularization coefficients |
estimates |
A matrix, with each column corresponding to the coefficient estimates at each |
iterations |
A vector of number of iterations it takes to converge at each |
# Example: fit sox() with an overlapping and with a nested group structure.

# Predictor matrix taken from the demo dataset `sim`.
x <- as.matrix(
  sim[, c("A1", "A2", "C1", "C2", "B", "A1B", "A2B", "C1B", "C2B")]
)

# Log-spaced lambda sequence from 1 down to 1e-3 (20 values).
lam.seq <- exp(seq(log(1e0), log(1e-3), length.out = 20))

# Variables:
## 1: A1
## 2: A2
## 3: C1
## 4: C2
## 5: B
## 6: A1B
## 7: A2B
## 8: C1B
## 9: C2B

# Overlapping groups:
## g1: A1, A2, A1B, A2B
## g2: B, A1B, A2B, C1B, C2B
## g3: A1B, A2B
## g4: C1, C2, C1B, C2B
## g5: C1B, C2B
overlapping.groups <- list(
  c(1, 2, 6, 7),
  c(5, 6, 7, 8, 9),
  c(6, 7),
  c(3, 4, 8, 9),
  c(8, 9)
)
pars.overlapping <- overlap_structure(overlapping.groups)

fit.overlapping <- sox(
  x = x,
  ID = sim$Id,
  time = sim$Start,
  time2 = sim$Stop,
  event = sim$Event,
  penalty = "overlapping",
  lambda = lam.seq,
  group = pars.overlapping$groups,
  group_variable = pars.overlapping$groups_var,
  penalty_weights = pars.overlapping$group_weights,
  tol = 1e-4,
  maxit = 1e3,
  verbose = FALSE
)
str(fit.overlapping)

# Nested groups (misspecified, for the demonstration of the software only.)
## g1: A1, A2, C1, C2, B, A1B, A2B, C1B, C2B
## g2: A1, A2, A1B, A2B
## g3: C1, C2, C1B, C2B
## g4: 1
## g5: 2
## ...
## g12: 9
nested.groups <- list(
  1:9,
  c(1, 2, 6, 7),
  c(3, 4, 8, 9),
  1, 2, 3, 4, 5, 6, 7, 8, 9
)
pars.nested <- nested_structure(nested.groups)

# The nested penalty takes own_variable / no_own_variable instead of
# group_variable.
fit.nested <- sox(
  x = x,
  ID = sim$Id,
  time = sim$Start,
  time2 = sim$Stop,
  event = sim$Event,
  penalty = "nested",
  lambda = lam.seq,
  group = pars.nested$groups,
  own_variable = pars.nested$own_variables,
  no_own_variable = pars.nested$N_own_variables,
  penalty_weights = pars.nested$group_weights,
  tol = 1e-4,
  maxit = 1e3,
  verbose = FALSE
)
str(fit.nested)
sox
Conduct cross-validation (cv) for sox
.
sox_cv( x, ID, time, time2, event, penalty, lambda, group, group_variable, own_variable, no_own_variable, penalty_weights, par_init, nfolds = 10, foldid = NULL, stepsize_init = 1, stepsize_shrink = 0.8, tol = 1e-05, maxit = 1000L, verbose = FALSE )
sox_cv( x, ID, time, time2, event, penalty, lambda, group, group_variable, own_variable, no_own_variable, penalty_weights, par_init, nfolds = 10, foldid = NULL, stepsize_init = 1, stepsize_shrink = 0.8, tol = 1e-05, maxit = 1000L, verbose = FALSE )
x |
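Predictor matrix with dimension $nm \times p$, where $n$ is the number of subjects, $m$ is the maximum observation time, and $p$ is the number of predictors. |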
ID |
The ID of each subject; each subject has one ID (multiple rows in x). |
time |
Represents the start of each time interval. |
time2 |
Represents the stop of each time interval. |
event |
Indicator of event. |
penalty |
Character string, indicating whether " |
lambda |
Sequence of regularization coefficients |
group |
A |
group_variable |
A |
own_variable |
A non-decreasing integer vector of length |
no_own_variable |
An integer vector of length |
penalty_weights |
Optional, vector of length |
par_init |
Optional, vector of initial values of the optimization algorithm. Default initial value is zero for all |
nfolds |
Optional, the number of folds of cross-validation. Default is 10. |
foldid |
Optional, user-specified vector indicating the cross-validation fold in which each observation should be included. Values in this vector should range from 1 to |
stepsize_init |
Initial value of the stepsize of the optimization algorithm. Default is 1. |
stepsize_shrink |
Factor in |
tol |
Convergence criterion. Algorithm stops when the |
maxit |
Maximum number of iterations allowed. |
verbose |
Logical, whether progress is printed. |
For each lambda, 10-fold cross-validation (by default) is performed. The cv error is defined as follows. Suppose we perform $K$-fold cross-validation, and denote by $\hat{\beta}^{-k}$ the estimate obtained from the remaining $K-1$ folds (training set). The error of the $k$-th fold (test set) is defined as $2(P - Q)$ divided by $R$, where $P$ is the log partial likelihood evaluated at $\hat{\beta}^{-k}$ using the entire dataset, $Q$ is the log partial likelihood evaluated at $\hat{\beta}^{-k}$ using the training set, and $R$ is the number of events in the test set. We do not use the negative log partial likelihood evaluated at $\hat{\beta}^{-k}$ using the test set because the former definition can efficiently use the risk set, and thus it is more stable when the number of events in each test set is small (think of leave-one-out). The cv error is used in parameter tuning. To account for balance in outcomes among the randomly formed test sets, we divide the deviance $2(P - Q)$ by $R$.
To get the estimated coefficients that have the minimum cv error, use
sox_cv()$Estimates[, sox_cv$index["min",]]
. To apply the 1-se rule, use sox_cv()$Estimates[, sox_cv$index["1se",]]
.
A list.
lambdas |
A vector of lambda used for each cross-validation. |
cvm |
The cv error averaged across all folds for each lambda. |
cvsd |
The standard error of the cv error for each lambda. |
cvup |
The cv error plus its standard error for each lambda. |
cvlo |
The cv error minus its standard error for each lambda. |
nzero |
The number of non-zero coefficients at each lambda. |
sox.fit |
A fitted model for the full data at all lambdas of class " |
lambda.min |
The lambda such that the |
lambda.1se |
The maximum of lambda such that the |
foldid |
The fold assignments used. |
index |
A one column matrix with the indices of |
iterations |
A vector of number of iterations it takes to converge at each |
# Example: cross-validate sox() with an overlapping and with a nested group
# structure.

# Predictor matrix taken from the demo dataset `sim`.
x <- as.matrix(
  sim[, c("A1", "A2", "C1", "C2", "B", "A1B", "A2B", "C1B", "C2B")]
)

# Log-spaced lambda sequence from 1 down to 1e-3 (20 values).
lam.seq <- exp(seq(log(1e0), log(1e-3), length.out = 20))

# Variables:
## 1: A1
## 2: A2
## 3: C1
## 4: C2
## 5: B
## 6: A1B
## 7: A2B
## 8: C1B
## 9: C2B

# Overlapping groups:
## g1: A1, A2, A1B, A2B
## g2: B, A1B, A2B, C1B, C2B
## g3: A1B, A2B
## g4: C1, C2, C1B, C2B
## g5: C1B, C2B
overlapping.groups <- list(
  c(1, 2, 6, 7),
  c(5, 6, 7, 8, 9),
  c(6, 7),
  c(3, 4, 8, 9),
  c(8, 9)
)
pars.overlapping <- overlap_structure(overlapping.groups)

cv.overlapping <- sox_cv(
  x = x,
  ID = sim$Id,
  time = sim$Start,
  time2 = sim$Stop,
  event = sim$Event,
  penalty = "overlapping",
  lambda = lam.seq,
  group = pars.overlapping$groups,
  group_variable = pars.overlapping$groups_var,
  penalty_weights = pars.overlapping$group_weights,
  nfolds = 5,
  tol = 1e-4,
  maxit = 1e3,
  verbose = FALSE
)
str(cv.overlapping)

# Nested groups (misspecified, for the demonstration of the software only.)
## g1: A1, A2, C1, C2, B, A1B, A2B, C1B, C2B
## g2: A1, A2, A1B, A2B
## g3: C1, C2, C1B, C2B
## g4: 1
## g5: 2
## ...
## g12: 9
nested.groups <- list(
  1:9,
  c(1, 2, 6, 7),
  c(3, 4, 8, 9),
  1, 2, 3, 4, 5, 6, 7, 8, 9
)
pars.nested <- nested_structure(nested.groups)

# The nested penalty takes own_variable / no_own_variable instead of
# group_variable.
cv.nested <- sox_cv(
  x = x,
  ID = sim$Id,
  time = sim$Start,
  time2 = sim$Stop,
  event = sim$Event,
  penalty = "nested",
  lambda = lam.seq,
  group = pars.nested$groups,
  own_variable = pars.nested$own_variables,
  no_own_variable = pars.nested$N_own_variables,
  penalty_weights = pars.nested$group_weights,
  nfolds = 5,
  tol = 1e-4,
  maxit = 1e3,
  verbose = FALSE
)
str(cv.nested)