Title: | Competing Proximal Gradients Library |
---|---|
Description: | Functions to generate ensembles of generalized linear models using competing proximal gradients. The optimal sparsity and diversity tuning parameters are selected via an alternating grid search. |
Authors: | Anthony Christidis [aut, cre], Stefan Van Aelst [aut], Ruben Zamar [aut] |
Maintainer: | Anthony Christidis <[email protected]> |
License: | GPL (>= 2) |
Version: | 1.1.1 |
Built: | 2024-12-12 07:07:16 UTC |
Source: | CRAN |
coef.CPGLIB
returns the coefficients for a CPGLIB object.
## S3 method for class 'CPGLIB' coef(object, groups = NULL, ensemble_average = FALSE, ...)
## S3 method for class 'CPGLIB' coef(object, groups = NULL, ensemble_average = FALSE, ...)
object |
An object of class CPGLIB. |
groups |
The groups in the ensemble for the coefficients. Default is all of the groups in the ensemble. |
ensemble_average |
Option to return the average of the coefficients over all the groups in the ensemble. Default is FALSE. |
... |
Additional arguments for compatibility. |
The coefficients for the CPGLIB object.
Anthony-Alexander Christidis, [email protected]
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CPGLIB - Multiple Groups cpg.out <- cpg(x.train, y.train, glm_type="Logistic", G=5, include_intercept=TRUE, alpha_s=3/4, alpha_d=1, lambda_sparsity=0.01, lambda_diversity=1, tolerance=1e-5, max_iter=1e5) # Coefficients for each group cpg.coef <- coef(cpg.out, ensemble_average = FALSE)
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CPGLIB - Multiple Groups cpg.out <- cpg(x.train, y.train, glm_type="Logistic", G=5, include_intercept=TRUE, alpha_s=3/4, alpha_d=1, lambda_sparsity=0.01, lambda_diversity=1, tolerance=1e-5, max_iter=1e5) # Coefficients for each group cpg.coef <- coef(cpg.out, ensemble_average = FALSE)
coef.cv.CPGLIB
returns the coefficients for a cv.CPGLIB object.
## S3 method for class 'cv.CPGLIB' coef(object, groups = NULL, ensemble_average = FALSE, ...)
## S3 method for class 'cv.CPGLIB' coef(object, groups = NULL, ensemble_average = FALSE, ...)
object |
An object of class cv.CPGLIB. |
groups |
The groups in the ensemble for the coefficients. Default is all of the groups in the ensemble. |
ensemble_average |
Option to return the average of the coefficients over all the groups in the ensemble. Default is FALSE. |
... |
Additional arguments for compatibility. |
The coefficients for the cv.CPGLIB object.
Anthony-Alexander Christidis, [email protected]
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) mean(y.test) # CV CPGLIB - Multiple Groups cpg.out <- cv.cpg(x.train, y.train, glm_type = "Logistic", G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, n_lambda_sparsity = 100, n_lambda_diversity = 100, tolerance = 1e-5, max_iter = 1e5) cpg.coef <- coef(cpg.out) # Coefficients for each group cpg.coef <- coef(cpg.out, ensemble_average = FALSE)
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) mean(y.test) # CV CPGLIB - Multiple Groups cpg.out <- cv.cpg(x.train, y.train, glm_type = "Logistic", G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, n_lambda_sparsity = 100, n_lambda_diversity = 100, tolerance = 1e-5, max_iter = 1e5) cpg.coef <- coef(cpg.out) # Coefficients for each group cpg.coef <- coef(cpg.out, ensemble_average = FALSE)
coef.cv.ProxGrad
returns the coefficients for a cv.ProxGrad object.
## S3 method for class 'cv.ProxGrad' coef(object, ...)
## S3 method for class 'cv.ProxGrad' coef(object, ...)
object |
An object of class cv.ProxGrad. |
... |
Additional arguments for compatibility. |
The coefficients for the cv.ProxGrad object.
Anthony-Alexander Christidis, [email protected]
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CV ProxGrad - Single Group proxgrad.out <- cv.ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, n_lambda_sparsity = 100, tolerance = 1e-5, max_iter = 1e5) # Coefficients coef(proxgrad.out)
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CV ProxGrad - Single Group proxgrad.out <- cv.ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, n_lambda_sparsity = 100, tolerance = 1e-5, max_iter = 1e5) # Coefficients coef(proxgrad.out)
coef.ProxGrad
returns the coefficients for a ProxGrad object.
## S3 method for class 'ProxGrad' coef(object, ...)
## S3 method for class 'ProxGrad' coef(object, ...)
object |
An object of class ProxGrad. |
... |
Additional arguments for compatibility. |
The coefficients for the ProxGrad object.
Anthony-Alexander Christidis, [email protected]
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # ProxGrad - Single Group proxgrad.out <- ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, lambda_sparsity = 0.01, tolerance = 1e-5, max_iter = 1e5) # Coefficients coef(proxgrad.out)
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # ProxGrad - Single Group proxgrad.out <- ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, lambda_sparsity = 0.01, tolerance = 1e-5, max_iter = 1e5) # Coefficients coef(proxgrad.out)
cpg
computes the coefficients for ensembles of generalized linear models via competing proximal gradients.
cpg( x, y, glm_type = c("Linear", "Logistic")[1], G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, lambda_sparsity, lambda_diversity, tolerance = 1e-08, max_iter = 1e+05 )
cpg( x, y, glm_type = c("Linear", "Logistic")[1], G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, lambda_sparsity, lambda_diversity, tolerance = 1e-08, max_iter = 1e+05 )
x |
Design matrix. |
y |
Response vector. |
glm_type |
Description of the error distribution and link function to be used for the model. Must be one of "Linear" or "Logistic". Default is "Linear". |
G |
Number of groups in the ensemble. |
include_intercept |
Argument to determine whether there is an intercept. Default is TRUE. |
alpha_s |
Sparsity mixing parameter. Default is 3/4. |
alpha_d |
Diversity mixing parameter. Default is 1. |
lambda_sparsity |
Sparsity tuning parameter value. |
lambda_diversity |
Diversity tuning parameter value. |
tolerance |
Convergence criteria for the coefficients. Default is 1e-8. |
max_iter |
Maximum number of iterations in the algorithm. Default is 1e5. |
An object of class CPGLIB.
Anthony-Alexander Christidis, [email protected]
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CPGLIB - Multiple Groups cpg.out <- cpg(x.train, y.train, glm_type = "Logistic", G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, lambda_sparsity = 0.01, lambda_diversity = 1, tolerance = 1e-5, max_iter = 1e5) # Predictions cpg.prob <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") cpg.class <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") plot(prob.test, cpg.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-cpg.prob)^2) mean(abs(y.test-cpg.class))
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CPGLIB - Multiple Groups cpg.out <- cpg(x.train, y.train, glm_type = "Logistic", G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, lambda_sparsity = 0.01, lambda_diversity = 1, tolerance = 1e-5, max_iter = 1e5) # Predictions cpg.prob <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") cpg.class <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") plot(prob.test, cpg.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-cpg.prob)^2) mean(abs(y.test-cpg.class))
cv.cpg
computes and cross-validates the coefficients for ensembles of generalized linear models via competing proximal gradients.
cv.cpg( x, y, glm_type = c("Linear", "Logistic")[1], G = 5, full_diversity = FALSE, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, n_lambda_sparsity = 100, n_lambda_diversity = 100, tolerance = 1e-08, max_iter = 1e+05, n_folds = 10, n_threads = 1 )
cv.cpg( x, y, glm_type = c("Linear", "Logistic")[1], G = 5, full_diversity = FALSE, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, n_lambda_sparsity = 100, n_lambda_diversity = 100, tolerance = 1e-08, max_iter = 1e+05, n_folds = 10, n_threads = 1 )
x |
Design matrix. |
y |
Response vector. |
glm_type |
Description of the error distribution and link function to be used for the model. Must be one of "Linear" or "Logistic". Default is "Linear". |
G |
Number of groups in the ensemble. |
full_diversity |
Argument to determine if the overlap between the models should be zero. Default is FALSE. |
include_intercept |
Argument to determine whether there is an intercept. Default is TRUE. |
alpha_s |
Sparsity mixing parameter. Default is 3/4. |
alpha_d |
Diversity mixing parameter. Default is 1. |
n_lambda_sparsity |
Number of candidates for sparsity tuning parameter. Default is 100. |
n_lambda_diversity |
Number of candidates for diversity tuning parameter. Default is 100. |
tolerance |
Convergence criteria for the coefficients. Default is 1e-8. |
max_iter |
Maximum number of iterations in the algorithm. Default is 1e5. |
n_folds |
Number of cross-validation folds. Default is 10. |
n_threads |
Number of threads. Default is a single thread. |
An object of class cv.CPGLIB.
Anthony-Alexander Christidis, [email protected]
coef.cv.CPGLIB
, predict.cv.CPGLIB
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CV CPGLIB - Multiple Groups cpg.out <- cv.cpg(x.train, y.train, glm_type = "Logistic", G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, n_lambda_sparsity = 100, n_lambda_diversity = 100, tolerance = 1e-5, max_iter = 1e5) # Predictions cpg.prob <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") cpg.class <- predict(cpg.out, newx = x.test, type = "class", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") plot(prob.test, cpg.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-cpg.prob)^2) mean(abs(y.test-cpg.class))
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CV CPGLIB - Multiple Groups cpg.out <- cv.cpg(x.train, y.train, glm_type = "Logistic", G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, n_lambda_sparsity = 100, n_lambda_diversity = 100, tolerance = 1e-5, max_iter = 1e5) # Predictions cpg.prob <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") cpg.class <- predict(cpg.out, newx = x.test, type = "class", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") plot(prob.test, cpg.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-cpg.prob)^2) mean(abs(y.test-cpg.class))
cv.ProxGrad
computes and cross-validates the coefficients for generalized linear models using proximal gradients.
cv.ProxGrad( x, y, glm_type = c("Linear", "Logistic")[1], include_intercept = TRUE, alpha_s = 3/4, n_lambda_sparsity = 100, tolerance = 1e-08, max_iter = 1e+05, n_folds = 10, n_threads = 1 )
cv.ProxGrad( x, y, glm_type = c("Linear", "Logistic")[1], include_intercept = TRUE, alpha_s = 3/4, n_lambda_sparsity = 100, tolerance = 1e-08, max_iter = 1e+05, n_folds = 10, n_threads = 1 )
x |
Design matrix. |
y |
Response vector. |
glm_type |
Description of the error distribution and link function to be used for the model. Must be one of "Linear" or "Logistic". Default is "Linear". |
include_intercept |
Argument to determine whether there is an intercept. Default is TRUE. |
alpha_s |
Elastic net mixing parameter. Default is 3/4. |
n_lambda_sparsity |
Number of candidates for sparsity tuning parameter. Default is 100. |
tolerance |
Convergence criteria for the coefficients. Default is 1e-8. |
max_iter |
Maximum number of iterations in the algorithm. Default is 1e5. |
n_folds |
Number of cross-validation folds. Default is 10. |
n_threads |
Number of threads. Default is a single thread. |
An object of class cv.ProxGrad
Anthony-Alexander Christidis, [email protected]
coef.cv.ProxGrad
, predict.cv.ProxGrad
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # ProxGrad - Single Groups proxgrad.out <- cv.ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, n_lambda_sparsity = 100, tolerance = 1e-5, max_iter = 1e5) # Predictions proxgrad.prob <- predict(proxgrad.out, newx = x.test, type = "prob") proxgrad.class <- predict(proxgrad.out, newx = x.test, type = "class") plot(prob.test, proxgrad.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-proxgrad.prob)^2) mean(abs(y.test-proxgrad.class))
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # ProxGrad - Single Groups proxgrad.out <- cv.ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, n_lambda_sparsity = 100, tolerance = 1e-5, max_iter = 1e5) # Predictions proxgrad.prob <- predict(proxgrad.out, newx = x.test, type = "prob") proxgrad.class <- predict(proxgrad.out, newx = x.test, type = "class") plot(prob.test, proxgrad.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-proxgrad.prob)^2) mean(abs(y.test-proxgrad.class))
predict.CPGLIB
returns the predictions for a CPGLIB object.
## S3 method for class 'CPGLIB' predict( object, newx, groups = NULL, ensemble_type = c("Model-Avg", "Coef-Avg", "Weighted-Prob", "Majority-Vote")[1], class_type = c("prob", "class")[1], ... )
## S3 method for class 'CPGLIB' predict( object, newx, groups = NULL, ensemble_type = c("Model-Avg", "Coef-Avg", "Weighted-Prob", "Majority-Vote")[1], class_type = c("prob", "class")[1], ... )
object |
An object of class CPGLIB. |
newx |
New data for predictions. |
groups |
The groups in the ensemble for the predictions. Default is all of the groups in the ensemble. |
ensemble_type |
The type of ensembling function for the models. Options are "Model-Avg", "Coef-Avg", "Weighted-Prob" or "Majority-Vote" for classification predictions. Default is "Model-Avg". |
class_type |
The type of predictions for classification. Options are "prob" and "class". Default is "prob". |
... |
Additional arguments for compatibility. |
The predictions for the CPGLIB object.
Anthony-Alexander Christidis, [email protected]
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CPGLIB - Multiple Groups cpg.out <- cpg(x.train, y.train, glm_type = "Logistic", G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, lambda_sparsity = 0.01, lambda_diversity = 1, tolerance = 1e-5, max_iter = 1e5) # Predictions cpg.prob <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") cpg.class <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") plot(prob.test, cpg.prob, pch=20) abline(h=0.5,v=0.5) mean((prob.test-cpg.prob)^2) mean(abs(y.test-cpg.class))
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CPGLIB - Multiple Groups cpg.out <- cpg(x.train, y.train, glm_type = "Logistic", G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, lambda_sparsity = 0.01, lambda_diversity = 1, tolerance = 1e-5, max_iter = 1e5) # Predictions cpg.prob <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") cpg.class <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") plot(prob.test, cpg.prob, pch=20) abline(h=0.5,v=0.5) mean((prob.test-cpg.prob)^2) mean(abs(y.test-cpg.class))
predict.cv.CPGLIB
returns the predictions for a cv.CPGLIB object.
## S3 method for class 'cv.CPGLIB' predict( object, newx, groups = NULL, ensemble_type = c("Model-Avg", "Coef-Avg", "Weighted-Prob", "Majority-Vote")[1], class_type = c("prob", "class")[1], ... )
## S3 method for class 'cv.CPGLIB' predict( object, newx, groups = NULL, ensemble_type = c("Model-Avg", "Coef-Avg", "Weighted-Prob", "Majority-Vote")[1], class_type = c("prob", "class")[1], ... )
object |
An object of class cv.CPGLIB. |
newx |
New data for predictions. |
groups |
The groups in the ensemble for the predictions. Default is all of the groups in the ensemble. |
ensemble_type |
The type of ensembling function for the models. Options are "Model-Avg", "Coef-Avg", "Weighted-Prob" or "Majority-Vote" for classification predictions. Default is "Model-Avg". |
class_type |
The type of predictions for classification. Options are "prob" and "class". Default is "prob". |
... |
Additional arguments for compatibility. |
The predictions for the cv.CPGLIB object.
Anthony-Alexander Christidis, [email protected]
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) mean(y.test) # CV CPGLIB - Multiple Groups cpg.out <- cv.cpg(x.train, y.train, glm_type = "Logistic", G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, n_lambda_sparsity = 100, n_lambda_diversity = 100, tolerance = 1e-5, max_iter = 1e5) # Predictions cpg.prob <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") cpg.class <- predict(cpg.out, newx = x.test, type = "class", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") plot(prob.test, cpg.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-cpg.prob)^2) mean(abs(y.test-cpg.class))
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 300 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 150 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) mean(y.test) # CV CPGLIB - Multiple Groups cpg.out <- cv.cpg(x.train, y.train, glm_type = "Logistic", G = 5, include_intercept = TRUE, alpha_s = 3/4, alpha_d = 1, n_lambda_sparsity = 100, n_lambda_diversity = 100, tolerance = 1e-5, max_iter = 1e5) # Predictions cpg.prob <- predict(cpg.out, newx = x.test, type = "prob", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") cpg.class <- predict(cpg.out, newx = x.test, type = "class", groups = 1:cpg.out$G, ensemble_type = "Model-Avg") plot(prob.test, cpg.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-cpg.prob)^2) mean(abs(y.test-cpg.class))
predict.cv.ProxGrad
returns the predictions for a cv.ProxGrad object.
## S3 method for class 'cv.ProxGrad' predict(object, newx, type = c("prob", "class")[1], ...)
## S3 method for class 'cv.ProxGrad' predict(object, newx, type = c("prob", "class")[1], ...)
object |
An object of class cv.ProxGrad. |
newx |
New data for predictions. |
type |
The type of predictions for binary response. Options are "prob" (default) and "class". |
... |
Additional arguments for compatibility. |
The predictions for the cv.ProxGrad object.
Anthony-Alexander Christidis, [email protected]
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CV ProxGrad - Single Group proxgrad.out <- cv.ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, n_lambda_sparsity = 100, tolerance = 1e-5, max_iter = 1e5) # Predictions proxgrad.prob <- predict(proxgrad.out, newx = x.test, type = "prob") proxgrad.class <- predict(proxgrad.out, newx = x.test, type = "class") plot(prob.test, proxgrad.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-proxgrad.prob)^2) mean(abs(y.test-proxgrad.class))
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # CV ProxGrad - Single Group proxgrad.out <- cv.ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, n_lambda_sparsity = 100, tolerance = 1e-5, max_iter = 1e5) # Predictions proxgrad.prob <- predict(proxgrad.out, newx = x.test, type = "prob") proxgrad.class <- predict(proxgrad.out, newx = x.test, type = "class") plot(prob.test, proxgrad.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-proxgrad.prob)^2) mean(abs(y.test-proxgrad.class))
predict.ProxGrad
returns the predictions for a ProxGrad object.
## S3 method for class 'ProxGrad' predict(object, newx, type = c("prob", "class")[1], ...)
## S3 method for class 'ProxGrad' predict(object, newx, type = c("prob", "class")[1], ...)
object |
An object of class ProxGrad. |
newx |
New data for predictions. |
type |
The type of predictions for binary response. Options are "prob" (default) and "class". |
... |
Additional arguments for compatibility. |
The predictions for the ProxGrad object.
Anthony-Alexander Christidis, [email protected]
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # ProxGrad - Single Group proxgrad.out <- ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, lambda_sparsity = 0.01, tolerance = 1e-5, max_iter = 1e5) # Predictions proxgrad.prob <- predict(proxgrad.out, newx = x.test, type = "prob") proxgrad.class <- predict(proxgrad.out, newx = x.test, type = "class") plot(prob.test, proxgrad.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-proxgrad.prob)^2) mean(abs(y.test-proxgrad.class))
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # ProxGrad - Single Group proxgrad.out <- ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, lambda_sparsity = 0.01, tolerance = 1e-5, max_iter = 1e5) # Predictions proxgrad.prob <- predict(proxgrad.out, newx = x.test, type = "prob") proxgrad.class <- predict(proxgrad.out, newx = x.test, type = "class") plot(prob.test, proxgrad.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-proxgrad.prob)^2) mean(abs(y.test-proxgrad.class))
ProxGrad
computes the coefficients for generalized linear models using proximal gradients.
ProxGrad( x, y, glm_type = c("Linear", "Logistic")[1], include_intercept = TRUE, alpha_s = 3/4, lambda_sparsity, tolerance = 1e-08, max_iter = 1e+05 )
ProxGrad( x, y, glm_type = c("Linear", "Logistic")[1], include_intercept = TRUE, alpha_s = 3/4, lambda_sparsity, tolerance = 1e-08, max_iter = 1e+05 )
x |
Design matrix. |
y |
Response vector. |
glm_type |
Description of the error distribution and link function to be used for the model. Must be one of "Linear" or "Logistic". Default is "Linear". |
include_intercept |
Argument to determine whether there is an intercept. Default is TRUE. |
alpha_s |
Elastic net mixing parameter. Default is 3/4. |
lambda_sparsity |
Sparsity tuning parameter value. |
tolerance |
Convergence criterion (tolerance) for the coefficients. Default is 1e-8. |
max_iter |
Maximum number of iterations in the algorithm. Default is 1e5. |
An object of class ProxGrad.
Anthony-Alexander Christidis, [email protected]
coef.ProxGrad, predict.ProxGrad
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # ProxGrad - Single Group proxgrad.out <- ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, lambda_sparsity = 0.01, tolerance = 1e-5, max_iter = 1e5) # Predictions proxgrad.prob <- predict(proxgrad.out, newx = x.test, type = "prob") proxgrad.class <- predict(proxgrad.out, newx = x.test, type = "class") plot(prob.test, proxgrad.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-proxgrad.prob)^2) mean(abs(y.test-proxgrad.class))
# Data simulation set.seed(1) n <- 50 N <- 2000 p <- 1000 beta.active <- c(abs(runif(p, 0, 1/2))*(-1)^rbinom(p, 1, 0.3)) # Parameters p.active <- 100 beta <- c(beta.active[1:p.active], rep(0, p-p.active)) Sigma <- matrix(0, p, p) Sigma[1:p.active, 1:p.active] <- 0.5 diag(Sigma) <- 1 # Train data x.train <- mvnfast::rmvn(n, mu = rep(0, p), sigma = Sigma) prob.train <- exp(x.train %*% beta)/ (1+exp(x.train %*% beta)) y.train <- rbinom(n, 1, prob.train) # Test data x.test <- mvnfast::rmvn(N, mu = rep(0, p), sigma = Sigma) prob.test <- exp(x.test %*% beta)/ (1+exp(x.test %*% beta)) y.test <- rbinom(N, 1, prob.test) # ProxGrad - Single Group proxgrad.out <- ProxGrad(x.train, y.train, glm_type = "Logistic", include_intercept = TRUE, alpha_s = 3/4, lambda_sparsity = 0.01, tolerance = 1e-5, max_iter = 1e5) # Predictions proxgrad.prob <- predict(proxgrad.out, newx = x.test, type = "prob") proxgrad.class <- predict(proxgrad.out, newx = x.test, type = "class") plot(prob.test, proxgrad.prob, pch = 20) abline(h = 0.5,v = 0.5) mean((prob.test-proxgrad.prob)^2) mean(abs(y.test-proxgrad.class))