Title: | Optimal Testing |
---|---|
Description: | Optimal testing under general dependence. The R package implements procedures proposed in Wang, Han, and Tong (2022). The package includes parameter estimation procedures, the computation for the posterior probabilities, and the testing procedure. |
Authors: | Lijia Wang [aut, cre, cph], Xu Han [aut], Xin Tong [aut] |
Maintainer: | Lijia Wang <[email protected]> |
License: | GPL-2 |
Version: | 1.0.0 |
Built: | 2024-10-29 06:19:04 UTC |
Source: | CRAN |
Estimate the parameters in the three-part mixture
AEB( Z, Sigma, eig = eigs_sym(Sigma, min(400, length(Z)), which = "LM"), eig_tol = 1, set_nu = c(0), set1 = c(0:10) * 0.01 + 0.01, set2 = c(0:10) * 0.01 + 0.01, setp = c(1:7) * 0.1 )
AEB( Z, Sigma, eig = eigs_sym(Sigma, min(400, length(Z)), which = "LM"), eig_tol = 1, set_nu = c(0), set1 = c(0:10) * 0.01 + 0.01, set2 = c(0:10) * 0.01 + 0.01, setp = c(1:7) * 0.1 )
Z |
a vector of test statistics |
Sigma |
covariance matrix |
eig |
eig value information |
eig_tol |
the smallest eigen value used in the calulation |
set_nu |
a search region for nu_0 |
set1 |
a search region for tau_sqr_1 |
set2 |
a search region for tau_sqr_2 |
setp |
a search region for proportion |
Estimate the parameters in the three-part mixture
where
The return of the function is a list in the form of list(nu_0, tau_sqr_1, tau_sqr_2, pi_0, pi_1, pi_2, mu_1, mu_2, Z_hat).
nu_0, tau_sqr_1, tau_sqr_2: The best combination of that minimize the total variance from the regions
.
pi_0, pi_1, pi_2, mu_1, mu_2: The estimates of parameters .
Z_hat: A vector of simulated data base on the parameter estimates.
p = 500 n_col = 10 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix Z = rnorm(p,0,1) #this is just an example for testing the algorithm. #Not true test statistics with respect to Sigma best_set = AEB(Z,Sigma) print(c(best_set$pi_0, best_set$pi_1, best_set$pi_2, best_set$mu_1, best_set$mu_2)) library(MASS) ###################################### #construct a test statistic vector Z p = 1000 n_col = 4 pi_0 = 0.6 pi_1 = 0.2 pi_2 = 0.2 nu_0 = 0 mu_1 = -1.5 mu_2 = 1.5 tau_sqr_1 = 0.1 tau_sqr_2 = 0.1 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix b = rmultinom(p, size = 1, prob = c(pi_0,pi_1,pi_2)) ui = b[1,]*nu_0 + b[2,]*rnorm(p, mean = mu_1, sd = sqrt(tau_sqr_1)) + b[3,]*rnorm(p, mean = mu_2, sd = sqrt(tau_sqr_2)) # actual situation Z = mvrnorm(n = 1,ui, Sigma, tol = 1e-6, empirical = FALSE, EISPACK = FALSE) best_set =AEB(Z,Sigma) print(c(best_set$pi_0, best_set$pi_1, best_set$pi_2, best_set$mu_1, best_set$mu_2))
p = 500 n_col = 10 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix Z = rnorm(p,0,1) #this is just an example for testing the algorithm. #Not true test statistics with respect to Sigma best_set = AEB(Z,Sigma) print(c(best_set$pi_0, best_set$pi_1, best_set$pi_2, best_set$mu_1, best_set$mu_2)) library(MASS) ###################################### #construct a test statistic vector Z p = 1000 n_col = 4 pi_0 = 0.6 pi_1 = 0.2 pi_2 = 0.2 nu_0 = 0 mu_1 = -1.5 mu_2 = 1.5 tau_sqr_1 = 0.1 tau_sqr_2 = 0.1 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix b = rmultinom(p, size = 1, prob = c(pi_0,pi_1,pi_2)) ui = b[1,]*nu_0 + b[2,]*rnorm(p, mean = mu_1, sd = sqrt(tau_sqr_1)) + b[3,]*rnorm(p, mean = mu_2, sd = sqrt(tau_sqr_2)) # actual situation Z = mvrnorm(n = 1,ui, Sigma, tol = 1e-6, empirical = FALSE, EISPACK = FALSE) best_set =AEB(Z,Sigma) print(c(best_set$pi_0, best_set$pi_1, best_set$pi_2, best_set$mu_1, best_set$mu_2))
Calculating the estimates for
d_value( Z, Sigma, best_set = AEB(Z, Sigma), eig = eigs_sym(Sigma, min(400, length(Z)), which = "LM"), sim_size = 3000, eig_value = 0.35 )
d_value( Z, Sigma, best_set = AEB(Z, Sigma), eig = eigs_sym(Sigma, min(400, length(Z)), which = "LM"), sim_size = 3000, eig_value = 0.35 )
Z |
a vector of test statistics |
Sigma |
covariance matrix |
best_set |
a list of parameters (list(nu_0 = ..., tau_sqr_1 = ..., tau_sqr_2 = ..., pi_0 = ..., pi_1= ..., pi_2 = ..., mu_1 = ..., mu_2 = ...)) or returns from AEB |
eig |
eig value information |
sim_size |
simulation size |
eig_value |
the smallest eigen value used in the calulation |
a vector of estimates for
p = 500 n_col = 10 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix Z = rnorm(p,0,1) #this is just an example for testing the algorithm. #Not true test statistics with respect to Sigma d_value(Z,Sigma,sim_size = 5) #To save time, we set the simulation size to be 10, but the default value is much better. library(MASS) ###################################### #construct a test statistic vector Z p = 1000 n_col = 4 pi_0 = 0.6 pi_1 = 0.2 pi_2 = 0.2 nu_0 = 0 mu_1 = -1.5 mu_2 = 1.5 tau_sqr_1 = 0.1 tau_sqr_2 = 0.1 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix b = rmultinom(p, size = 1, prob = c(pi_0,pi_1,pi_2)) ui = b[1,]*nu_0 + b[2,]*rnorm(p, mean = mu_1, sd = sqrt(tau_sqr_1)) + b[3,]*rnorm(p, mean = mu_2, sd = sqrt(tau_sqr_2)) # actual situation Z = mvrnorm(n = 1,ui, Sigma, tol = 1e-6, empirical = FALSE, EISPACK = FALSE) d_value(Z,Sigma)
p = 500 n_col = 10 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix Z = rnorm(p,0,1) #this is just an example for testing the algorithm. #Not true test statistics with respect to Sigma d_value(Z,Sigma,sim_size = 5) #To save time, we set the simulation size to be 10, but the default value is much better. library(MASS) ###################################### #construct a test statistic vector Z p = 1000 n_col = 4 pi_0 = 0.6 pi_1 = 0.2 pi_2 = 0.2 nu_0 = 0 mu_1 = -1.5 mu_2 = 1.5 tau_sqr_1 = 0.1 tau_sqr_2 = 0.1 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix b = rmultinom(p, size = 1, prob = c(pi_0,pi_1,pi_2)) ui = b[1,]*nu_0 + b[2,]*rnorm(p, mean = mu_1, sd = sqrt(tau_sqr_1)) + b[3,]*rnorm(p, mean = mu_2, sd = sqrt(tau_sqr_2)) # actual situation Z = mvrnorm(n = 1,ui, Sigma, tol = 1e-6, empirical = FALSE, EISPACK = FALSE) d_value(Z,Sigma)
False discovery proportion and False non-discovery proportion computation
FDP_compute(decision, ui, positive)
FDP_compute(decision, ui, positive)
decision |
returns from the function Optimal_procedure_3 |
ui |
true mean vector |
positive |
TRUE/FALSE valued. TRUE: H0: ui no greater than 0. FALSE: H0: ui no less than 0. |
False discovery proportion (FDP) and False non-discovery proportion (FNP)
ui = rnorm(10,0,1) #assume this is true parameter decision = rbinom(10,1, 0.4) #assume this is decision vector FDP_compute(decision,ui,TRUE) library(MASS) ###################################### #construct a test statistic vector Z p = 1000 n_col = 4 pi_0 = 0.6 pi_1 = 0.2 pi_2 = 0.2 nu_0 = 0 mu_1 = -1.5 mu_2 = 1.5 tau_sqr_1 = 0.1 tau_sqr_2 = 0.1 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix b = rmultinom(p, size = 1, prob = c(pi_0,pi_1,pi_2)) ui = b[1,]*nu_0 + b[2,]*rnorm(p, mean = mu_1, sd = sqrt(tau_sqr_1)) + b[3,]*rnorm(p, mean = mu_2, sd = sqrt(tau_sqr_2)) # actual situation Z = mvrnorm(n = 1,ui, Sigma, tol = 1e-6, empirical = FALSE, EISPACK = FALSE) prob_p = d_value(Z,Sigma) #decision level = 0.1 #significance level decision_p = Optimal_procedure_3(prob_p,level) FDP_compute(decision_p$ai,ui,TRUE)
ui = rnorm(10,0,1) #assume this is true parameter decision = rbinom(10,1, 0.4) #assume this is decision vector FDP_compute(decision,ui,TRUE) library(MASS) ###################################### #construct a test statistic vector Z p = 1000 n_col = 4 pi_0 = 0.6 pi_1 = 0.2 pi_2 = 0.2 nu_0 = 0 mu_1 = -1.5 mu_2 = 1.5 tau_sqr_1 = 0.1 tau_sqr_2 = 0.1 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix b = rmultinom(p, size = 1, prob = c(pi_0,pi_1,pi_2)) ui = b[1,]*nu_0 + b[2,]*rnorm(p, mean = mu_1, sd = sqrt(tau_sqr_1)) + b[3,]*rnorm(p, mean = mu_2, sd = sqrt(tau_sqr_2)) # actual situation Z = mvrnorm(n = 1,ui, Sigma, tol = 1e-6, empirical = FALSE, EISPACK = FALSE) prob_p = d_value(Z,Sigma) #decision level = 0.1 #significance level decision_p = Optimal_procedure_3(prob_p,level) FDP_compute(decision_p$ai,ui,TRUE)
Calculating the estimates for
l_value( Z, Sigma, best_set = AEB(Z, Sigma), eig = eigs_sym(Sigma, min(400, length(Z)), which = "LM"), sim_size = 3000, eig_value = 0.35 )
l_value( Z, Sigma, best_set = AEB(Z, Sigma), eig = eigs_sym(Sigma, min(400, length(Z)), which = "LM"), sim_size = 3000, eig_value = 0.35 )
Z |
a vector of test statistics |
Sigma |
covariance matrix |
best_set |
a list of parameters (list(nu_0 = ..., tau_sqr_1 = ..., tau_sqr_2 = ..., pi_0 = ..., pi_1= ..., pi_2 = ..., mu_1 = ..., mu_2 = ...)) or returns from Fund_parameter_estimation |
eig |
eig value information |
sim_size |
simulation size |
eig_value |
the smallest eigen value used in the calulation |
a vector of estimates for
p = 500 n_col = 10 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix Z = rnorm(p,0,1) #this is just an example for testing the algorithm. #Not true test statistics with respect to Sigma l_value(Z,Sigma,sim_size = 5) #To save time, we set the simulation size to be 10, but the default value is much better. library(MASS) ###################################### #construct a test statistic vector Z p = 1000 n_col = 4 pi_0 = 0.6 pi_1 = 0.2 pi_2 = 0.2 nu_0 = 0 mu_1 = -1.5 mu_2 = 1.5 tau_sqr_1 = 0.1 tau_sqr_2 = 0.1 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix b = rmultinom(p, size = 1, prob = c(pi_0,pi_1,pi_2)) ui = b[1,]*nu_0 + b[2,]*rnorm(p, mean = mu_1, sd = sqrt(tau_sqr_1)) + b[3,]*rnorm(p, mean = mu_2, sd = sqrt(tau_sqr_2)) # actual situation Z = mvrnorm(n = 1,ui, Sigma, tol = 1e-6, empirical = FALSE, EISPACK = FALSE) l_value(Z,Sigma)
p = 500 n_col = 10 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix Z = rnorm(p,0,1) #this is just an example for testing the algorithm. #Not true test statistics with respect to Sigma l_value(Z,Sigma,sim_size = 5) #To save time, we set the simulation size to be 10, but the default value is much better. library(MASS) ###################################### #construct a test statistic vector Z p = 1000 n_col = 4 pi_0 = 0.6 pi_1 = 0.2 pi_2 = 0.2 nu_0 = 0 mu_1 = -1.5 mu_2 = 1.5 tau_sqr_1 = 0.1 tau_sqr_2 = 0.1 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix b = rmultinom(p, size = 1, prob = c(pi_0,pi_1,pi_2)) ui = b[1,]*nu_0 + b[2,]*rnorm(p, mean = mu_1, sd = sqrt(tau_sqr_1)) + b[3,]*rnorm(p, mean = mu_2, sd = sqrt(tau_sqr_2)) # actual situation Z = mvrnorm(n = 1,ui, Sigma, tol = 1e-6, empirical = FALSE, EISPACK = FALSE) l_value(Z,Sigma)
decision process
Optimal_procedure_3(prob_0, alpha)
Optimal_procedure_3(prob_0, alpha)
prob_0 |
d-values or l-values |
alpha |
significance level |
ai: a vector of decisions. (1 indicates rejection)
cj: The number of rejections
FDR_hat: The estimated false discovery rate (FDR).
FNR_hat: The estimated false non-discovery rate (FNR).
prob = runif(100,0,1) #assume this is the posterior probability vector level = 0.3 #the significance level Optimal_procedure_3(prob,level) library(MASS) ###################################### #construct a test statistic vector Z p = 1000 n_col = 4 pi_0 = 0.6 pi_1 = 0.2 pi_2 = 0.2 nu_0 = 0 mu_1 = -1.5 mu_2 = 1.5 tau_sqr_1 = 0.1 tau_sqr_2 = 0.1 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix b = rmultinom(p, size = 1, prob = c(pi_0,pi_1,pi_2)) ui = b[1,]*nu_0 + b[2,]*rnorm(p, mean = mu_1, sd = sqrt(tau_sqr_1)) + b[3,]*rnorm(p, mean = mu_2, sd = sqrt(tau_sqr_2)) # actual situation Z = mvrnorm(n = 1,ui, Sigma, tol = 1e-6, empirical = FALSE, EISPACK = FALSE) prob_p = d_value(Z,Sigma) #decision level = 0.1 #significance level decision_p = Optimal_procedure_3(prob_p,level) decision_p$cj head(decision_p$ai)
prob = runif(100,0,1) #assume this is the posterior probability vector level = 0.3 #the significance level Optimal_procedure_3(prob,level) library(MASS) ###################################### #construct a test statistic vector Z p = 1000 n_col = 4 pi_0 = 0.6 pi_1 = 0.2 pi_2 = 0.2 nu_0 = 0 mu_1 = -1.5 mu_2 = 1.5 tau_sqr_1 = 0.1 tau_sqr_2 = 0.1 A = matrix(rnorm(p*n_col,0,1), nrow = p, ncol = n_col, byrow = TRUE) Sigma = A %*% t(A) +diag(p) Sigma = cov2cor(Sigma) #covariance matrix b = rmultinom(p, size = 1, prob = c(pi_0,pi_1,pi_2)) ui = b[1,]*nu_0 + b[2,]*rnorm(p, mean = mu_1, sd = sqrt(tau_sqr_1)) + b[3,]*rnorm(p, mean = mu_2, sd = sqrt(tau_sqr_2)) # actual situation Z = mvrnorm(n = 1,ui, Sigma, tol = 1e-6, empirical = FALSE, EISPACK = FALSE) prob_p = d_value(Z,Sigma) #decision level = 0.1 #significance level decision_p = Optimal_procedure_3(prob_p,level) decision_p$cj head(decision_p$ai)