Title: | Coarsened Mixtures of Hierarchical Skew Kernels |
---|---|
Description: | Bayesian fit of a Dirichlet Process Mixture with hierarchical multivariate skew normal kernels and coarsened posteriors. For more information, see Gorsky, Chan and Ma (2020) <arXiv:2001.06451>. |
Authors: | S. Gorsky [aut, cre], C. Chan [ctb], L. Ma [ctb] |
Maintainer: | S. Gorsky <[email protected]> |
License: | CC0 |
Version: | 1.0.0 |
Built: | 2024-12-30 08:25:18 UTC |
Source: | CRAN |
The function computes (and by default plots) estimates of the autocovariance or autocorrelation function for the different parameters of the model. This is a wrapper for coda::acf.
acfParams( res, params = c("w", "xi", "xi0", "psi", "G", "E", "eta"), only_non_trivial_clusters = TRUE, lag.max = NULL, type = c("correlation", "covariance", "partial"), plot = TRUE, ... )
acfParams( res, params = c("w", "xi", "xi0", "psi", "G", "E", "eta"), only_non_trivial_clusters = TRUE, lag.max = NULL, type = c("correlation", "covariance", "partial"), plot = TRUE, ... )
res |
An object of class |
params |
A character vector naming the parameters to compute and plot the autocorrelation plots for. |
only_non_trivial_clusters |
Logical, if |
lag.max |
maximum lag at which to calculate the autocorrelation. See more details at ?acf. |
type |
Character string giving the type of autocorrelation to be computed. See more details at ?acf. |
plot |
Logical. If |
... |
Other arguments passed to |
An acfParamsCOMIX
object which is a named list,
with a named element for each requested parameter. Each element is
an object of class acf
(from the coda
package).
#' @examples
library(COMIX)
# Number of observations for each sample (row) and cluster (column):
njk <-
matrix(
c(
150, 300,
250, 200
),
nrow = 2,
byrow = TRUE
)
# Dimension of data: p <- 3
# Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p)
# Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p)
# Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) )
C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2]))
prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior)
# Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") acf_w <- acfParams(tidy_chain, "w")
# (see vignette for a more detailed example)
This function aligns multiple samples so that their location parameters are equal.
calibrate(x, reference.group = NULL)
calibrate(x, reference.group = NULL)
x |
An object of class COMIX. |
reference.group |
An integer between 1 and the number of groups in the data
( |
A named list of 3:
Y_cal
: a nrow(x$data$Y)
ncol(x$data$Y)
matrix, a calibrated version of the original data.
calibration_distribution
: an x$pmc$nsave
ncol(x$data$Y)
nrow(x$data$Y)
array storing the
difference between the estimated sample-specific location parameter and the group
location parameter for each saved step of the chain.
calibration_median
: a nrow(x$data$Y)
ncol(x$data$Y)
matrix storing the median difference between the estimated sample-specific location parameter and the group
location parameter for each saved step of the chain. This matrix is equal to the
difference between the uncalibrated data (x$data$Y
) and the calibrated
data (Y_cal
).
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) # Generate calibrated data: cal <- calibrateNoDist(res_relab) # Compare raw and calibrated data: (see plot in vignette) # par(mfrow=c(1, 2)) # plot(Y, col = C, xlim = range(Y[,1]), ylim = range(Y[,2]) ) # Get posterior estimates for the model parameters: res_summary <- summarizeChain(res_relab) # Check for instance, the cluster assignment labels: table(res_summary$t) # Indeed the same as colSums(njk) # Or examine the skewness parameter for the non-trivial clusters: res_summary$alpha[ , unique(res_summary$t)] # And compare those to cbind(alpha1, alpha2) # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) # Generate calibrated data: cal <- calibrateNoDist(res_relab) # Compare raw and calibrated data: (see plot in vignette) # par(mfrow=c(1, 2)) # plot(Y, col = C, xlim = range(Y[,1]), ylim = range(Y[,2]) ) # Get posterior estimates for the model parameters: res_summary <- summarizeChain(res_relab) # Check for instance, the cluster assignment labels: table(res_summary$t) # Indeed the same as colSums(njk) # Or examine the skewness parameter for the non-trivial clusters: res_summary$alpha[ , unique(res_summary$t)] # And compare those to cbind(alpha1, alpha2) # (see vignette for a more detailed example)
This function aligns multiple samples so that their location parameters are equal.
calibrateNoDist(x, reference.group = NULL)
calibrateNoDist(x, reference.group = NULL)
x |
An object of class COMIX. |
reference.group |
An integer between 1 and the number of groups in the data
( |
A named list of 2:
Y_cal
: a nrow(x$data$Y)
ncol(x$data$Y)
matrix, a calibrated version of the original data.
calibration_median
: a nrow(x$data$Y)
ncol(x$data$Y)
matrix storing the median difference between the estimated sample-specific location parameter and the group
location parameter for each saved step of the chain. This matrix is equal to the
difference between the uncalibrated data (x$data$Y
) and the calibrated
data (Y_cal
).
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) # Generate calibrated data: cal <- calibrateNoDist(res_relab) # Compare raw and calibrated data: (see plot in vignette) # par(mfrow=c(1, 2)) # plot(Y, col = C, xlim = range(Y[,1]), ylim = range(Y[,2]) ) # Get posterior estimates for the model parameters: res_summary <- summarizeChain(res_relab) # Check for instance, the cluster assignment labels: table(res_summary$t) # Indeed the same as colSums(njk) # Or examine the skewness parameter for the non-trivial clusters: res_summary$alpha[ , unique(res_summary$t)] # And compare those to cbind(alpha1, alpha2) # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) # Generate calibrated data: cal <- calibrateNoDist(res_relab) # Compare raw and calibrated data: (see plot in vignette) # par(mfrow=c(1, 2)) # plot(Y, col = C, xlim = range(Y[,1]), ylim = range(Y[,2]) ) # Get posterior estimates for the model parameters: res_summary <- summarizeChain(res_relab) # Check for instance, the cluster assignment labels: table(res_summary$t) # Indeed the same as colSums(njk) # Or examine the skewness parameter for the non-trivial clusters: res_summary$alpha[ , unique(res_summary$t)] # And compare those to cbind(alpha1, alpha2) # (see vignette for a more detailed example)
This function generates a sample from the posterior of COMIX.
comix(Y, C, prior = NULL, pmc = NULL, state = NULL, ncores = 2)
comix(Y, C, prior = NULL, pmc = NULL, state = NULL, ncores = 2)
Y |
Matrix of the data. Each row represents an observation. |
C |
Vector of the group label of each observation. Labels must be integers starting from 1. |
prior |
A list giving the prior information. If unspecified, a default prior is used. The list includes the following parameters:
|
pmc |
A list giving the Population Monte Carlo (PMC) parameters:
|
state |
A list giving the initial cluster labels:
|
ncores |
The number of CPU cores to utilize in parallel. Defaults to 2. |
An object of class COMIX, a list of 4:
chain
, a named list:
t
: an nsave
nrow(Y)
matrix with estimated cluster labels
for each saved step of the chain and each observation in the data Y
.
z
: a nsave
nrow(Y)
matrix with estimated values of
the latent variable for the parameterization of the
multivariate skew normal distribution used in the sampler for each saved step of
the chain and each observation in the data
Y
.
W
: an length(unique(C))
K
nsave
: array storing the estimated sample- and cluster-specific weights for each
saved step of the chain.
xi
: an length(unique(C))
(ncol(Y) x K)
nsave
array storing the estimated sample- and cluster-specific
multivariate skew normal location parameters of the kernel for each saved step of the chain.
xi0
: an ncol(Y)
K
nsave
: array storing the estimated cluster-specific
group location parameters for each saved step of the chain.
psi
: an ncol(Y)
K
nsave
array storing the estimated cluster-specific skew parameters of the kernels in
the parameterization of the
multivariate skew normal distribution used in the sampler
for each saved step of the chain.
G
: an ncol(Y)
(ncol(Y) x K)
nsave
array storing the estimated cluster-specific
multivariate skew normal scale matrix (in row format) of the kernel
used in the sampler for each saved step of the chain.
E
: an ncol(Y)
(ncol(Y) x K)
nsave
array storing the estimated covariance matrix
(in row format) of the multivariate normal distribution from which the
sample- and cluster-specific location parameters are drawn for each saved step
of the chain.
eta
: a nsave
1
matrix storing the
estimated Dirichlet Process Mixture concentration parameter for each saved step of the chain.
Sigma
: an ncol(Y)
(ncol(Y) x K)
nsave
array storing the estimated cluster-specific
multivariate skew normal scale matrix (in row format) of the kernel for each saved step of the chain.
alpha
: an ncol(Y)
K
nsave
array storing the estimated cluster-specific skew parameters of the
kernel's multivariate skew normal distribution
for each saved step of the chain.
data
, a named list that includes the matrix of the data Y
and C
the vector of the group label of each observation.
prior
and pmc
, the lists, as above, that were provided as inputs to
the function.
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) # Generate calibrated data: cal <- calibrateNoDist(res_relab) # Compare raw and calibrated data: (see plot in vignette) # par(mfrow=c(1, 2)) # plot(Y, col = C, xlim = range(Y[,1]), ylim = range(Y[,2]) ) # Get posterior estimates for the model parameters: res_summary <- summarizeChain(res_relab) # Check for instance, the cluster assignment labels: table(res_summary$t) # Indeed the same as colSums(njk) # Or examine the skewness parameter for the non-trivial clusters: res_summary$alpha[ , unique(res_summary$t)] # And compare those to cbind(alpha1, alpha2) # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) # Generate calibrated data: cal <- calibrateNoDist(res_relab) # Compare raw and calibrated data: (see plot in vignette) # par(mfrow=c(1, 2)) # plot(Y, col = C, xlim = range(Y[,1]), ylim = range(Y[,2]) ) # Get posterior estimates for the model parameters: res_summary <- summarizeChain(res_relab) # Check for instance, the cluster assignment labels: table(res_summary$t) # Indeed the same as colSums(njk) # Or examine the skewness parameter for the non-trivial clusters: res_summary$alpha[ , unique(res_summary$t)] # And compare those to cbind(alpha1, alpha2) # (see vignette for a more detailed example)
This function creates an object that summarizes the effective sample size for the parameters of the model.
effectiveSampleSize(res, params = c("w", "xi", "xi0", "psi", "G", "E", "eta"))
effectiveSampleSize(res, params = c("w", "xi", "xi0", "psi", "G", "E", "eta"))
res |
An object of class |
params |
A character vector naming the parameters to compute the effective sample size for. |
An effectiveSampleSizeCOMIX
object which is a named list,
with a named element for each requested parameter. Each element is a data
frame that includes the effective sample size for the parameter.
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") effssz <- effectiveSampleSize(tidy_chain, "w") # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") effssz <- effectiveSampleSize(tidy_chain, "w") # (see vignette for a more detailed example)
This function creates an object that summarizes the Geweke convergence diagnostic.
gewekeParams( res, params = c("w", "xi", "xi0", "psi", "G", "E", "eta"), frac1 = 0.1, frac2 = 0.5, probs = c(0.025, 0.975) )
gewekeParams( res, params = c("w", "xi", "xi0", "psi", "G", "E", "eta"), frac1 = 0.1, frac2 = 0.5, probs = c(0.025, 0.975) )
res |
An object of class |
params |
A character vector naming the parameters to compute the Geweke diagnostic for. |
frac1 |
Double, fraction to use from beginning of chain. |
frac2 |
Double, fraction to use from end of chain. |
probs |
A vector of 2 doubles, probabilities denoting the limits of a confidence interval for the convergence diagnostic. |
An gewekeParamsCOMIX
object which is a named list,
with a named element for each requested parameter. Each element is a data
frame that includes the Geweke diagnostic and result of a stationarity test
for the parameter.
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") gwk <- gewekeParams(tidy_chain, "w") # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") gwk <- gewekeParams(tidy_chain, "w") # (see vignette for a more detailed example)
This function creates an object that summarizes the Heidelberg-Welch convergence diagnostic.
heidelParams( res, params = c("w", "xi", "xi0", "psi", "G", "E", "eta"), eps = 0.1, pvalue = 0.05 )
heidelParams( res, params = c("w", "xi", "xi0", "psi", "G", "E", "eta"), eps = 0.1, pvalue = 0.05 )
res |
An object of class |
params |
A character vector naming the parameters to compute the Heidelberg-Welch diagnostic for. |
eps |
Target value for ratio of halfwidth to sample mean. |
pvalue |
Significance level to use. |
An heidelParamsCOMIX
object which is a named list,
with a named element for each requested parameter. Each element is a data
frame that includes the Heidelberg-Welch diagnostic and results of a
stationarity test for the parameter.
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") hd <- heidelParams(tidy_chain, "w") # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") hd <- heidelParams(tidy_chain, "w") # (see vignette for a more detailed example)
This function creates plots for the effective sample size for the parameters of the model.
plotEffectiveSampleSize(effssz, param)
plotEffectiveSampleSize(effssz, param)
effssz |
An object of class |
param |
Character, naming the parameter to create a plot of effective sample sizes. |
A ggplot2
plot containing the effective sample size plot.
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") effssz <- effectiveSampleSize(tidy_chain, "w") plotEffectiveSampleSize(effssz, "w") # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") effssz <- effectiveSampleSize(tidy_chain, "w") plotEffectiveSampleSize(effssz, "w") # (see vignette for a more detailed example)
This function creates plots for the Geweke diagnostic and results of test of stationarity for the parameters of the model.
plotGewekeParams(gwk, param)
plotGewekeParams(gwk, param)
gwk |
An object of class |
param |
Character, naming the parameter to create a plot of the Geweke diagnostic for. |
A ggplot2
plot containing the Geweke diagnostic plot.
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") gwk <- gewekeParams(tidy_chain, "w") plotGewekeParams(gwk, "w") # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") gwk <- gewekeParams(tidy_chain, "w") plotGewekeParams(gwk, "w") # (see vignette for a more detailed example)
This function creates plots for the Heidelberg-Welch diagnostic and results of test of stationarity for the parameters of the model.
plotHeidelParams(hd, param)
plotHeidelParams(hd, param)
hd |
An object of class |
param |
Character, naming the parameter to create a plot of the Heidelberg-Welch diagnostic for. |
A ggplot2
plot containing the Heidelberg-Welch diagnostic plot.
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") hd <- heidelParams(tidy_chain, "w") plotHeidelParams(hd, "w") # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) effssz <- effectiveSampleSize(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") hd <- heidelParams(tidy_chain, "w") plotHeidelParams(hd, "w") # (see vignette for a more detailed example)
This function creates trace plots for different parameters of the MCMC chain.
plotTracePlots(res, param)
plotTracePlots(res, param)
res |
An object of class |
param |
Character, naming the parameter to create a trace plot for. |
A ggplot2
plot containing the trace plot.
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) plotTracePlots(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") plotTracePlots(tidy_chain, "w") # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) plotTracePlots(res_relab, "w") # Or: tidy_chain <- tidyChain(res_relab, "w") plotTracePlots(tidy_chain, "w") # (see vignette for a more detailed example)
This function relabels the chain to avoid label switching issues.
relabelChain(res)
relabelChain(res)
res |
An object of class COMIX. |
An object of class COMIX where res$chain$t
is replaced with the
new labels.
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) # Generate calibrated data: cal <- calibrateNoDist(res_relab) # Compare raw and calibrated data: (see plot in vignette) # par(mfrow=c(1, 2)) # plot(Y, col = C, xlim = range(Y[,1]), ylim = range(Y[,2]) ) # Get posterior estimates for the model parameters: res_summary <- summarizeChain(res_relab) # Check for instance, the cluster assignment labels: table(res_summary$t) # Indeed the same as colSums(njk) # Or examine the skewness parameter for the non-trivial clusters: res_summary$alpha[ , unique(res_summary$t)] # And compare those to cbind(alpha1, alpha2) # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) # Generate calibrated data: cal <- calibrateNoDist(res_relab) # Compare raw and calibrated data: (see plot in vignette) # par(mfrow=c(1, 2)) # plot(Y, col = C, xlim = range(Y[,1]), ylim = range(Y[,2]) ) # Get posterior estimates for the model parameters: res_summary <- summarizeChain(res_relab) # Check for instance, the cluster assignment labels: table(res_summary$t) # Indeed the same as colSums(njk) # Or examine the skewness parameter for the non-trivial clusters: res_summary$alpha[ , unique(res_summary$t)] # And compare those to cbind(alpha1, alpha2) # (see vignette for a more detailed example)
This function provides post-hoc estimates of the model parameters.
summarizeChain(res)
summarizeChain(res)
res |
An object of class COMIX. |
A named list:
xi0
: a ncol(res$data$Y)
res$prior$K
matrix storing
the posterior mean of the group location parameter.
psi
: a ncol(res$data$Y)
res$prior$K
matrix storing
the posterior mean of the multivariate skew normal kernels skewness parameter (in the parameterization used in
the sampler).
alpha
: a ncol(res$data$Y)
res$prior$K
matrix storing
the posterior mean of the multivariate skew normal kernels skewness parameter.
W
: a length(unique(res$data$C))
res$prior$K
matrix storing
the posterior mean of the mixture weights for each sample and cluster.
xi
: an length(unique(res$data$C))
ncol(res$data$Y)
res$prior$K
array storing the the posterior mean of the
multivariate skew normal kernels location parameter for each sample and cluster.
Sigma
: an ncol(res$data$Y)
ncol(res$data$Y)
res$prior$K
array storing the the posterior mean of the
scaling matrix of the multivariate skew normal kernels for each cluster.
G
: an ncol(res$data$Y)
ncol(res$data$Y)
res$prior$K
array storing the the posterior mean of the
scaling matrix of the multivariate skew normal kernels for each cluster (in the
parameterization used in the sampler).
E
: an ncol(res$data$Y)
ncol(res$data$Y)
res$prior$K
array storing the the posterior mean of the
covariance matrix of the multivariate normal distributions for each cluster form which
the sample specific location parameters are drawn.
meanvec
: an length(unique(res$data$C))
ncol(res$data$Y)
res$prior$K
array storing the the posterior mean of the
multivariate skew normal kernels mean parameter for each sample and cluster.
meanvec0
: a ncol(res$data$Y)
res$prior$K
matrix storing
the posterior mean of the group mean parameter.
t
: Vector of length nrow(x$data$Y)
. Each element is the mode
of the posterior distribution of cluster labels.
eta
: scalar, the mean of the posterior distribution of the estimated
Dirichlet Process Mixture concentration parameter.
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) # Generate calibrated data: cal <- calibrateNoDist(res_relab) # Compare raw and calibrated data: (see plot in vignette) # par(mfrow=c(1, 2)) # plot(Y, col = C, xlim = range(Y[,1]), ylim = range(Y[,2]) ) # Get posterior estimates for the model parameters: res_summary <- summarizeChain(res_relab) # Check for instance, the cluster assignment labels: table(res_summary$t) # Indeed the same as colSums(njk) # Or examine the skewness parameter for the non-trivial clusters: res_summary$alpha[ , unique(res_summary$t)] # And compare those to cbind(alpha1, alpha2) # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) # Generate calibrated data: cal <- calibrateNoDist(res_relab) # Compare raw and calibrated data: (see plot in vignette) # par(mfrow=c(1, 2)) # plot(Y, col = C, xlim = range(Y[,1]), ylim = range(Y[,2]) ) # Get posterior estimates for the model parameters: res_summary <- summarizeChain(res_relab) # Check for instance, the cluster assignment labels: table(res_summary$t) # Indeed the same as colSums(njk) # Or examine the skewness parameter for the non-trivial clusters: res_summary$alpha[ , unique(res_summary$t)] # And compare those to cbind(alpha1, alpha2) # (see vignette for a more detailed example)
This function creates tidy versions of the stored chain. This object can then be used as input for the other diagnostic functions in this package.
tidyChain( res, params = c("t", "w", "xi", "xi0", "psi", "G", "E", "eta", "Sigma", "alpha") )
tidyChain( res, params = c("t", "w", "xi", "xi0", "psi", "G", "E", "eta", "Sigma", "alpha") )
res |
An object of class COMIX. |
params |
A character vector naming the parameters to tidy. |
A tidyChainCOMIX
object: a named list of class whose length is the length
of params
. Each element of the list contains a tibble with a tidy version of the samples
from the MCMC chain.
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) tidy_chain <- tidyChain(res_relab) # (see vignette for a more detailed example)
library(COMIX) # Number of observations for each sample (row) and cluster (column): njk <- matrix( c( 150, 300, 250, 200 ), nrow = 2, byrow = TRUE ) # Dimension of data: p <- 3 # Scale and skew parameters for first cluster: Sigma1 <- matrix(0.5, nrow = p, ncol = p) + diag(0.5, nrow = p) alpha1 <- rep(0, p) alpha1[1] <- -5 # location parameter for first cluster in first sample: xi11 <- rep(0, p) # location parameter for first cluster in second sample (aligned with first): xi21 <- rep(0, p) # Scale and skew parameters for second cluster: Sigma2 <- matrix(-1/3, nrow = p, ncol = p) + diag(1 + 1/3, nrow = p) alpha2 <- rep(0, p) alpha2[2] <- 5 # location parameter for second cluster in first sample: xi12 <- rep(3, p) # location parameter for second cluster in second sample (misaligned with first): xi22 <- rep(4, p) # Sample data: set.seed(1) Y <- rbind( sn::rmsn(njk[1, 1], xi = xi11, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[1, 2], xi = xi12, Omega = Sigma2, alpha = alpha2), sn::rmsn(njk[2, 1], xi = xi21, Omega = Sigma1, alpha = alpha1), sn::rmsn(njk[2, 2], xi = xi22, Omega = Sigma2, alpha = alpha2) ) C <- c(rep(1, rowSums(njk)[1]), rep(2, rowSums(njk)[2])) prior <- list(zeta = 1, K = 10) pmc <- list(naprt = 5, nburn = 200, nsave = 200) # Reasonable usage pmc <- list(naprt = 5, nburn = 2, nsave = 5) # Minimal usage for documentation # Fit the model: res <- comix(Y, C, pmc = pmc, prior = prior) # Relabel to resolve potential label switching issues: res_relab <- relabelChain(res) tidy_chain <- tidyChain(res_relab) # (see vignette for a more detailed example)
Convert between parameterizations of the multivariate skew normal distribution.
transform_params(Sigma, alpha)
transform_params(Sigma, alpha)
Sigma |
A scale matrix. |
alpha |
A vector for the skew parameter. |
A list:
delta
: a reparameterized skewness vector, a transformed
version of alpha
.
omega
: a diagonal matrix of the same dimensions as Sigma
,
the diagonal elements are the square roots of the diagonal elements of Sigma
.
psi
: another reparameterized skewness vector, utilized in the sampler.
G
: a reparameterized version of Sigma
, utilized in the sampler.
library(COMIX) # Scale and skew parameters: Sigma <- matrix(0.5, nrow = 4, ncol = 4) + diag(0.5, nrow = 4) alpha <- c(0, 0, 0, 5) transformed_parameters <- transform_params(Sigma, alpha)
library(COMIX) # Scale and skew parameters: Sigma <- matrix(0.5, nrow = 4, ncol = 4) + diag(0.5, nrow = 4) alpha <- c(0, 0, 0, 5) transformed_parameters <- transform_params(Sigma, alpha)