| Title: | Discrete Choice Models for Economic Applications |
|---|---|
| Description: | Fast estimation of discrete-choice models for applied economics. Likelihoods, analytical gradients and Hessians are implemented in C++ with 'OpenMP' parallelism, scaling efficiently to specifications with many alternative-specific constants. Post-estimation routines return predicted shares, own- and cross-price elasticities, and diversion ratios. Supports multinomial logit ('MNL'), mixed logit ('MXL'), and nested logit ('NL'). |
| Authors: | Fernando Cordeiro [aut, cre, cph] |
| Maintainer: | Fernando Cordeiro <[email protected]> |
| License: | LGPL (>= 3) |
| Version: | 0.1.0 |
| Built: | 2026-05-20 13:46:19 UTC |
| Source: | https://github.com/cran/choicer |
Finds the ASC (delta) parameters such that predicted market shares match target shares, using the contraction mapping of Berry, Levinsohn, and Pakes (1995) doi:10.2307/2171802.
blp(object, target_shares, ...)blp(object, target_shares, ...)
object |
A fitted model object. |
target_shares |
Numeric vector of target market shares (length J). |
... |
Additional arguments passed to methods. |
Converged delta (ASC) vector.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) blp(fit, target_shares = rep(1/J, J))library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) blp(fit, target_shares = rep(1/J, J))
BLP95 contraction mapping to find delta given target shares
blp_contraction( delta, target_shares, X, beta, alt_idx, M, weights, include_outside_option = FALSE, tol = 1e-08, max_iter = 1000L )blp_contraction( delta, target_shares, X, beta, alt_idx, M, weights, include_outside_option = FALSE, tol = 1e-08, max_iter = 1000L )
delta |
J x 1 vector with initial guess for deltas (ASCs) |
target_shares |
J x 1 vector with target shares for each alternative |
X |
sum(M) x K design matrix with covariates. M[i] x K matrix for individual i |
beta |
K x 1 vector with model parameters |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
tol |
convergence tolerance |
max_iter |
maximum number of iterations |
vector with contraction's delta (ASCs) output
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) beta <- coef(fit)[fit$param_map$beta] delta <- blp_contraction(rep(0, J), rep(1/J, J), fit$data$X, beta, fit$data$alt_idx, fit$data$M, fit$data$weights) deltalibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) beta <- coef(fit)[fit$param_map$beta] delta <- blp_contraction(rep(0, J), rep(1/J, J), fit$data$X, beta, fit$data$alt_idx, fit$data$M, fit$data$weights) delta
BLP contraction mapping for multinomial logit model
## S3 method for class 'choicer_mnl' blp( object, target_shares, delta_init = NULL, tol = 1e-08, max_iter = 1000, ... )## S3 method for class 'choicer_mnl' blp( object, target_shares, delta_init = NULL, tol = 1e-08, max_iter = 1000, ... )
object |
A |
target_shares |
Numeric vector of target market shares.
Length |
delta_init |
Initial guess for delta (ASC) values. If |
tol |
Convergence tolerance (default 1e-8). |
max_iter |
Maximum iterations (default 1000). |
... |
Additional arguments (ignored). |
Converged delta (ASC) vector.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) blp(fit, target_shares = rep(1/J, J))library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) blp(fit, target_shares = rep(1/J, J))
BLP contraction mapping for mixed logit model
## S3 method for class 'choicer_mxl' blp( object, target_shares, delta_init = NULL, tol = 1e-08, max_iter = 1000, ... )## S3 method for class 'choicer_mxl' blp( object, target_shares, delta_init = NULL, tol = 1e-08, max_iter = 1000, ... )
object |
A |
target_shares |
Numeric vector of target market shares.
Length |
delta_init |
Initial guess for delta (ASC) values. If |
tol |
Convergence tolerance (default 1e-8). |
max_iter |
Maximum iterations (default 1000). |
... |
Additional arguments (ignored). |
Converged delta (ASC) vector.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) blp(fit, target_shares = rep(1/J, J))library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) blp(fit, target_shares = rep(1/J, J))
Reconstruct variance matrix L from L_params
build_var_mat(L_params, K_w, rc_correlation)build_var_mat(L_params, K_w, rc_correlation)
L_params |
flattened choleski decomposition version of the random coefficient parameters matrix |
K_w |
dimension of the random coefficient parameter (symmetric) matrix |
rc_correlation |
whether random coefficients are correlated |
matrix equal to LL', where L is the choleski decomposition of random coefficient matrix
L_params <- c(log(1.0), 0.3, log(0.5)) Sigma <- build_var_mat(L_params, K_w = 2, rc_correlation = TRUE) Sigma # 2x2 covariance matrixL_params <- c(log(1.0), 0.3, log(0.5)) Sigma <- build_var_mat(L_params, K_w = 2, rc_correlation = TRUE) Sigma # 2x2 covariance matrix
Extract coefficients from a choicer_fit object
## S3 method for class 'choicer_fit' coef(object, ...)## S3 method for class 'choicer_fit' coef(object, ...)
object |
A choicer_fit object. |
... |
Additional arguments (ignored). |
Named numeric vector of estimated coefficients.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) coef(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) coef(fit)
Computes a J x J matrix of diversion ratios. Entry (i, j) is the fraction of demand lost by alternative j that is captured by alternative i when alternative j becomes less attractive.
diversion_ratios(object, ...)diversion_ratios(object, ...)
object |
A fitted model object. |
... |
Additional arguments passed to methods. |
A J x J diversion ratio matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) diversion_ratios(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) diversion_ratios(fit)
Diversion ratios for multinomial logit model
## S3 method for class 'choicer_mnl' diversion_ratios(object, ...)## S3 method for class 'choicer_mnl' diversion_ratios(object, ...)
object |
A |
... |
Additional arguments (ignored). |
A J x J diversion ratio matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) diversion_ratios(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) diversion_ratios(fit)
Computes the attribute-based diversion ratio matrix. Entry (k, j) is the
fraction of demand lost by alternative j that is captured by alternative k
when a marginal change in alternative j's wrt_var attribute reduces
s_j.
## S3 method for class 'choicer_mxl' diversion_ratios(object, wrt_var, is_random_coef = FALSE, ...)## S3 method for class 'choicer_mxl' diversion_ratios(object, wrt_var, is_random_coef = FALSE, ...)
object |
A |
wrt_var |
Variable used to perturb alternative j's utility: a column
name (character) or 1-based index. Indexes into X columns for fixed
coefficients, or W columns for random coefficients (when
|
is_random_coef |
Logical. |
... |
Additional arguments (ignored). |
Unlike MNL, the MXL diversion ratio depends on which variable is perturbed:
the realised coefficient varies across individuals and
draws and does not cancel in the ratio. For a variable with a fixed
coefficient the result is independent of the variable ( cancels);
for a random-coefficient variable it is not.
A J x J diversion ratio matrix with alternative labels. Cross-products are averaged across simulation draws inside the integration to avoid Jensen-style bias.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) diversion_ratios(fit, "x1") diversion_ratios(fit, "w1", is_random_coef = TRUE)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) diversion_ratios(fit, "x1") diversion_ratios(fit, "w1", is_random_coef = TRUE)
Computes a J x J matrix of aggregate elasticities. Entry (i, j) is the percentage change in the probability of choosing alternative i when the attribute of alternative j changes by 1\
elasticities(object, ...)elasticities(object, ...)
object |
A fitted model object. |
... |
Additional arguments passed to methods. |
A J x J elasticity matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elasticities(fit, "x1")library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elasticities(fit, "x1")
Elasticities for multinomial logit model
## S3 method for class 'choicer_mnl' elasticities(object, elast_var, ...)## S3 method for class 'choicer_mnl' elasticities(object, elast_var, ...)
object |
A |
elast_var |
Variable for elasticity computation: a column name (character) or 1-based index into the design matrix X. |
... |
Additional arguments (ignored). |
A J x J elasticity matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elasticities(fit, "x1")library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elasticities(fit, "x1")
Elasticities for mixed logit model
## S3 method for class 'choicer_mxl' elasticities(object, elast_var, is_random_coef = FALSE, ...)## S3 method for class 'choicer_mxl' elasticities(object, elast_var, is_random_coef = FALSE, ...)
object |
A |
elast_var |
Variable for elasticity computation: a column name (character)
or 1-based index. Indexes into X columns for fixed coefficients, or W columns
for random coefficients (when |
is_random_coef |
Logical. |
... |
Additional arguments (ignored). |
A J x J elasticity matrix with alternative labels.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) elasticities(fit, "x1") elasticities(fit, "w1", is_random_coef = TRUE)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) elasticities(fit, "x1") elasticities(fit, "w1", is_random_coef = TRUE)
Create halton normal draws in appropriate format for mixed logit estimation
get_halton_normals(S, N, K_w)get_halton_normals(S, N, K_w)
S |
Number of draws for each choice situation |
N |
number of choice situations |
K_w |
dimension of random coefficients (number of columns in W matrix) |
K_w x S x N array with halton standard normal draws
draws <- get_halton_normals(S = 50, N = 10, K_w = 2) dim(draws) # 2 x 50 x 10draws <- get_halton_normals(S = 50, N = 10, K_w = 2) dim(draws) # 2 x 50 x 10
Utility to compute analytical Jacobian of random coefficient matrix transformed by vech (dVech(Sigma) / dTheta)
jacobian_vech_Sigma(L_params, K_w, rc_correlation = TRUE)jacobian_vech_Sigma(L_params, K_w, rc_correlation = TRUE)
L_params |
flattened choleski decomposition version of the random coefficient parameters matrix |
K_w |
dimension of the random coefficient parameter (symmetric) matrix |
rc_correlation |
whether random coefficients are correlated |
Jacobian (dVech(Sigma) / dTheta)
L_params <- c(log(0.8), 0.2, log(0.6)) J_mat <- jacobian_vech_Sigma(L_params, K_w = 2, rc_correlation = TRUE) dim(J_mat) # 3 x 3 for K_w=2 correlatedL_params <- c(log(0.8), 0.2, log(0.6)) J_mat <- jacobian_vech_Sigma(L_params, K_w = 2, rc_correlation = TRUE) dim(J_mat) # 3 x 3 for K_w=2 correlated
Returns a logLik object, which enables AIC() and BIC() automatically.
## S3 method for class 'choicer_fit' logLik(object, ...)## S3 method for class 'choicer_fit' logLik(object, ...)
object |
A choicer_fit object. |
... |
Additional arguments (ignored). |
A logLik object with df and nobs attributes.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) logLik(fit) AIC(fit) BIC(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) logLik(fit) AIC(fit) BIC(fit)
Consumes a choicer_mc object and returns per-parameter asymptotic
diagnostics: Monte Carlo bias (with MC standard error), empirical SD of
the estimates, mean of the reported standard errors, SE-to-SD ratio
(information-matrix-equality check), Wald coverage at nominal 90 / 95 /
99 percent with Wilson confidence bands, moments of the studentized
statistic z = (theta_hat - theta_0) / se, and four normality tests on
z (Shapiro-Wilk, Anderson-Darling via goftest::ad.test, a hand-coded
Jarque-Bera statistic, and a one-sample Kolmogorov-Smirnov test against
N(0, 1)).
mc_asymptotics( mc, level = 0.95, se_col = "se", conv_threshold = 0.99, se_ratio_threshold_floor = 0.1 )mc_asymptotics( mc, level = 0.95, se_col = "se", conv_threshold = 0.99, se_ratio_threshold_floor = 0.1 )
mc |
A |
level |
Confidence level for the Wilson bands on coverage rates.
Defaults to |
se_col |
Name of the column in |
conv_threshold |
Numeric in |
se_ratio_threshold_floor |
Numeric scalar. Minimum half-width for
the |
Six logical pass / fail flags are attached to every parameter row:
pass_bias requires |bias_mc_se| < 3; pass_se_ratio requires
|se_ratio - 1| to lie within max(se_ratio_threshold_floor, 3 * 1.4 / sqrt(R_used)) (a noise-aware band that widens at small
R_used and tightens to the floor at large R_used); pass_cov95
requires the nominal 95 percent level to lie in the Wilson band for
empirical coverage; pass_skew requires |skew_z| < 0.3; pass_kurt
requires excess kurtosis of z in [-0.5, 1.0]; pass_convergence
requires the per-parameter convergence rate (R_used / R_total) to
meet conv_threshold.
Non-converged replications are excluded per parameter (reported in
R_excluded). Winsorized (5 percent / 95 percent) versions of bias,
sd_emp, and mean_se are reported in parallel columns
(bias_w, sd_emp_w, mean_se_w) so silent outlier exclusion is
transparent to the reader. Two robust SE-to-SD ratios accompany the
Hessian-mean-based se_ratio: se_ratio_med (median SE divided by
the empirical SD) and se_ratio_w (winsorized mean SE divided by the
winsorized empirical SD); both stay near 1 when 1-2 replications
produce near-singular Hessians that inflate mean_se. The companion
se_med column reports the median per-replication SE used by
se_ratio_med. Neither robust ratio drives a pass_* flag — they
are purely informational.
Winsorized z-moment counterparts (mean_z_w, sd_z_w, skew_z_w,
kurt_excess_z_w) are reported alongside the raw z-moments and feed an
additional pass_z_w flag (Winsorized skew within the same band as
pass_skew AND Winsorized excess kurtosis within the same band as
pass_kurt). A companion pass_cov95_w flag is TRUE when either
pass_cov95 is TRUE OR the per-rep Winsorized z-CI (the empirical
2.5 / 97.5 percentiles of the Winsorized z) covers truth-zero. These
two flags are designed for boundary scenarios (e.g., near-zero variance
components) where a small number of reps with vanishing SE inflate the
raw z-moments without indicating an estimator defect.
An object of class choicer_mc_asymptotics — a data.table
with one row per unique parameter and columns documented above — with
meta attached as an attribute (attr(x, "meta")).
sim_fun <- function(seed) simulate_mnl_data(N = 1000, J = 3, seed = seed) fit_fun <- function(sim) run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE, control = list(print_level = 0L) ) mc <- monte_carlo(sim_fun, fit_fun, R = 50L, seed = 1L, progress = FALSE) mc_asymptotics(mc)sim_fun <- function(seed) simulate_mnl_data(N = 1000, J = 3, seed = seed) fit_fun <- function(sim) run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE, control = list(print_level = 0L) ) mc <- monte_carlo(sim_fun, fit_fun, R = 50L, seed = 1L, progress = FALSE) mc_asymptotics(mc)
Computes the diversion ratio matrix DR(j->k), which measures the fraction of demand lost by alternative j that is captured by alternative k. For MNL: DR(j->k) = sum_n(w_n * P_nj * P_nk) / sum_n(w_n * P_nj * (1 - P_nj))
mnl_diversion_ratios_parallel( theta, X, alt_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )mnl_diversion_ratios_parallel( theta, X, alt_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )
theta |
K + J - 1 or K + J vector with model parameters |
X |
sum(M) x K design matrix with covariates. |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
use_asc |
whether to use alternative-specific constants |
include_outside_option |
whether to include outside option |
J x J matrix where entry (k, j) = DR(j->k). Diagonal is 0.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) dr <- mnl_diversion_ratios_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$M, fit$data$weights) drlibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) dr <- mnl_diversion_ratios_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$M, fit$data$weights) dr
Computes the aggregate elasticity matrix (weighted average of individual elasticities) for the Multinomial Logit model.
mnl_elasticities_parallel( theta, X, alt_idx, choice_idx, M, weights, elast_var_idx, use_asc = TRUE, include_outside_option = FALSE )mnl_elasticities_parallel( theta, X, alt_idx, choice_idx, M, weights, elast_var_idx, use_asc = TRUE, include_outside_option = FALSE )
theta |
K + J - 1 or K + J vector with model parameters |
X |
sum(M) x K design matrix with covariates. |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
choice_idx |
N x 1 vector (kept for API consistency, but not used) |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
elast_var_idx |
1-based index of the column in X for which to compute the elasticity |
use_asc |
whether to use alternative-specific constants |
include_outside_option |
whether to include outside option |
J x J matrix of aggregate elasticities
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elas <- mnl_elasticities_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$choice_idx, fit$data$M, fit$data$weights, elast_var_idx = 1L) elaslibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) elas <- mnl_elasticities_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$choice_idx, fit$data$M, fit$data$weights, elast_var_idx = 1L) elas
Computes the log-likelihood and its gradient for the Multinomial Logit model using OpenMP for parallelization. Allows for inclusion of alternative-specific constants, outside option, and observation weights.
mnl_loglik_gradient_parallel( theta, X, alt_idx, choice_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )mnl_loglik_gradient_parallel( theta, X, alt_idx, choice_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )
theta |
K + J - 1 or K + J vector with model parameters |
X |
sum(M) x K design matrix with covariates. Stacks M[i] x K matrices for individual i. |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
choice_idx |
N x 1 vector with indices of chosen alternatives; 1-based indexing relative to X; 0 is used if include_outside_option=True |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
use_asc |
whether to use alternative-specific constants |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
List with loglikelihood and gradient evaluated at input arguments
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mnl_data(dt, "id", "alt", "choice", c("x1", "x2")) theta <- rep(0, ncol(d$X) + nrow(d$alt_mapping) - 1) result <- mnl_loglik_gradient_parallel(theta, d$X, d$alt_idx, d$choice_idx, d$M, d$weights) result$objective # negative log-likelihoodlibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mnl_data(dt, "id", "alt", "choice", c("x1", "x2")) theta <- rep(0, ncol(d$X) + nrow(d$alt_mapping) - 1) result <- mnl_loglik_gradient_parallel(theta, d$X, d$alt_idx, d$choice_idx, d$M, d$weights) result$objective # negative log-likelihood
Hessian matrix for multinomial logit model
mnl_loglik_hessian_parallel( theta, X, alt_idx, choice_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )mnl_loglik_hessian_parallel( theta, X, alt_idx, choice_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )
theta |
K + J - 1 or K + J vector with model parameters |
X |
sum(M) x K design matrix with covariates. Stacks M[i] x K matrices for individual i. |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
choice_idx |
N x 1 vector with indices of chosen alternatives; 1-based indexing relative to X; 0 is used if include_outside_option=True |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
use_asc |
whether to use alternative-specific constants |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
Hessian matrix of the negative log-likelihood
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) H <- mnl_loglik_hessian_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$choice_idx, fit$data$M, fit$data$weights) dim(H)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) H <- mnl_loglik_hessian_parallel(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$choice_idx, fit$data$M, fit$data$weights) dim(H)
Prediction of choice probabilities and utilities based on fitted model
mnl_predict( theta, X, alt_idx, M, use_asc = TRUE, include_outside_option = FALSE )mnl_predict( theta, X, alt_idx, M, use_asc = TRUE, include_outside_option = FALSE )
theta |
K + J - 1 or K + J vector with model parameters |
X |
sum(M) x K design matrix with covariates. Stacks M[i] x K matrices for individual i. |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
use_asc |
whether to use alternative-specific constants |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
List with choice probability and utility for each choice situation evaluated at input arguments
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) pred <- mnl_predict(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$M, use_asc = TRUE) head(pred$choice_prob)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) pred <- mnl_predict(coef(fit), fit$data$X, fit$data$alt_idx, fit$data$M, use_asc = TRUE) head(pred$choice_prob)
Replicates a (DGP -> fit) cycle R times with independent seeds and
collects per-parameter estimates, standard errors, bias, and coverage.
Returns a choicer_mc object; call summary() for aggregated statistics
(mean estimate, bias, RMSE, coverage rate, convergence rate).
monte_carlo( sim_fun, fit_fun, R = 100, seed = 1L, parallel = FALSE, progress = TRUE, ... )monte_carlo( sim_fun, fit_fun, R = 100, seed = 1L, parallel = FALSE, progress = TRUE, ... )
sim_fun |
Function of |
fit_fun |
Function of a |
R |
Number of replications. |
seed |
Base integer seed. Replication |
parallel |
Logical; if |
progress |
Logical; print a one-line progress update per iteration in
serial mode. Ignored when |
... |
Unused. |
Each iteration calls sim_fun(seed = seed + r - 1L), then fit_fun(sim).
Write sim_fun as a closure that captures N, J, and other DGP settings
and forwards seed. Write fit_fun as a closure that takes a
choicer_sim and returns a fitted choicer_fit object, wrapping any
data-preparation, draws, or optimizer-control setup.
A choicer_mc object: a list with elements replications (a long
data.table with one row per estimated parameter per replication) and
meta (run metadata).
sim_fun <- function(seed) simulate_mnl_data(N = 1000, J = 4, seed = seed) fit_fun <- function(sim) run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE, control = list(print_level = 0L) ) mc <- monte_carlo(sim_fun, fit_fun, R = 5, seed = 1L, progress = FALSE) summary(mc)sim_fun <- function(seed) simulate_mnl_data(N = 1000, J = 4, seed = seed) fit_fun <- function(sim) run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE, control = list(print_level = 0L) ) mc <- monte_carlo(sim_fun, fit_fun, R = 5, seed = 1L, progress = FALSE) summary(mc)
Computes the BHHH approximation to the observed information matrix for the
Mixed Logit model: , where
is the per-individual score (gradient of ).
This outer product of gradients (OPG) estimator provides an alternative to
the analytical Hessian for standard error computation that scales to large
problems where the analytical Hessian is infeasible (e.g., many alternatives
or simulation draws).
mxl_bhhh_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_bhhh_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
vector collecting model parameters (beta, mu, L, delta (ASCs)) |
X |
design matrix for covariates with fixed coefficients; sum(M_i) x K_x |
W |
design matrix for covariates with random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
choice_idx |
N x 1 vector with indices of chosen alternatives; 1-based indexing relative to X; 0 is used if include_outside_option=True |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with choice situation draws; K_w x S x N |
rc_dist |
K_w x 1 integer vector indicating distribution of random coefficients: 0 = normal, 1 = log-normal |
rc_correlation |
whether random coefficients should be correlated |
rc_mean |
whether to estimate means for random coefficients. |
use_asc |
whether to use alternative-specific constants. |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
n_params x n_params PSD matrix representing the observed information
matrix estimated by the outer product of gradients (same sign convention
as the negated Hessian returned by mxl_hessian_parallel, so it can
be inverted directly to obtain vcov).
The BHHH/OPG estimator is only asymptotically equivalent to the Hessian-based information matrix at the true MLE. In finite samples it can underestimate standard errors, particularly when the model is mis-specified or away from the optimum.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) theta <- rep(0, ncol(d$X) + ncol(d$W) + nrow(d$alt_mapping) - 1) H <- mxl_bhhh_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) dim(H)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) theta <- rep(0, ncol(d$X) + ncol(d$W) + nrow(d$alt_mapping) - 1) H <- mxl_bhhh_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) dim(H)
Finds the ASC (delta) parameters such that predicted market shares match target shares, using the contraction mapping of Berry, Levinsohn, and Pakes (1995).
mxl_blp_contraction( delta, target_shares, X, W, beta, mu, L_params, alt_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, include_outside_option = FALSE, tol = 1e-08, max_iter = 1000L )mxl_blp_contraction( delta, target_shares, X, W, beta, mu, L_params, alt_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, include_outside_option = FALSE, tol = 1e-08, max_iter = 1000L )
delta |
J-1 or J vector with initial guess for deltas (ASCs) |
target_shares |
J vector with target market shares |
X |
design matrix for fixed coefficients; sum(M_i) x K_x |
W |
design matrix for random coefficients; sum(M_i) x K_w or J x K_w |
beta |
K_x vector with fixed coefficients |
mu |
K_w vector with mean parameters (raw, will be transformed if log-normal) |
L_params |
Cholesky parameters vector |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with draws; K_w x S x N |
rc_dist |
K_w vector indicating distribution (0=normal, 1=log-normal) |
rc_correlation |
whether random coefficients are correlated |
rc_mean |
whether mu parameters represent means (TRUE) or are zero (FALSE) |
include_outside_option |
whether outside option is included |
tol |
convergence tolerance (default 1e-8) |
max_iter |
maximum iterations (default 1000) |
vector with converged delta (ASC) values
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) fit <- run_mxlogit(input_data = d, eta_draws = eta) pm <- fit$param_map delta <- mxl_blp_contraction(rep(0, J), rep(1/J, J), d$X, d$W, coef(fit)[pm$beta], rep(0, ncol(d$W)), coef(fit)[pm$sigma], d$alt_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) deltalibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) fit <- run_mxlogit(input_data = d, eta_draws = eta) pm <- fit$param_map delta <- mxl_blp_contraction(rep(0, J), rep(1/J, J), d$X, d$W, coef(fit)[pm$beta], rep(0, ncol(d$W)), coef(fit)[pm$sigma], d$alt_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) delta
Computes the matrix of attribute-based diversion ratios for a fitted
Mixed Logit model. DR(k, j) is the fraction of demand lost by alternative
j that is captured by alternative k when a marginal change in
alternative j's elast_var attribute reduces s_j.
mxl_diversion_ratios_parallel( theta, X, W, alt_idx, M, weights, eta_draws, rc_dist, elast_var_idx, is_random_coef, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_diversion_ratios_parallel( theta, X, W, alt_idx, M, weights, eta_draws, rc_dist, elast_var_idx, is_random_coef, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
parameter vector (beta, [mu], L, delta) |
X |
design matrix for fixed coefficients; sum(M_i) x K_x |
W |
design matrix for random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with draws; K_w x S x N |
rc_dist |
K_w vector indicating distribution (0=normal, 1=log-normal) |
elast_var_idx |
1-based index of the perturbed variable |
is_random_coef |
TRUE if the variable is in W (random coef), FALSE if in X (fixed) |
rc_correlation |
whether random coefficients are correlated |
rc_mean |
whether mu parameters are estimated |
use_asc |
whether ASCs are included |
include_outside_option |
whether outside option is included |
In MNL the per-draw realized coefficient is a constant, so it cancels in
the ratio and the result is independent of the variable chosen. In MXL,
the realized coefficient varies across individuals
and draws, so the diversion ratio depends on which attribute is perturbed.
For a variable with a fixed coefficient the dependence again vanishes
(the constant cancels); for a random-coefficient variable it does not.
J x J (or (J+1) x (J+1)) matrix of diversion ratios with zero diagonal.
Computes the aggregate elasticity matrix (weighted average of individual elasticities) for the Mixed Logit model. The elasticity E(i,j) represents the percentage change in the probability of choosing alternative i when the attribute of alternative j changes by 1%.
mxl_elasticities_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, elast_var_idx, is_random_coef, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_elasticities_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, elast_var_idx, is_random_coef, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
parameter vector (beta, [mu], L, delta) |
X |
design matrix for fixed coefficients; sum(M_i) x K_x |
W |
design matrix for random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
choice_idx |
N x 1 vector (kept for API consistency, not used) |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with draws; K_w x S x N |
rc_dist |
K_w vector indicating distribution (0=normal, 1=log-normal) |
elast_var_idx |
1-based index of the variable for elasticity computation |
is_random_coef |
TRUE if variable is in W (random coef), FALSE if in X (fixed coef) |
rc_correlation |
whether random coefficients are correlated |
rc_mean |
whether mu parameters are estimated |
use_asc |
whether ASCs are included |
include_outside_option |
whether outside option is included |
J x J matrix of aggregate elasticities
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) fit <- run_mxlogit(input_data = d, eta_draws = eta) elas <- mxl_elasticities_parallel(coef(fit), d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), elast_var_idx = 1L, is_random_coef = FALSE, rc_correlation = FALSE, rc_mean = FALSE) elaslibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) fit <- run_mxlogit(input_data = d, eta_draws = eta) elas <- mxl_elasticities_parallel(coef(fit), d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), elast_var_idx = 1L, is_random_coef = FALSE, rc_correlation = FALSE, rc_mean = FALSE) elas
Computes the Hessian of the log-likelihood for the Mixed Logit model using OpenMP for parallelization. Mirrors the parameters of mxl_loglik_gradient_parallel.
mxl_hessian_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_hessian_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
vector collecting model parameters (beta, mu, L, delta (ASCs)) |
X |
design matrix for covariates with fixed coefficients; sum(M_i) x K_x |
W |
design matrix for covariates with random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
choice_idx |
N x 1 vector with indices of chosen alternatives; 1-based indexing relative to X; 0 is used if include_outside_option=True |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with choice situation draws; K_w x S x N |
rc_dist |
K_w x 1 integer vector indicating distribution of random coefficients: 0 = normal, 1 = log-normal |
rc_correlation |
whether random coefficients should be correlated |
rc_mean |
whether to estimate means for random coefficients. |
use_asc |
whether to use alternative-specific constants. |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
Hessian evaluated at input arguments
For log-normal random coefficients (rc_dist=1) with rc_mean=TRUE, the distribution is a shifted log-normal: beta_k = exp(mu_k) + exp(L_k * eta), where exp(mu_k) shifts the location and exp(L_k * eta) ~ LogNormal(0, sigma_k^2). This differs from the textbook parameterization exp(mu_k + L_k * eta).
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) theta <- rep(0, ncol(d$X) + ncol(d$W) + nrow(d$alt_mapping) - 1) H <- mxl_hessian_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) dim(H)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) theta <- rep(0, ncol(d$X) + ncol(d$W) + nrow(d$alt_mapping) - 1) H <- mxl_hessian_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, ncol(d$W)), rc_correlation = FALSE, rc_mean = FALSE) dim(H)
Computes the log-likelihood and its gradient for the Mixed Logit model using OpenMP for parallelization. Allows for inclusion of alternative-specific constants, outside option, observation weights, correlated random coefficients.
mxl_loglik_gradient_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_loglik_gradient_parallel( theta, X, W, alt_idx, choice_idx, M, weights, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
vector collecting model parameters (beta, mu, L, delta (ASCs)) |
X |
design matrix for covariates with fixed coefficients; sum(M_i) x K_x |
W |
design matrix for covariates with random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives within each choice set; 1-based indexing |
choice_idx |
N x 1 vector with indices of chosen alternatives; 1-based indexing relative to X; 0 is used if include_outside_option=True |
M |
N x 1 vector with number of alternatives for each individual |
weights |
N x 1 vector with weights for each observation |
eta_draws |
Array with choice situation draws; K_w x S x N |
rc_dist |
K_w x 1 integer vector indicating distribution of random coefficients: 0 = normal, 1 = log-normal |
rc_correlation |
whether random coefficients should be correlated |
rc_mean |
whether to estimate means for random coefficients. If so, mean parameters (mu) should be included in theta after beta parameters. |
use_asc |
whether to use alternative-specific constants. If so, parameters should be included in theta after beta and L (and mu, if applicable). |
include_outside_option |
whether to include outside option normalized to 0 (if so, the outside option is not included in the data) |
List with loglikelihood and gradient evaluated at input arguments
For log-normal random coefficients (rc_dist=1) with rc_mean=TRUE, the distribution is a shifted log-normal: beta_k = exp(mu_k) + exp(L_k * eta), where exp(mu_k) shifts the location and exp(L_k * eta) ~ LogNormal(0, sigma_k^2). This differs from the textbook parameterization exp(mu_k + L_k * eta).
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) K_x <- ncol(d$X); K_w <- ncol(d$W); J <- nrow(d$alt_mapping) theta <- rep(0, K_x + K_w + J - 1) result <- mxl_loglik_gradient_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, K_w), rc_correlation = FALSE, rc_mean = FALSE) result$objectivelibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", "w1") eta <- get_halton_normals(50, d$N, ncol(d$W)) K_x <- ncol(d$X); K_w <- ncol(d$W); J <- nrow(d$alt_mapping) theta <- rep(0, K_x + K_w + J - 1) result <- mxl_loglik_gradient_parallel(theta, d$X, d$W, d$alt_idx, d$choice_idx, d$M, d$weights, eta, rc_dist = rep(0L, K_w), rc_correlation = FALSE, rc_mean = FALSE) result$objective
Returns the simulated choice probability for each (individual, alternative)
row of X, averaged over the supplied Halton draws. Mirrors mnl_predict.
mxl_predict( theta, X, W, alt_idx, M, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )mxl_predict( theta, X, W, alt_idx, M, eta_draws, rc_dist, rc_correlation = TRUE, rc_mean = FALSE, use_asc = TRUE, include_outside_option = FALSE )
theta |
parameter vector (beta, [mu], L, delta) |
X |
design matrix for fixed coefficients; sum(M_i) x K_x |
W |
design matrix for random coefficients; sum(M_i) x K_w or J x K_w |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing |
M |
N x 1 vector with number of alternatives for each individual |
eta_draws |
Array with draws; K_w x S x N |
rc_dist |
K_w vector indicating distribution (0=normal, 1=log-normal) |
rc_correlation |
whether random coefficients are correlated |
rc_mean |
whether mu parameters are estimated |
use_asc |
whether ASCs are included |
include_outside_option |
whether the outside option is present |
List with choice_prob (length sum(M)), utility (length sum(M),
simulated mean of the deterministic + W*gamma component), and, when
include_outside_option = TRUE, choice_prob_outside (length N).
choicer_sim objectWraps simulated data, true parameter values, and DGP settings into a
classed list. Returned by simulate_mnl_data(), simulate_mxl_data(),
and simulate_nl_data(), and consumed by recovery_table().
new_choicer_sim(data, true_params, settings, model)new_choicer_sim(data, true_params, settings, model)
data |
A |
true_params |
Named list of true DGP parameters
(e.g. |
settings |
Named list of DGP settings (e.g. |
model |
Character scalar: |
A list of class choicer_sim.
Computes the log-likelihood and its gradient for the Nested Logit model using OpenMP for parallelization. Especially handles singleton nests by fixing their lambda parameters to 1. Only non-singleton nests have a inclusive value coefficient estimated in theta.
nl_loglik_gradient_parallel( theta, X, alt_idx, choice_idx, nest_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )nl_loglik_gradient_parallel( theta, X, alt_idx, choice_idx, nest_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE )
theta |
(K + n_non_singleton_nests + n_delta) vector with model parameters.
Order: |
X |
sum(M) x K design matrix with covariates. |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing. |
choice_idx |
N x 1 vector with indices of chosen alternatives; 0 for outside option, 1-based index relative to rows in X_i otherwise. |
nest_idx |
J x 1 vector with indices of nests for each alternative; 1-based indexing (1 to n_nests). |
M |
N x 1 vector with number of alternatives for each individual. |
weights |
N x 1 vector with weights for each observation. |
use_asc |
whether to use alternative-specific constants. |
include_outside_option |
whether to include outside option normalized to V=0, lambda=1. |
List with loglikelihood and gradient evaluated at input arguments
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") K_x <- ncol(d$X); K_l <- length(unique(d$nest_idx)) theta <- c(rep(0, K_x), rep(0.5, K_l), rep(0, J - 1)) result <- nl_loglik_gradient_parallel(theta, d$X, d$alt_idx, d$choice_idx, d$nest_idx, d$M, d$weights) result$objectivelibrary(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") K_x <- ncol(d$X); K_l <- length(unique(d$nest_idx)) theta <- c(rep(0, K_x), rep(0.5, K_l), rep(0, J - 1)) result <- nl_loglik_gradient_parallel(theta, d$X, d$alt_idx, d$choice_idx, d$nest_idx, d$M, d$weights) result$objective
Numerical Hessian of the log-likelihood via finite differences
nl_loglik_numeric_hessian( theta, X, alt_idx, choice_idx, nest_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE, eps = 1e-06 )nl_loglik_numeric_hessian( theta, X, alt_idx, choice_idx, nest_idx, M, weights, use_asc = TRUE, include_outside_option = FALSE, eps = 1e-06 )
theta |
(K + n_delta + n_nests) vector with model parameters.
Order: |
X |
sum(M) x K design matrix with covariates. |
alt_idx |
sum(M) x 1 vector with indices of alternatives; 1-based indexing. |
choice_idx |
N x 1 vector with indices of chosen alternatives; 0 for outside option, 1-based index relative to rows in X_i otherwise. |
nest_idx |
J x 1 vector with indices of nests for each alternative; 1-based indexing (1 to n_nests). |
M |
N x 1 vector with number of alternatives for each individual. |
weights |
N x 1 vector with weights for each observation. |
use_asc |
whether to use alternative-specific constants. |
include_outside_option |
whether to include outside option normalized to V=0, lambda=1. |
eps |
finite difference step size |
Hessian evaluated at input arguments
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") K_x <- ncol(d$X); K_l <- length(unique(d$nest_idx)) theta <- c(rep(0, K_x), rep(0.5, K_l), rep(0, J - 1)) H <- nl_loglik_numeric_hessian(theta, d$X, d$alt_idx, d$choice_idx, d$nest_idx, d$M, d$weights) dim(H)library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] d <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") K_x <- ncol(d$X); K_l <- length(unique(d$nest_idx)) theta <- c(rep(0, K_x), rep(0.5, K_l), rep(0, J - 1)) H <- nl_loglik_numeric_hessian(theta, d$X, d$alt_idx, d$choice_idx, d$nest_idx, d$M, d$weights) dim(H)
Extract number of observations from a choicer_fit object
## S3 method for class 'choicer_fit' nobs(object, ...)## S3 method for class 'choicer_fit' nobs(object, ...)
object |
A choicer_fit object. |
... |
Additional arguments (ignored). |
Integer number of choice situations.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) nobs(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) nobs(fit)
Computes choice probabilities or aggregate market shares.
## S3 method for class 'choicer_mnl' predict(object, type = c("probabilities", "shares"), ...)## S3 method for class 'choicer_mnl' predict(object, type = c("probabilities", "shares"), ...)
object |
A choicer_mnl object. |
type |
One of "probabilities" (individual-level choice probabilities) or "shares" (aggregate market shares). |
... |
Additional arguments (ignored). |
For "probabilities": a list with choice_prob and utility vectors.
For "shares": a named numeric vector of market shares per alternative.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) predict(fit, type = "shares") predict(fit, type = "probabilities")library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) predict(fit, type = "shares") predict(fit, type = "probabilities")
Computes simulated choice probabilities or aggregate market shares using stored Halton draws.
## S3 method for class 'choicer_mxl' predict(object, type = c("probabilities", "shares"), ...)## S3 method for class 'choicer_mxl' predict(object, type = c("probabilities", "shares"), ...)
object |
A choicer_mxl object. |
type |
Either "probabilities" (per-observation simulated choice probabilities) or "shares" (aggregate simulated market shares). |
... |
Additional arguments (ignored). |
For "probabilities": a list with choice_prob and utility
vectors averaged across simulation draws. For "shares": a named numeric
vector of simulated market shares per alternative.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) predict(fit, type = "shares") predict(fit, type = "probabilities")library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) predict(fit, type = "shares") predict(fit, type = "probabilities")
mnl_loglik_gradient_parallel()
Prepares and validates inputs for multinomial logit estimation routine.
prepare_mnl_data( data, id_col, alt_col, choice_col, covariate_cols, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE )prepare_mnl_data( data, id_col, alt_col, choice_col, covariate_cols, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE )
data |
Data frame containing choice data. |
id_col |
Name of the column identifying choice situations (individuals). |
alt_col |
Name of the column identifying alternatives. |
choice_col |
Name of the column indicating chosen alternative (1 = chosen, 0 = not chosen). |
covariate_cols |
Vector of names of columns to be used as covariates. |
weights |
Optional vector of weights for each choice situation. If |
outside_opt_label |
Label for the outside option (if any). If |
include_outside_option |
Logical indicating whether to include an outside option in the model. |
A list containing:
X: Design matrix (sum(M) x K).
alt_idx: Integer vector of alternative indices.
choice_idx: Integer vector of chosen alternative indices.
M: Integer vector with number of alternatives per choice situation.
N: Number of choice situations.
weights: Vector of weights.
include_outside_option: Logical flag.
alt_mapping: Data.table mapping alternatives to summary statistics.
dropped_cols: Names of columns dropped due to collinearity, if any.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_mnl_data(dt, "id", "alt", "choice", c("x1", "x2")) str(input$X) input$alt_mappinglibrary(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_mnl_data(dt, "id", "alt", "choice", c("x1", "x2")) str(input$X) input$alt_mapping
mxl_loglik_gradient_parallel()
Prepares and validates inputs for mixed logit estimation routine.
prepare_mxl_data( data, id_col, alt_col, choice_col, covariate_cols, random_var_cols, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, rc_correlation = FALSE )prepare_mxl_data( data, id_col, alt_col, choice_col, covariate_cols, random_var_cols, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, rc_correlation = FALSE )
data |
Data frame containing choice data |
id_col |
Name of the column identifying choice situations (individuals) |
alt_col |
Name of the column identifying alternatives |
choice_col |
Name of the column indicating chosen alternative (1 = chosen, 0 = not chosen) |
covariate_cols |
Vector of names of columns to be used as covariates |
random_var_cols |
Vector of names of columns to be used as random variables |
weights |
Optional vector of weights for each choice situation. If NULL, equal weights are used. |
outside_opt_label |
Label for the outside option (if any). If NULL, no outside option is assumed. |
include_outside_option |
Logical indicating whether to include an outside option in the model. |
rc_correlation |
Logical indicating whether random coefficients are correlated. Default is FALSE. |
A choicer_data_mxl object (list) containing:
X: Fixed-coefficient design matrix (sum(M) x K_x).
W: Random-coefficient design matrix (sum(M) x K_w).
alt_idx: Integer vector of alternative indices.
choice_idx: Integer vector of chosen alternative indices.
M: Integer vector with number of alternatives per choice situation.
N: Number of choice situations.
weights: Vector of weights.
include_outside_option: Logical flag.
rc_correlation: Logical flag.
alt_mapping: data.table mapping alternatives to summary statistics.
dropped_cols: Names of columns dropped due to collinearity, if any.
data_spec: List with column-name metadata.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N), w2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", c("w1", "w2")) str(input$X) str(input$W)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N), w2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_mxl_data(dt, "id", "alt", "choice", "x1", c("w1", "w2")) str(input$X) str(input$W)
Validates inputs, builds design matrices, and constructs nest structure
for nested logit estimation. Calls prepare_mnl_data internally
for base data preparation, then adds nest-specific fields.
prepare_nl_data( data, id_col, alt_col, choice_col, covariate_cols, nest_col, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE )prepare_nl_data( data, id_col, alt_col, choice_col, covariate_cols, nest_col, weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE )
data |
Data frame containing choice data. |
id_col |
Name of the column identifying choice situations (individuals). |
alt_col |
Name of the column identifying alternatives. |
choice_col |
Name of the column indicating chosen alternative (1 = chosen, 0 = not chosen). |
covariate_cols |
Vector of names of columns to be used as covariates. |
nest_col |
Name of the column mapping each alternative to its nest. Every alternative must belong to exactly one nest. |
weights |
Optional vector of weights for each choice situation. If |
outside_opt_label |
Label for the outside option (if any). If |
include_outside_option |
Logical indicating whether to include an outside option in the model. |
A choicer_data_nl object (list) containing:
All fields from prepare_mnl_data (X, alt_idx,
choice_idx, M, N, weights, include_outside_option,
alt_mapping, dropped_cols).
nest_idx: Integer vector of length J mapping each alternative
(in alt_mapping row order) to its nest.
data_spec: List with column name metadata including nest_col.
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") input$nest_idx input$alt_mappinglibrary(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] input <- prepare_nl_data(dt, "id", "alt", "choice", c("x1", "x2"), "nest") input$nest_idx input$alt_mapping
Prints a brief summary of the fitted model.
## S3 method for class 'choicer_fit' print(x, ...)## S3 method for class 'choicer_fit' print(x, ...)
x |
A choicer_fit object. |
... |
Additional arguments (ignored). |
The object invisibly.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) print(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) print(fit)
Print summary for multinomial logit model
## S3 method for class 'summary.choicer_mnl' print(x, ...)## S3 method for class 'summary.choicer_mnl' print(x, ...)
x |
A summary.choicer_mnl object. |
... |
Additional arguments (ignored). |
The object invisibly.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) print(summary(fit))library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) print(summary(fit))
Print summary for mixed logit model
## S3 method for class 'summary.choicer_mxl' print(x, ...)## S3 method for class 'summary.choicer_mxl' print(x, ...)
x |
A summary.choicer_mxl object. |
... |
Additional arguments (ignored). |
The object invisibly.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) print(summary(fit))library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) print(summary(fit))
Print summary for nested logit model
## S3 method for class 'summary.choicer_nl' print(x, ...)## S3 method for class 'summary.choicer_nl' print(x, ...)
x |
A summary.choicer_nl object. |
... |
Additional arguments (ignored). |
The object invisibly.
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) print(summary(fit))library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) print(summary(fit))
Compares fitted coefficients to a set of true parameter values on the same scale as the estimator's internal parameterization. Returns one row per estimated parameter with true value, estimate, standard error, bias, relative bias (%), z-score against the truth, Wald CI, and a coverage indicator.
recovery_table(object, truth = NULL, level = 0.95, ...) ## S3 method for class 'choicer_fit' recovery_table(object, truth = NULL, level = 0.95, ...) ## S3 method for class 'choicer_mc' recovery_table(object, truth = NULL, level = 0.95, ...)recovery_table(object, truth = NULL, level = 0.95, ...) ## S3 method for class 'choicer_fit' recovery_table(object, truth = NULL, level = 0.95, ...) ## S3 method for class 'choicer_mc' recovery_table(object, truth = NULL, level = 0.95, ...)
object |
A |
truth |
Either a |
level |
Confidence level for the Wald CI and coverage indicator.
Default |
... |
Unused. |
For MXL fits the sigma block compares the raw Cholesky parameters
(L_params), not the reconstructed covariance matrix. For log-normal
random-coefficient means the raw mu estimate is compared directly; callers
who want recovery on the DGP scale (exp(mu)) should transform both sides
before calling.
When the estimator has normalized the first inside alternative's ASC to
zero (which happens for MNL/MXL with include_outside_option = FALSE and
no outside option baked into the fit), the first entry of truth$delta is
dropped before the comparison so lengths match.
See class-specific methods.
recovery_table(choicer_fit): Returns a choicer_recovery object (a
data.table) with columns parameter, group, true, estimate,
se, bias, rel_bias_pct, z_vs_true, lower_ci, upper_ci,
covers.
recovery_table(choicer_mc): For a choicer_mc object, delegates to
summary(object, level) and returns a choicer_mc_summary. Inspect
object$replications directly for per-rep detail.
sim <- simulate_mnl_data(N = 2000, J = 4, seed = 123) fit <- run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE ) recovery_table(fit, sim)sim <- simulate_mnl_data(N = 2000, J = 4, seed = 123) fit <- run_mnlogit( data = sim$data, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), outside_opt_label = 0L, include_outside_option = FALSE, use_asc = TRUE ) recovery_table(fit, sim)
Estimates a multinomial logit model via maximum likelihood.
run_mnlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, input_data = NULL, optimizer = NULL, control = list(), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, use_asc = TRUE, keep_data = TRUE, nloptr_opts = NULL )run_mnlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, input_data = NULL, optimizer = NULL, control = list(), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, use_asc = TRUE, keep_data = TRUE, nloptr_opts = NULL )
data |
Data frame containing choice data (convenience workflow).
Mutually exclusive with |
id_col |
Name of the column identifying choice situations (individuals). |
alt_col |
Name of the column identifying alternatives. |
choice_col |
Name of the column indicating chosen alternative (1 = chosen, 0 = not chosen). |
covariate_cols |
Vector of names of columns to be used as covariates. |
input_data |
List output from |
optimizer |
Optimizer to use: |
control |
List of optimizer-specific control parameters passed to the
chosen optimizer (e.g., |
weights |
Optional vector of weights for each choice situation. If |
outside_opt_label |
Label for the outside option (if any). If |
include_outside_option |
Logical indicating whether to include an outside option in the model. |
use_asc |
Logical indicating whether to include alternative-specific constants (ASCs) in the model. |
keep_data |
Logical. If |
nloptr_opts |
Deprecated. Use |
Two workflows are supported:
Supply data and column names. Data
preparation (prepare_mnl_data) is handled automatically.
Call prepare_mnl_data yourself and pass the
result via input_data.
A choicer_mnl object (inherits from choicer_fit).
Standard S3 methods available: summary(), coef(), vcov(),
logLik(), AIC(), BIC(), nobs(), predict().
library(data.table) set.seed(42) N <- 100; J <- 3; beta_true <- c(1.0, -0.5) dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, V := drop(as.matrix(.SD) %*% beta_true), .SDcols = c("x1","x2")] dt[, prob := exp(V) / sum(exp(V)), by = id] dt[, choice := as.integer(alt == sample(alt, 1, prob = prob)), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) summary(fit) coef(fit) AIC(fit) predict(fit, type = "shares")library(data.table) set.seed(42) N <- 100; J <- 3; beta_true <- c(1.0, -0.5) dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, V := drop(as.matrix(.SD) %*% beta_true), .SDcols = c("x1","x2")] dt[, prob := exp(V) / sum(exp(V)), by = id] dt[, choice := as.integer(alt == sample(alt, 1, prob = prob)), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) summary(fit) coef(fit) AIC(fit) predict(fit, type = "shares")
Estimates a mixed logit model via simulated maximum likelihood.
run_mxlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, random_var_cols = NULL, input_data = NULL, eta_draws = NULL, S = 100L, rc_dist = NULL, rc_mean = FALSE, rc_correlation = FALSE, use_asc = TRUE, theta_init = NULL, lower = NULL, upper = NULL, optimizer = NULL, control = list(), se_method = c("hessian", "bhhh"), scale_vars = c("none", "sd", "mad", "iqr"), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, keep_data = TRUE, nloptr_opts = NULL )run_mxlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, random_var_cols = NULL, input_data = NULL, eta_draws = NULL, S = 100L, rc_dist = NULL, rc_mean = FALSE, rc_correlation = FALSE, use_asc = TRUE, theta_init = NULL, lower = NULL, upper = NULL, optimizer = NULL, control = list(), se_method = c("hessian", "bhhh"), scale_vars = c("none", "sd", "mad", "iqr"), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, keep_data = TRUE, nloptr_opts = NULL )
data |
Data frame containing choice data (convenience workflow).
Mutually exclusive with |
id_col |
Name of the column identifying choice situations. |
alt_col |
Name of the column identifying alternatives. |
choice_col |
Name of the column indicating chosen alternative (1/0). |
covariate_cols |
Vector of column names for fixed covariates. |
random_var_cols |
Vector of column names for random coefficients. |
input_data |
List output from |
eta_draws |
Array of shape K_w x S x N with standard normal draws.
Required for the advanced workflow; auto-generated from |
S |
Integer number of Halton draws per individual (convenience workflow only). Default 100. |
rc_dist |
Integer vector indicating distribution of random coefficients (0 = normal, 1 = log-normal). Default: all normal. |
rc_mean |
Logical indicating whether to estimate means for random coefficients. |
rc_correlation |
Logical indicating whether random coefficients are
correlated (convenience workflow). Ignored when |
use_asc |
Logical indicating whether to include alternative-specific constants. |
theta_init |
Initial parameter vector in natural-scale units. If
|
lower, upper
|
Optional parameter bounds for the optimizer, in
natural-scale units (forward-transformed internally to scaled space when
|
optimizer |
Optimizer to use: |
control |
List of optimizer-specific control parameters. |
se_method |
Method for computing standard errors. Either
|
scale_vars |
Pre-estimation column scaling for design matrices. One of
|
weights |
Optional weight vector (convenience workflow). If |
outside_opt_label |
Label for the outside option (convenience workflow). |
include_outside_option |
Logical whether to include an outside option (convenience workflow). |
keep_data |
Logical. If |
nloptr_opts |
Deprecated. Use |
Two workflows are supported:
Supply data and column names. Data preparation
(prepare_mxl_data) and Halton draw generation
(get_halton_normals) are handled automatically.
Call prepare_mxl_data and
get_halton_normals yourself, then pass the results via
input_data and eta_draws.
A choicer_mxl object (inherits from choicer_fit).
Standard S3 methods available: summary(), coef(),
vcov(), logLik(), AIC(), BIC(),
nobs().
library(data.table) set.seed(42) N <- 100; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N), w2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = c("w1", "w2"), S = 50L ) summary(fit)library(data.table) set.seed(42) N <- 100; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N), w2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = c("w1", "w2"), S = 50L ) summary(fit)
Estimates a nested logit model via maximum likelihood.
run_nestlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, nest_col = NULL, input_data = NULL, use_asc = TRUE, theta_init = NULL, param_names = NULL, optimizer = NULL, control = list(), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, keep_data = TRUE, nloptr_opts = NULL )run_nestlogit( data = NULL, id_col = NULL, alt_col = NULL, choice_col = NULL, covariate_cols = NULL, nest_col = NULL, input_data = NULL, use_asc = TRUE, theta_init = NULL, param_names = NULL, optimizer = NULL, control = list(), weights = NULL, outside_opt_label = NULL, include_outside_option = FALSE, keep_data = TRUE, nloptr_opts = NULL )
data |
Data frame containing choice data (convenience workflow).
Mutually exclusive with |
id_col |
Name of the column identifying choice situations. |
alt_col |
Name of the column identifying alternatives. |
choice_col |
Name of the column indicating chosen alternative (1/0). |
covariate_cols |
Vector of column names for covariates. |
nest_col |
Name of the column mapping each alternative to its nest (convenience workflow). |
input_data |
List containing prepared input data for estimation
(advanced workflow). Mutually exclusive with |
use_asc |
Logical indicating whether to include alternative specific constants (ASCs). |
theta_init |
Optional initial parameter vector. If |
param_names |
Optional vector of parameter names. If |
optimizer |
Optimizer to use: |
control |
List of optimizer-specific control parameters. |
weights |
Optional weight vector (convenience workflow). If |
outside_opt_label |
Label for the outside option (convenience workflow). |
include_outside_option |
Logical whether to include an outside option (convenience workflow). |
keep_data |
Logical. If |
nloptr_opts |
Deprecated. Use |
Two workflows are supported:
Supply data and column names (including
nest_col). Data preparation (prepare_nl_data) is
handled automatically.
Call prepare_nl_data (or build the input
list manually) and pass it via input_data.
A choicer_nl object (inherits from choicer_fit).
Standard S3 methods available: summary(), coef(),
vcov(), logLik(), AIC(), BIC(),
nobs().
library(data.table) set.seed(42) N <- 100; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) summary(fit)library(data.table) set.seed(42) N <- 100; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) summary(fit)
Generates synthetic choice data with i.i.d. Gumbel errors, optionally with varying choice-set sizes and an outside option (alt = 0). Choices are determined by argmax of utility; covariates are drawn as Uniform(-1, 1).
simulate_mnl_data( N = 5000, J = 5, beta = c(0.8, -0.6), delta = NULL, seed = 123, outside_option = TRUE, vary_choice_set = TRUE )simulate_mnl_data( N = 5000, J = 5, beta = c(0.8, -0.6), delta = NULL, seed = 123, outside_option = TRUE, vary_choice_set = TRUE )
N |
Number of choice situations. |
J |
Number of inside alternatives. |
beta |
Fixed coefficients for |
delta |
Alternative-specific constants for inside alternatives
(length |
seed |
Random seed. Pass |
outside_option |
Logical; if |
vary_choice_set |
Logical; if |
A choicer_sim object.
sim <- simulate_mnl_data(N = 1000, J = 5, seed = 123) print(sim)sim <- simulate_mnl_data(N = 1000, J = 5, seed = 123) print(sim)
Generates synthetic choice data with random coefficients drawn from a
multivariate normal (optionally log-normal per dimension) and an additional
mean shifter mu. Random coefficients are parameterized via the lower
Cholesky factor of Sigma. Covariates are Uniform(-1, 1) by default;
columns named in price_cols are drawn as -Uniform(0.1, 3) to mimic
strictly-negative price variables.
simulate_mxl_data( N = 5000, J = 4, beta = c(0.8, -0.6), delta = NULL, mu = NULL, Sigma = matrix(c(1, 0.5, 0.5, 1.5), nrow = 2), rc_dist = NULL, rc_correlation = NULL, price_cols = NULL, seed = 123, outside_option = TRUE, vary_choice_set = TRUE )simulate_mxl_data( N = 5000, J = 4, beta = c(0.8, -0.6), delta = NULL, mu = NULL, Sigma = matrix(c(1, 0.5, 0.5, 1.5), nrow = 2), rc_dist = NULL, rc_correlation = NULL, price_cols = NULL, seed = 123, outside_option = TRUE, vary_choice_set = TRUE )
N |
Number of choice situations. |
J |
Number of inside alternatives. |
beta |
Fixed coefficients for |
delta |
ASCs for inside alternatives (length |
mu |
Mean shifter for random coefficients (length |
Sigma |
Covariance matrix of random coefficients (square, |
rc_dist |
Integer vector (length |
rc_correlation |
Logical; if |
price_cols |
Character vector of |
seed |
Random seed ( |
outside_option |
Logical; include outside option with |
vary_choice_set |
Logical; if |
Random coefficients are constructed to match the estimator's
parameterization in src/mxlogit.cpp. For every dimension the raw draw
is L %*% eta where eta ~ N(0, I). A normal random coefficient
(rc_dist = 0) is then gamma_k = mu_k + (L %*% eta)_k. A log-normal
random coefficient (rc_dist = 1) follows the shifted log-normal
beta_k = exp(mu_k) + exp((L %*% eta)_k) – not the textbook
exp(mu_k + sigma_k * eta) – so mu_k in true_params$mu is on the
same scale the estimator recovers and recovery_table() can compare
like-for-like.
A choicer_sim object. true_params includes beta, delta,
Sigma, L_params (packed Cholesky parameters), mu, rc_dist,
rc_correlation.
sim <- simulate_mxl_data(N = 1000, J = 4, seed = 123) print(sim)sim <- simulate_mxl_data(N = 1000, J = 4, seed = 123) print(sim)
Generates synthetic choice data with nested logit probabilities computed
analytically (log-sum-exp over inclusive values), then samples choices from
the implied multinomial. The outside option (j = 0) sits in a singleton
nest with lambda = 1.
simulate_nl_data( N = 10000, beta = c(1.5, -0.8), delta = c(`1` = 0.5, `2` = 0.3, `3` = -0.2, `4` = -0.5, `5` = 0.4), nests = list(c(1, 2), c(3, 4, 5)), lambdas = c(0.8, 0.2), seed = 123 )simulate_nl_data( N = 10000, beta = c(1.5, -0.8), delta = c(`1` = 0.5, `2` = 0.3, `3` = -0.2, `4` = -0.5, `5` = 0.4), nests = list(c(1, 2), c(3, 4, 5)), lambdas = c(0.8, 0.2), seed = 123 )
N |
Number of choice situations. |
beta |
Fixed coefficients for covariates |
delta |
Named numeric vector of ASCs for inside alternatives. |
nests |
List of integer vectors defining nest membership for inside alternatives. |
lambdas |
Numeric vector of dissimilarity parameters, one per nest. |
seed |
Random seed ( |
A choicer_sim object. true_params includes beta, delta,
lambdas; settings includes the nest_structure. The returned
data retains a nest column (integer, with 0L for the outside
option) for convenient use with run_nestlogit().
Unlike simulate_mnl_data() and simulate_mxl_data(), this
function does not expose outside_option or vary_choice_set flags.
The outside option (j = 0) is always present as a singleton nest with
lambda = 1, and every individual faces the full set of inside
alternatives. Add these flags if downstream use cases need them.
sim <- simulate_nl_data(N = 2000, seed = 123) print(sim)sim <- simulate_nl_data(N = 2000, seed = 123) print(sim)
Computes and returns a coefficient summary table with standard errors, z-values, p-values, and significance codes. Triggers lazy Hessian computation if standard errors have not been computed yet.
## S3 method for class 'choicer_mnl' summary(object, ...)## S3 method for class 'choicer_mnl' summary(object, ...)
object |
A choicer_mnl object. |
... |
Additional arguments (ignored). |
A summary.choicer_mnl object (list with coefficients table and metadata).
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) summary(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) summary(fit)
Computes coefficient summary with delta-method transformation for variance parameters (Cholesky to covariance scale) and log-normal mean parameters. Triggers lazy Hessian computation if standard errors have not been computed yet.
## S3 method for class 'choicer_mxl' summary(object, ...)## S3 method for class 'choicer_mxl' summary(object, ...)
object |
A choicer_mxl object. |
... |
Additional arguments (ignored). |
A summary.choicer_mxl object.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) summary(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), w1 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mxlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = "x1", random_var_cols = "w1", S = 50L ) summary(fit)
Triggers lazy Hessian computation if standard errors have not been computed yet.
## S3 method for class 'choicer_nl' summary(object, ...)## S3 method for class 'choicer_nl' summary(object, ...)
object |
A choicer_nl object. |
... |
Additional arguments (ignored). |
A summary.choicer_nl object.
library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) summary(fit)library(data.table) set.seed(42) N <- 50; J <- 4 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, nest := ifelse(alt <= 2, "A", "B")] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_nestlogit( data = dt, id_col = "id", alt_col = "alt", choice_col = "choice", covariate_cols = c("x1", "x2"), nest_col = "nest" ) summary(fit)
Triggers lazy Hessian computation if vcov has not been computed yet.
## S3 method for class 'choicer_fit' vcov(object, ...)## S3 method for class 'choicer_fit' vcov(object, ...)
object |
A choicer_fit object. |
... |
Additional arguments (ignored). |
Named variance-covariance matrix, or NULL if unavailable.
library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) vcov(fit)library(data.table) set.seed(42) N <- 50; J <- 3 dt <- data.table(id = rep(1:N, each = J), alt = rep(1:J, N)) dt[, `:=`(x1 = rnorm(.N), x2 = rnorm(.N))] dt[, choice := 0L] dt[, choice := sample(c(1L, rep(0L, J - 1))), by = id] fit <- run_mnlogit(dt, "id", "alt", "choice", c("x1", "x2")) vcov(fit)