Worked Example: Complete Analysis

Scenario

We have IPD from a trial of Drug A (index treatment, N=500) and published AgD from a trial of Drug B (comparator, N=400). We want to estimate the treatment effect of Drug A vs Drug B on a binary endpoint.

Two binary covariates (age group and sex) are available in both trials but their distributions differ:

  • Drug A trial: 40% elderly, 55% female
  • Drug B trial: 35% elderly, 50% female

Step 1: Simulate example data

library(mlumr)
set.seed(2026)

# --- IPD for Drug A ---
n_A <- 500
age_A <- rbinom(n_A, 1, 0.40)
sex_A <- rbinom(n_A, 1, 0.55)

# True model: logit(p) = -0.5 + 0.8*age - 0.3*sex
logit_p_A <- -0.5 + 0.8 * age_A - 0.3 * sex_A
y_A <- rbinom(n_A, 1, plogis(logit_p_A))

ipd_df <- data.frame(
  trt = "Drug_A",
  outcome = y_A,
  age_group = age_A,
  sex = sex_A
)

# --- AgD for Drug B ---
# True comparator model: logit(p) = -0.8 + 0.8*age - 0.3*sex
# (same covariate effects, different intercept)
n_B <- 400
age_B <- rbinom(n_B, 1, 0.35)
sex_B <- rbinom(n_B, 1, 0.50)
logit_p_B <- -0.8 + 0.8 * age_B - 0.3 * sex_B
y_B <- rbinom(n_B, 1, plogis(logit_p_B))

agd_df <- data.frame(
  trt = "Drug_B",
  n_total = n_B,
  n_events = sum(y_B),
  age_group_mean = mean(age_B),
  sex_mean = mean(sex_B)
)

Step 2: Prepare data objects

ipd <- set_ipd(ipd_df, treatment = "trt", outcome = "outcome",
               covariates = c("age_group", "sex"))

agd <- set_agd(agd_df, treatment = "trt",
               outcome_n = "n_total", outcome_r = "n_events",
               cov_means = c("age_group_mean", "sex_mean"),
               cov_types = c("binary", "binary"))

dat <- combine_data(ipd, agd)
print(dat)
#> Unanchored Comparison Data (Binary)
#> ====================================
#> 
#> Index treatment (IPD): Drug_A 
#>   N = 500 
#>   Events = 205 (41.0%) 
#> 
#> Comparator treatment (AgD): Drug_B 
#>   N = 400 
#>   Events = 145 (36.2%) 
#> 
#> Covariates ( 2 ): age_group, sex 
#> Integration points: not yet added (use add_integration())

Step 3: Add integration points

dat <- add_integration(
  dat,
  n_int = 64,
  age_group = distr(qbern, prob = age_group_mean),
  sex = distr(qbern, prob = sex_mean)
)

check_integration(
  dat,
  age_group = distr(qbern, prob = age_group_mean),
  sex = distr(qbern, prob = sex_mean)
)
#> Integration check: n_int = 64 vs 128
#> Marginals -- max relative difference: 0.0154
#> CAUTION: 1-5%% marginal relative difference. Consider increasing n_int.
#> Joint -- max |cor(current) - cor(doubled)|: 0.0216
#> OK joint: pairwise correlations agree within 0.05.

Step 4: Naive estimate (benchmark)

naive_result <- naive(dat)
print(naive_result)
#> Naive Unadjusted Indirect Comparison
#> =====================================
#> 
#> Treatments: Drug_A vs Drug_B 
#> 
#> Event rates:
#>   Index (IPD):      0.410 (205/500)
#>   Comparator (AgD): 0.362 (145/400)
#> 
#> Log Odds Ratio: 0.2006 (SE: 0.1382)
#> 95% CI: [-0.0702, 0.4713]

Step 5: STC estimate

stc_result <- stc(dat)
print(stc_result)
#> Simulated Treatment Comparison (G-computation)
#> ===============================================
#> 
#> Treatments: Drug_A vs Drug_B 
#> 
#> Marginalized P(Y=1|index trt, comp pop): 0.4006
#> Observed P(Y=1|comp trt, comp pop):      0.3625
#> 
#> Log Odds Ratio: 0.1614 (SE: 0.1375)
#> 95% CI: [-0.1081, 0.4308]
#> 
#> Outcome model coefficients:
#> (Intercept)   age_group         sex 
#>     -0.6545      0.8477     -0.1063

Step 6: ML-UMR SPFA model

fit_spfa <- mlumr(
  dat,
  model = "spfa",
  prior_intercept = prior_normal(0, 10),
  prior_beta = prior_normal(0, 2.5),
  chains = 2,
  iter = 500,
  warmup = 250,
  seed = 42,
  refresh = 0,
  verbose = FALSE
)

summary(fit_spfa)
#> ML-UMR Model Summary
#> ====================
#> 
#> Model: SPFA 
#> Family: Binary 
#> Link: logit 
#> Engine: rstan 
#> Treatments: Drug_A (IPD) vs Drug_B (AgD)
#> 
#> MCMC Diagnostics:
#>   Divergent transitions: 0 
#>   Max treedepth hits: 0 
#>   Max Rhat: 1.012 
#>   Min ESS: 162 
#> 
#> Intercepts (logit scale):
#>       variable       mean        sd      2.5%      97.5%      Rhat
#>       mu_index -0.6573106 0.1672270 -0.993143 -0.3624862 0.9980482
#>  mu_comparator -0.8191280 0.1663414 -1.147012 -0.5007663 0.9974884
#> 
#> Regression Coefficients:
#>  variable       mean        sd       2.5%     97.5%      Rhat
#>   beta[1]  0.8449056 0.2094787  0.4088864 1.2662142 1.0004707
#>   beta[2] -0.1003458 0.1869238 -0.4636678 0.2604309 0.9971257
#> 
#> Marginal Treatment Effects:
#>   Log Odds Ratios:
#>        variable      mean        sd       2.5%     97.5%
#>       lor_index 0.1549118 0.1468017 -0.1146808 0.4309553
#>  lor_comparator 0.1552103 0.1470833 -0.1149582 0.4317486
#>   Risk Differences:
#>       variable       mean         sd        2.5%     97.5%
#>       rd_index 0.03672208 0.03482325 -0.02764441 0.1012399
#>  rd_comparator 0.03647554 0.03459011 -0.02745721 0.1003960
#>   Risk Ratios:
#>       variable     mean         sd      2.5%    97.5%
#>       rr_index 1.103616 0.09843349 0.9332598 1.299461
#>  rr_comparator 1.105498 0.10023193 0.9318383 1.305447
prior_summary(fit_spfa)
#> Priors for ML-UMR Fit
#> =====================
#> 
#> Intercepts (mu_index, mu_comparator):
#>   normal(0, 10)
#> 
#> Regression coefficients (beta):
#>   normal(0, 2.5) applied to all 2 covariate(s)

Step 7: ML-UMR Relaxed model

fit_relaxed <- mlumr(
  dat,
  model = "relaxed",
  prior_intercept = prior_normal(0, 10),
  prior_beta = prior_normal(0, 2.5),
  chains = 2,
  iter = 500,
  warmup = 250,
  seed = 43,
  refresh = 0,
  verbose = FALSE
)

summary(fit_relaxed)
#> ML-UMR Model Summary
#> ====================
#> 
#> Model: Relaxed SPFA 
#> Family: Binary 
#> Link: logit 
#> Engine: rstan 
#> Treatments: Drug_A (IPD) vs Drug_B (AgD)
#> 
#> MCMC Diagnostics:
#>   Divergent transitions: 0 
#>   Max treedepth hits: 0 
#>   Max Rhat: 1.02 
#>   Min ESS: 127 
#> 
#> Intercepts (logit scale):
#>       variable       mean        sd       2.5%      97.5%     Rhat
#>       mu_index -0.6562519 0.1493087 -0.9388758 -0.3481726 1.008566
#>  mu_comparator -1.0313349 1.6701852 -5.0894368  1.5675868 1.020051
#> 
#> Regression Coefficients:
#>            variable        mean        sd       2.5%    97.5%      Rhat
#>       beta_index[1]  0.84950025 0.1844836  0.4681174 1.193203 1.0095267
#>       beta_index[2] -0.10252807 0.1921008 -0.4724204 0.271530 0.9980625
#>  beta_comparator[1]  0.18564770 3.0293074 -5.5148910 6.339047 1.0153345
#>  beta_comparator[2] -0.03699286 2.7624686 -5.0610367 5.896943 1.0152323
#> 
#> Marginal Treatment Effects:
#>   Log Odds Ratios:
#>        variable      mean        sd        2.5%     97.5%
#>       lor_index 0.1982402 0.1717244 -0.13732936 0.5162436
#>  lor_comparator 0.1693042 0.1384514 -0.08446908 0.4243810
#>   Risk Differences:
#>       variable       mean         sd        2.5%      97.5%
#>       rd_index 0.04648488 0.04022736 -0.03330952 0.12038267
#>  rd_comparator 0.03974320 0.03243848 -0.02001641 0.09903161
#>   Risk Ratios:
#>       variable     mean        sd      2.5%    97.5%
#>       rr_index 1.137488 0.1214905 0.9227139 1.384064
#>  rr_comparator 1.114996 0.0966858 0.9499426 1.305215
prior_summary(fit_relaxed)
#> Priors for ML-UMR Fit
#> =====================
#> 
#> Intercepts (mu_index, mu_comparator):
#>   normal(0, 10)
#> 
#> Regression coefficients (beta):
#>   normal(0, 2.5) applied to all 2 covariate(s)

Step 8: Model comparison

compare_models(fit_spfa, fit_relaxed)
#> 
#> Model Comparison (DIC)
#> ======================
#> 
#>         Model    DIC   pD Delta_DIC
#>  Relaxed SPFA 670.33 3.82      0.00
#>          SPFA 672.56 5.46      2.23
#> 
#> Lower DIC = better fit. Delta_DIC > 5 is a rough heuristic for
#> meaningful difference, not a formally calibrated threshold.
#> DIC should not be the sole basis for model selection.

Step 9: Extract and compare results

# ML-UMR marginal effects
me_spfa <- marginal_effects(fit_spfa, effect = "lor")
me_relaxed <- marginal_effects(fit_relaxed, effect = "lor")

# Build comparison table
results <- data.frame(
  Method = c("Naive", "STC",
             "ML-UMR SPFA (Index)", "ML-UMR SPFA (Comparator)",
             "ML-UMR Relaxed (Index)", "ML-UMR Relaxed (Comparator)"),
  LOR = c(
    naive_result$link_effect,
    stc_result$link_effect,
    me_spfa$mean[me_spfa$population == "Index"],
    me_spfa$mean[me_spfa$population == "Comparator"],
    me_relaxed$mean[me_relaxed$population == "Index"],
    me_relaxed$mean[me_relaxed$population == "Comparator"]
  ),
  SE = c(
    naive_result$se,
    stc_result$se,
    me_spfa$sd[me_spfa$population == "Index"],
    me_spfa$sd[me_spfa$population == "Comparator"],
    me_relaxed$sd[me_relaxed$population == "Index"],
    me_relaxed$sd[me_relaxed$population == "Comparator"]
  )
)
results$CI_lower <- c(
  naive_result$ci_lower,
  stc_result$ci_lower,
  me_spfa$q2.5[me_spfa$population == "Index"],
  me_spfa$q2.5[me_spfa$population == "Comparator"],
  me_relaxed$q2.5[me_relaxed$population == "Index"],
  me_relaxed$q2.5[me_relaxed$population == "Comparator"]
)
results$CI_upper <- c(
  naive_result$ci_upper,
  stc_result$ci_upper,
  me_spfa$q97.5[me_spfa$population == "Index"],
  me_spfa$q97.5[me_spfa$population == "Comparator"],
  me_relaxed$q97.5[me_relaxed$population == "Index"],
  me_relaxed$q97.5[me_relaxed$population == "Comparator"]
)

print(results, digits = 3)
#>                        Method   LOR    SE CI_lower CI_upper
#> 1                       Naive 0.201 0.138  -0.0702    0.471
#> 2                         STC 0.161 0.137  -0.1081    0.431
#> 3         ML-UMR SPFA (Index) 0.155 0.147  -0.1147    0.431
#> 4    ML-UMR SPFA (Comparator) 0.155 0.147  -0.1150    0.432
#> 5      ML-UMR Relaxed (Index) 0.198 0.172  -0.1373    0.516
#> 6 ML-UMR Relaxed (Comparator) 0.169 0.138  -0.0845    0.424

Step 10: Predicted probabilities

# Predicted event probabilities for each treatment in each population
preds <- predict(fit_spfa, population = "both")
print(preds)
#>   treatment population      mean         sd      q2.5       q50     q97.5
#> 1    Drug_A      Index 0.4102872 0.02283798 0.3675360 0.4093954 0.4562505
#> 2    Drug_B      Index 0.3735651 0.02586970 0.3255580 0.3729447 0.4283093
#> 3    Drug_A Comparator 0.4008872 0.02288502 0.3586225 0.4001601 0.4473848
#> 4    Drug_B Comparator 0.3644116 0.02535368 0.3178323 0.3633472 0.4178746

Step 11: Conditional effects at covariate profiles

Population-level effects average over the covariate distribution. Conditional effects evaluate the treatment effect at specific covariate values.

profiles <- data.frame(
  age_group = c(0, 1),
  sex = c(0, 1)
)

conditional_predict(fit_spfa, newdata = profiles)
#>   profile treatment      mean         sd      q2.5       q50     q97.5
#> 1       1    Drug_A 0.3423345 0.03734295 0.2702918 0.3421551 0.4103582
#> 2       1    Drug_B 0.3070765 0.03520549 0.2410356 0.3069501 0.3773606
#> 3       2    Drug_A 0.5216284 0.04493468 0.4240437 0.5218022 0.6099792
#> 4       2    Drug_B 0.4815580 0.05003362 0.3885295 0.4840706 0.5833880
conditional_effects(fit_spfa, newdata = profiles)
#>   profile      effect       mean         sd        q2.5        q50      q97.5
#> 1       1 LINK_EFFECT 0.16181741 0.15338960 -0.12212464 0.16266321 0.44772214
#> 2       1          RD 0.03525798 0.03340056 -0.02619135 0.03500905 0.09634859
#> 3       1          RR 1.12147972 0.11606650  0.91733844 1.11364222 1.35727577
#> 4       2 LINK_EFFECT 0.16181741 0.15338960 -0.12212464 0.16266321 0.44772214
#> 5       2          RD 0.04007038 0.03793533 -0.03013660 0.04029583 0.11128475
#> 6       2          RR 1.08829506 0.08406343  0.94509648 1.08243686 1.25568927

Interpretation

In this simulated example:

  • The true LOR is approximately logit(p_A) - logit(p_B) where treatment effects are captured by the intercept difference (0.3 on logit scale).
  • The naive estimate is biased because of covariate imbalance between populations.
  • STC partially corrects for this by adjusting via outcome regression.
  • ML-UMR SPFA provides population-specific treatment effects in both the index and comparator populations, fully accounting for covariate differences through QMC integration.
  • ML-UMR Relaxed additionally allows for effect modification, though in this simulated case (same covariate effects), SPFA and Relaxed should give similar results.