Plotting with ggalluvial
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(ggalluvial)
library(wompwomp)
set.seed(43)
df <- data.frame(
tissue = c(
"BRAIN", "BRAIN", "BRAIN",
"STOMACH", "STOMACH", "STOMACH", "STOMACH", "STOMACH", "STOMACH",
"HEART", "HEART", "HEART", "HEART", "HEART", "HEART", "HEART",
"T CELL", "T CELL",
"B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL", "B CELL"
),
cluster = c(
1, 1, 2,
1, 2, 2, 2, 2, 2,
1, 3, 3, 3, 3, 3, 3,
4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4
)
)
# preprocess (manual) and plot — unsorted + uncolored
df |>
dplyr::group_by(tissue, cluster) |>
dplyr::summarize(value = dplyr::n()) |>
dplyr::ungroup() |>
dplyr::mutate(dplyr::across(c(tissue, cluster), as.character)) |>
print() -> clus_df_gather
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by tissue and cluster.
## ℹ Output is grouped by tissue.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(tissue, cluster))` for per-operation grouping
## (`?dplyr::dplyr_by`) instead.
## # A tibble: 8 × 3
## tissue cluster value
## <chr> <chr> <int>
## 1 B CELL 4 9
## 2 BRAIN 1 2
## 3 BRAIN 2 1
## 4 HEART 1 1
## 5 HEART 3 6
## 6 STOMACH 1 1
## 7 STOMACH 2 5
## 8 T CELL 4 2
# unsorted plot
clus_df_gather |>
ggplot(aes(y = value, axis1 = tissue, axis2 = cluster)) +
# alluvia color is column1
geom_alluvium(aes(fill = tissue), width = 1/12) +
geom_stratum(aes(fill = after_stat(stratum)), width = 1/12, color = "grey") +
geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
scale_x_discrete(
limits = c("tissue", "cluster"),
expand = c(.05, .05)
)

# sort (tidy) and plot - sorted + uncolored
clus_df_gather |>
sort_to_uncross(cols = c(tissue, cluster), wt = value, options = list(weighted_metric = TRUE)) |>
print() -> clus_df_gather_sort
## # A tibble: 8 × 3
## tissue cluster value
## <fct> <fct> <int>
## 1 B CELL 4 9
## 2 BRAIN 1 2
## 3 BRAIN 2 1
## 4 HEART 1 1
## 5 HEART 3 6
## 6 STOMACH 1 1
## 7 STOMACH 2 5
## 8 T CELL 4 2
clus_df_gather_sort |>
ggplot(aes(y = value, axis1 = tissue, axis2 = cluster)) +
# alluvia color is column1
geom_alluvium(aes(fill = tissue), width = 1/12) +
geom_stratum(aes(fill = after_stat(stratum)), width = 1/12, color = "grey") +
geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
scale_x_discrete(
limits = c("tissue", "cluster"),
expand = c(.05, .05)
)

# color (tidy) and plot - sorted + colored
clus_df_gather_sort |>
get_lode_clusters(cols = c(tissue, cluster), wt = value) |>
print() -> cluster_mapping
## $cluster
## $cluster$`4`
## [1] 1
##
## $cluster$`1`
## [1] 2
##
## $cluster$`2`
## [1] 4
##
## $cluster$`3`
## [1] 3
##
##
## $tissue
## $tissue$`B CELL`
## [1] 1
##
## $tissue$BRAIN
## [1] 2
##
## $tissue$HEART
## [1] 3
##
## $tissue$STOMACH
## [1] 4
##
## $tissue$`T CELL`
## [1] 1
clus_df_gather_sort |>
ggplot(aes(y = value, axis1 = tissue, axis2 = cluster)) +
# alluvia color is column1
geom_alluvium(aes(fill = tissue), width = 1/12) +
geom_stratum(aes(fill = after_stat(stratum)), width = 1/12, color = "grey") +
geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
scale_x_discrete(
limits = c("tissue", "cluster"),
expand = c(.05, .05)
) +
scale_fill_manual(values = lode_cluster_pal(data = clus_df_gather_sort, cols = c(tissue, cluster), mapping = cluster_mapping))

crossing_edges_out <- compute_crossing_objective(clus_df_gather_sort, cols = c("tissue", "cluster"), wt = "value")
print(crossing_edges_out$output_objective)
## [1] 1
## R version 4.6.1 (2026-06-24)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 26.04 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.32.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] wompwomp_0.99.0 ggalluvial_0.12.6 ggplot2_4.0.3 dplyr_1.2.1
## [5] rmarkdown_2.31
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 jsonlite_2.0.0 compiler_4.6.1 tidyselect_1.2.1
## [5] stringr_1.6.0 tidyr_1.3.2 jquerylib_0.1.4 scales_1.4.0
## [9] yaml_2.3.12 fastmap_1.2.0 R6_2.6.1 labeling_0.4.3
## [13] generics_0.1.4 igraph_2.3.3 knitr_1.51 iterators_1.0.14
## [17] tibble_3.3.1 maketools_1.3.2 bslib_0.11.0 pillar_1.11.1
## [21] RColorBrewer_1.1-3 rlang_1.2.0 utf8_1.2.6 stringi_1.8.7
## [25] cachem_1.1.0 xfun_0.59 sass_0.4.10 sys_3.4.3
## [29] S7_0.2.2 TSP_1.2.7 otel_0.2.0 cli_3.6.6
## [33] withr_3.0.3 magrittr_2.0.5 foreach_1.5.2 digest_0.6.39
## [37] grid_4.6.1 lifecycle_1.0.5 vctrs_0.7.3 evaluate_1.0.5
## [41] glue_1.8.1 farver_2.1.2 codetools_0.2-20 buildtools_1.0.0
## [45] purrr_1.2.2 tools_4.6.1 pkgconfig_2.0.3 htmltools_0.5.9