This vignette introduces the PRECAST workflow for the analysis of four spatial transcriptomics datasets. The workflow consists of three steps
We demonstrate the use of PRECAST to four human dorsolateral prefrontal cortex Visium data. Download the data to R
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(SingleCellExperiment))
name_ID4 <- as.character(c(151673, 151674, 151675, 151676))
### Read data in an online manner
n_ID <- length(name_ID4)
url_brainA <- "https://github.com/feiyoung/DR-SC.Analysis/raw/main/data/DLPFC_data/"
url_brainB <- ".rds"
seuList <- list()
if (!require(ProFAST)) {
remotes::install_github("feiyoung/ProFAST")
}
for (i in 1:n_ID) {
# i <- 1
cat("input brain data", i, "\n")
# load and read data
dlpfc <- readRDS(url(paste0(url_brainA, name_ID4[i], url_brainB)))
count <- dlpfc@assays@data$counts
row.names(count) <- ProFAST::transferGeneNames(row.names(count), species = "Human")
seu1 <- CreateSeuratObject(counts = count, meta.data = as.data.frame(colData(dlpfc)), min.cells = 10,
min.features = 10)
seuList[[i]] <- seu1
}
# saveRDS(seuList, file='seuList4.RDS')
The package can be loaded with the command:
First, we view the the spatial transcriptomics data with Visium platform.
check the meta data that must include the spatial coordinates named “row” and “col”, respectively. If the names are not, they are required to rename them.
metadataList <- lapply(seuList, function(x) x@meta.data)
for (r in seq_along(metadataList)) {
meta_data <- metadataList[[r]]
cat(all(c("row", "col") %in% colnames(meta_data)), "\n") ## the names are correct!
}
Create a PRECASTObj object to prepare for PRECAST models. Here, we
only show the HVGs
method to select the 2000 highly
variable genes, but users are able to choose more genes or also use the
SPARK-X
to choose the spatially variable genes.
## check the number of genes/features after filtering step
preobj@seulist
## Add adjacency matrix list for a PRECASTObj object to prepare for PRECAST model fitting.
PRECASTObj <- AddAdjList(preobj, platform = "Visium")
## Add a model setting in advance for a PRECASTObj object. verbose =TRUE helps outputing the
## information in the algorithm.
PRECASTObj <- AddParSetting(PRECASTObj, Sigma_equal = TRUE, coreNum = 1, maxIter = 30, verbose = TRUE)
For function PRECAST
, users can specify the number of
clusters K or set
K
to be an integer vector by using modified BIC(MBIC) to
determine K. Here, we use
user-specified number of clusters.
Use the function SelectModel()
to re-organize the fitted
results in PRECASTObj.
## backup the fitting results in resList
resList <- PRECASTObj@resList
PRECASTObj <- SelectModel(PRECASTObj)
ari_precast <- sapply(1:length(seuList), function(r) mclust::adjustedRandIndex(PRECASTObj@resList$cluster[[r]],
PRECASTObj@seulist[[r]]$layer_guess_reordered))
mat <- matrix(round(ari_precast, 2), nrow = 1)
name_ID4 <- as.character(c(151673, 151674, 151675, 151676))
colnames(mat) <- name_ID4
DT::datatable(mat)
Users are also able to set multiple K, then choose the best one
PRECASTObj2 <- AddParSetting(PRECASTObj, Sigma_equal = FALSE, coreNum = 4, maxIter = 30, verbose = TRUE) # set 4 cores to run in parallel.
PRECASTObj2 <- PRECAST(PRECASTObj2, K = 5:8)
## backup the fitting results in resList
resList2 <- PRECASTObj2@resList
PRECASTObj2 <- SelectModel(PRECASTObj2)
Besides, user can also use different initialization method by setting
int.model
, for example, set int.model=NULL
;
see the functions AddParSetting()
and
model_set()
for more details.
Integrate the all samples using the IntegrateSpaData
function. For computational efficiency, this function exclusively
integrates the variable genes. Specifically, in cases where users do not
specify the PRECASTObj@seuList
or seuList
argument within the IntegrateSpaData
function, it
automatically focuses on integrating only the variable genes. The
default setting for PRECASTObj@seuList
is NULL
when rawData.preserve
in CreatePRECASTObject
is set to FALSE
. For instance:
print(PRECASTObj@seuList)
seuInt <- IntegrateSpaData(PRECASTObj, species = "Human")
seuInt
## The low-dimensional embeddings obtained by PRECAST are saved in PRECAST reduction slot.
There are two ways to use IntegrateSpaData
integrating
all genes, which will require more memory. We recommand running for all
genes on server. The first one is to set value for
PRECASTObj@seuList
.
## assign the raw Seurat list object to it. For illustration, we generate a new seuList with
## more genes; For integrating all genes, users can set `seuList <- bc2`.
genes <- c(row.names(PRECASTObj@seulist[[1]]), row.names(seuList[[1]])[1:10])
seuList_sub <- lapply(seuList, function(x) x[genes, ])
PRECASTObj@seuList <- seuList_sub #
seuInt <- IntegrateSpaData(PRECASTObj, species = "Human")
seuInt
The second method is to set a value for the argument
seuList
:
First, user can choose a beautiful color schema using
chooseColors()
.
Show the spatial scatter plot for clusters
p12 <- SpaPlot(seuInt, item = "cluster", batch = NULL, point_size = 1, cols = cols_cluster, combine = TRUE,
nrow.legend = 7)
p12
# users can plot each sample by setting combine=FALSE
Users can re-plot the above figures for specific need by returning a ggplot list object. For example, we plot the spatial heatmap using a common legend.
library(ggplot2)
pList <- SpaPlot(seuInt, item = "cluster", batch = NULL, point_size = 2.5, cols = cols_cluster,
combine = FALSE, nrow.legend = 7)
pList <- lapply(pList, function(x) x + coord_flip() + scale_x_reverse())
drawFigs(pList, layout.dim = c(2, 2), common.legend = TRUE, legend.position = "right", align = "hv")
Show the spatial UMAP/tNSE RGB plot to illustrate the performance in extracting features.
seuInt <- AddUMAP(seuInt)
p13List <- SpaPlot(seuInt, batch = NULL, item = "RGB_UMAP", point_size = 2, combine = FALSE, text_size = 15)
p13List <- lapply(p13List, function(x) x + coord_flip() + scale_x_reverse())
drawFigs(p13List, layout.dim = c(2, 2), common.legend = TRUE, legend.position = "right", align = "hv")
# seuInt <- AddTSNE(seuInt) SpaPlot(seuInt, batch=NULL,item='RGB_TSNE',point_size=2,
# combine=T, text_size=15)
Show the tSNE plot based on the extracted features from PRECAST to check the performance of integration.
seuInt <- AddTSNE(seuInt, n_comp = 2)
p1 <- dimPlot(seuInt, item = "cluster", point_size = 0.5, font_family = "serif", cols = cols_cluster,
border_col = "gray10", nrow.legend = 14, legend_pos = "right") # Times New Roman
p2 <- dimPlot(seuInt, item = "batch", point_size = 0.5, font_family = "serif", legend_pos = "right")
drawFigs(list(p1, p2), layout.dim = c(1, 2), legend.position = "right", align = "hv")
Combined differential expression analysis
library(Seurat)
dat_deg <- FindAllMarkers(seuInt)
library(dplyr)
n <- 5
dat_deg %>%
group_by(cluster) %>%
top_n(n = n, wt = avg_log2FC) -> top10
Plot DE genes’ heatmap for each spatial domain identified by PRECAST.
library(ggplot2)
## HeatMap
p1 <- DotPlot(seuInt, features = unique(top10$gene), col.min = 0, col.max = 1) + theme(axis.text.x = element_text(angle = 45,
hjust = 1, size = 8))
p1
sessionInfo()
#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=C
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] rmarkdown_2.29
#>
#> loaded via a namespace (and not attached):
#> [1] digest_0.6.37 R6_2.5.1 fastmap_1.2.0 xfun_0.49
#> [5] maketools_1.3.1 cachem_1.1.0 knitr_1.49 htmltools_0.5.8.1
#> [9] buildtools_1.0.0 lifecycle_1.0.4 cli_3.6.3 sass_0.4.9
#> [13] jquerylib_0.1.4 compiler_4.4.2 sys_3.4.3 tools_4.4.2
#> [17] evaluate_1.0.1 bslib_0.8.0 yaml_2.3.10 formatR_1.14
#> [21] jsonlite_1.8.9 rlang_1.1.4