--- title: "RPointCloud: Regulatory T Cells" author: "Kevin R. Coombes" data: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{RPointCloud: Regulatory T Cells} %\VignetteKeywords{TDA, topological data analysis, regulatory T cells} %\VignetteDepends{RPointCloud,igraph,Polychrome,Mercator,ClassDiscovery,ape} %\VignettePackage{RPointCloud} %\VignetteEngine{knitr::rmarkdown} --- ```{r opts, echo=FALSE} knitr::opts_chunk$set(fig.width=8, fig.height=5) oopt <- options(width=96) .format <- knitr::opts_knit$get("rmarkdown.pandoc.to") .tag <- function(N, cap ) ifelse(.format == "html", paste("Figure", N, ":", cap), cap) ``` # Introduction We want to illustrate the `RPointCloud` package (Version `r packageVersion("RPointCloud")`) with a single-cell mass cytometry data set. Not surprisingly, we start by loading the package. ```{r RPointCloud} library(RPointCloud) ``` We also load a lot of useful packages (some of which will eventually get incorporated into the package requirements). ```{r libpack} library(TDA) library(Polychrome) data(Dark24) data(Light24) library(Mercator) library(igraph) library(ClassDiscovery) library(PCDimension) library("ape") ``` Now we fetch the sample data set that is included with the package. ```{r CLL} data(treg) ls() ``` ```{r echo=FALSE, eval = FALSE} dmat <- distanceMatrix(dset, "pearson") picked <- Mercator::downsample(target = 250, distanceMat = as.matrix(dmat), cutoff = 1E-6) treg <- dset[, picked] tmat <- distanceMatrix(treg, "pearson") rm(picked) set.seed(84263) rip <- ripsDiag(tmat, 2, 0.7, "arbitrary", "Dionysus", TRUE) save(rip, treg, tmat, file = "treg.rda") ``` # TDA Built-in Visualizations of the Rips Diagram Here are some plots of the `TDA` results using tools from the original package. (I am not sure what any of these are really good for.) ```{r fig01, fig.width = 9, fig.cap = .tag(1, "The Rips barcode diagram from TDA.")} diag <- rip[["diagram"]] opar <- par(mfrow = c(1,2)) plot(diag, barcode = TRUE, main = "Barcode") plot(diag, main = "Rips Diagram") par(opar) rm(opar) ``` ```{r fig02, fig.width = 12, fig.cap = .tag(2, "Landscape, silhouette, and lambda-cluster plots of the Rips diagram, from TDA.")} L <- TDA::landscape(diag, KK = 1) S <- TDA::silhouette(diag) crt <- TDA::clusterTree(as.matrix(tmat), k = 5, dist = "arbitrary") opar <- par(mfrow = c(1, 3)) plot(L, type = "l", main = "Landscape") plot(S, type = "l", main = "Silhouette") plot(crt, type = "lambda",main = "Lambda Cluster Tree") par(opar) rm(L, S, opar) ``` # Mercator Visualizations of the Underlying Data and Distance Matrix ```{r Mercator} M <- Mercator(tmat, metric ="pearson", method = "mds", K = 8) M <- addVisualization(M, "hclust") M <- addVisualization(M, "tsne") M <- addVisualization(M, "umap") M <- addVisualization(M, "som") M@palette <- Light24 set.seed(72345) clue <- kmeans(t(treg), centers = 8, iter.max = 100, nstart = 20) M <- setClusters(M, clue$cluster) ``` ```{r fig03, fig.width = 9, fig.height = 12, fig.cap = .tag(3, "Mercator Visualizations of the distance matrix.")} opar <- par(mfrow = c(3,2), cex = 1.1) plot(M, view = "hclust") plot(M, view = "mds", main = "Mult-Dimensional Scaling") plot(M, view = "tsne", main = "t-SNE") plot(M, view = "umap", main = "UMAP") barplot(M, main = "Silhouette Width") plot(M, view = "som", main = "Self-Organizing Maps") par(opar) rm(opar) ``` # Dimension Zero Here is a picture of the "zero-cycle" data, which can also be used ultimately to cluster the points (where each point is a patient). The connected lines are similar to a single-linkage clustering structure, showing when individual points are merged together as the TDA parameter increases. ```{r fig04, fig.cap = .tag(4, "Hierarchical connections between zero cycles.")} nzero <- sum(diag[, "dimension"] == 0) cycles <- rip[["cycleLocation"]][2:nzero] W <- M@view[["umap"]]$layout plot(W, main = "Connected Zero Cycles") for (cyc in cycles) { points(W[cyc[1], , drop = FALSE], pch = 16,col = "red") X <- c(W[cyc[1], 1], W[cyc[2],1]) Y <- c(W[cyc[1], 2], W[cyc[2],2]) lines(X, Y) } ``` # Using iGraph We can convert the 0-dimensional cycle structure into a dendrogram, by first passing them through the `igraph` package. We start by putting all the zero-cycle data together, which can be viewed as an "edge-list" from the `igraph` perspective. ```{r igraph} edges <- t(do.call(cbind, cycles)) # this creates an "edgelist" G <- graph_from_edgelist(edges) G <- set_vertex_attr(G, "label", value = attr(tmat, "Labels")) ``` Note that we attached the sample names to the graph, obtaining them from the daisy distance matrix. Now we use two different algorithms to decide how to layout the graph. ```{r layouts} set.seed(2734) Lt <- layout_as_tree(G) L <- layout_with_fr(G) ``` ```{r fig05, fig.cap = .tag(5, "Two igraph depictions of the zero cycle structure."), fig.width = 10} opar <- par(mfrow = c(1,2), mai = c(0.01, 0.01, 1.02, 0.01)) plot(G, layout = Lt, main = "As Tree") plot(G, layout = L, main = "Fruchterman-Reingold") par(opar) ``` Note that the Fruchterman-Reingold layout gives the most informative structure. ## Community Structure There are a variety of community-finding algorithms that we can apply. (Communities in graph theory are similar to clusters in other machine learning areas of study.) "Edge-betweenness" seems to work best. ```{r keg} keg <- cluster_edge_betweenness(G) # 19 table(membership(keg)) pal <- Light24[membership(keg)] ``` The first line in the next code chunk shows that we did actually produce a tree. We explore three different ways ro visualize it ```{r fig06, fig.width = 6, fig.height = 6, fig.cap = .tag(6, "Community structure, simplified.")} is.hierarchical(keg) H <- as.hclust(keg) H$labels <- vertex_attr(G, "names") K <- 12 colset <- Light24[cutree(H, k=K)] G2 <- set_vertex_attr(G, "color", value = colset) e <- 0.01 opar <- par(mai = c(e, e, e, e)) plot(G2, layout = L) par(opar) ``` ```{r fig08, fig.width=7, fig.height = 7, fig.cap = .tag(8, "Cladogram realization, from the ape package.")} P <- as.phylo(H) opar <- par(mai = c(0.01, 0.01, 1.0, 0.01)) plot(P, type = "u", tip.color = colset, cex = 0.8, main = "Ape/Cladogram") par(opar) rm(opar) ``` ## Visualizing Features In any of the "scatter plot views" (e.g., MDS, UMAP, t-SNE) from Mercator, we may want to overlay different colors to represent different clinical features. ```{r views} U <- M@view[["mds"]] V <- M@view[["tsne"]]$Y W <- M@view[["umap"]]$layout ``` ```{r fig10, fig.width = 9, fig.cap = .tag(10, "UMAP visualizations with clinical features.")} FOXP3 <- Feature(treg["FOXP3",], "FOXP3", c("pink", "skyblue"), c("Low", "High")) CTLA4 <- Feature(treg["CTLA4", ], "CTLA4", c("green", "magenta"), c("Low", "High")) opar <- par(mfrow = c(1,2)) plot(W, main = "UMAP; FOXP3", xlab = "U1", ylab = "U2") points(FOXP3, W, pch = 16, cex = 1.4) plot(W, main = "UMAP; CTLA4", xlab = "U1", ylab = "U2") points(CTLA4, W, pch = 16, cex = 1.4) par(opar) rm(opar) ``` ```{r cleanup} options(oopt) #rm(list = ls()) ```