vignettes/RobinSingleCellVignetteNew.Rmd
RobinSingleCellVignetteNew.Rmd
# datav2 <- system.file("extdata", "filtered_tabulamuris_mtx.rds", package="scrobinv2")
datav2 <- get_data_path("filtered_tabulamuris_mtx.rds")
tabData <- readRDS(file=datav2)
SingleCellData <- CreateSeuratObject(counts = tabData, project = "singleCell", min.cells = round(dim(tabData)[2]*5/100), min.features = 0)
SingleCellData
## An object of class Seurat
## 8694 features across 532 samples within 1 assay
## Active assay: RNA (8694 features, 0 variable features)
## 1 layer present: counts
SingleCellData <- NormalizeData(SingleCellData, normalization.method = "LogNormalize", scale.factor = 10000)
SingleCellData[["RNA"]]@layers$counts[1:5,1:5]
## 5 x 5 sparse Matrix of class "dgCMatrix"
##
## [1,] . . . . .
## [2,] . . 1 . 1
## [3,] . . 1 . 1
## [4,] 1 . 1 . .
## [5,] 1 1 2 . .
SingleCellData <- FindVariableFeatures(object = SingleCellData, selection.method="vst", nfeatures = 2000)
SingleCellData <- ScaleData(SingleCellData, do.center = TRUE, do.scale = TRUE)
SingleCellData[["RNA"]]@layers$scale.data[1:5,1:5]
## [,1] [,2] [,3] [,4] [,5]
## [1,] -0.2119967 -0.2119967 -0.2119967 -0.2119967 -0.2119967
## [2,] -0.2464207 -0.2464207 -0.2464207 6.4241312 -0.2464207
## [3,] -0.2325839 -0.2325839 -0.2325839 -0.2325839 -0.2325839
## [4,] -0.3036820 -0.3036820 -0.3036820 -0.3036820 -0.3036820
## [5,] -0.2199427 -0.2199427 -0.2199427 -0.2199427 -0.2199427
SingleCellData <- RunPCA(SingleCellData, features = VariableFeatures(object = SingleCellData))
SingleCellData <- FindNeighbors(SingleCellData, dims = 1:50)
SingleCellData@graphs$RNA_snn[1:5,1:5] # Adjacency matrix
## 5 x 5 sparse Matrix of class "dgCMatrix"
## 10X_P4_7_CCTACACGTAAGGATT 10X_P7_6_ACACCGGGTAGCGTGA
## 10X_P4_7_CCTACACGTAAGGATT 1.0000000 0.1428571
## 10X_P7_6_ACACCGGGTAGCGTGA 0.1428571 1.0000000
## 10X_P7_9_TGATTTCAGCAGGTCA . .
## 10X_P8_14_CGAATGTAGAAGGTGA . .
## 10X_P4_6_CGATGGCCATCACAAC . .
## 10X_P7_9_TGATTTCAGCAGGTCA 10X_P8_14_CGAATGTAGAAGGTGA
## 10X_P4_7_CCTACACGTAAGGATT . .
## 10X_P7_6_ACACCGGGTAGCGTGA . .
## 10X_P7_9_TGATTTCAGCAGGTCA 1.0000000 .
## 10X_P8_14_CGAATGTAGAAGGTGA . 1
## 10X_P4_6_CGATGGCCATCACAAC 0.2121212 .
## 10X_P4_6_CGATGGCCATCACAAC
## 10X_P4_7_CCTACACGTAAGGATT .
## 10X_P7_6_ACACCGGGTAGCGTGA .
## 10X_P7_9_TGATTTCAGCAGGTCA 0.2121212
## 10X_P8_14_CGAATGTAGAAGGTGA .
## 10X_P4_6_CGATGGCCATCACAAC 1.0000000
AdjSNN <- SingleCellData@graphs$RNA_snn
graphSingleCell <- graph_from_adjacency_matrix(AdjSNN,mode="directed",weighted = TRUE,add.colnames = "NA",diag=FALSE)
edge <- as_edgelist(graphSingleCell)
graph <- igraph::graph_from_edgelist(edge, directed=FALSE)
graph <- igraph::simplify(graph)
graph
## IGRAPH 188ce46 U--- 532 14421 --
## + edges from 188ce46:
## [1] 1-- 2 1-- 8 1-- 13 1-- 14 1-- 17 1-- 27 1-- 42 1-- 63 1-- 65 1-- 71
## [11] 1-- 79 1-- 95 1-- 99 1--102 1--116 1--131 1--132 1--159 1--161 1--164
## [21] 1--172 1--179 1--194 1--197 1--205 1--220 1--258 1--260 1--264 1--267
## [31] 1--270 1--312 1--315 1--325 1--327 1--328 1--351 1--368 1--378 1--386
## [41] 1--391 1--393 1--395 1--397 1--406 1--407 1--409 1--420 1--421 1--426
## [51] 1--427 1--432 1--452 1--458 1--459 1--464 1--472 1--473 1--476 1--478
## [61] 1--495 1--496 1--497 1--501 1--502 1--503 1--510 1--518 2-- 8 2-- 13
## [71] 2-- 14 2-- 27 2-- 42 2-- 63 2-- 65 2-- 71 2-- 79 2-- 95 2-- 99 2--102
## [81] 2--116 2--131 2--132 2--159 2--161 2--164 2--172 2--179 2--194 2--197
## + ... omitted several edges
We apply the compare procedure to see which is the algorithm that better fits our network.
#Infomap
comp_I <- robinCompare(graph=graph, method1="louvain",
method2="infomap")
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.
plot1 <- plot(comp_I)
#Walktrap
comp_W <- robinCompare(graph=graph, method1="louvain",
method2="walktrap")
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.
plot2 <- plot(comp_W)
#LabelProp
comp_La <- robinCompare(graph=graph, method1="louvain",
method2="labelProp")
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.
plot3 <- plot(comp_La)
#Fastgreedy
comp_F <- robinCompare(graph=graph, method1="louvain",
method2="fastGreedy")
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.
plot4 <- plot(comp_F)
PlotComparisonAllVsInfomap <- gridExtra::grid.arrange(plot1,plot2,plot3,plot4, ncol=2)
The lowest curve is the most stable algorithm. Louvain and Walktrap are the best algorithms in our example.
Due to the fact that Louvain is one of the best algorithm for our network we apply the robust procedure with the Louvain algorithm to see if the communities detected are statistically significant.
graphRandom <- random(graph=graph)
proc <- robinRobust(graph=graph, graphRandom=graphRandom, method="louvain")
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.
plot(proc)
robinFDATest(proc)
## [1] "First step: basis expansion"
## Swapping 'y' and 'argvals', because 'y' is simpler,
## and 'argvals' should be; now dim(argvals) = 13 ; dim(y) = 13 x 20
## [1] "Second step: joint univariate tests"
## [1] "Third step: interval-wise combination and correction"
## [1] "creating the p-value matrix: end of row 2 out of 9"
## [1] "creating the p-value matrix: end of row 3 out of 9"
## [1] "creating the p-value matrix: end of row 4 out of 9"
## [1] "creating the p-value matrix: end of row 5 out of 9"
## [1] "creating the p-value matrix: end of row 6 out of 9"
## [1] "creating the p-value matrix: end of row 7 out of 9"
## [1] "creating the p-value matrix: end of row 8 out of 9"
## [1] "creating the p-value matrix: end of row 9 out of 9"
## [1] "Interval Testing Procedure completed"
## TableGrob (1 x 2) "arrange": 2 grobs
## z cells name grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## $adj.pvalue
## [1] 0.0294 0.0038 0.0022 0.0019 0.0017 0.0017 0.0017 0.0022 0.0038
##
## $pvalues
## [1] 0.0294 0.0017 0.0017 0.0017 0.0017 0.0017 0.0017 0.0017 0.0017
robinGPTest(proc)
## Profile 1
## Profile 2
## [1] 91.60066
The communities given by Louvain are statistically significant.
Once we found the best clustering algorithm, we can use the found communities for identifying marker genes, to subsequently find cell types.
mc <- membershipCommunities(graph=graph, method="louvain")
SingleCellData$seurat_clusters = mc
Idents(SingleCellData) <- "seurat_clusters"
markers <- FindAllMarkers(SingleCellData, only.pos = TRUE, logfc.threshold = 0.25)
markers %>%
group_by(cluster) %>%
dplyr::filter(avg_log2FC > 1) %>%
slice_head(n = 10) %>%
ungroup() -> top10
DoHeatmap(SingleCellData, features = top10$gene) + NoLegend()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.1 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## time zone: Etc/UTC
## tzcode source: system (glibc)
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] future_1.58.0 robin_2.0.0 igraph_2.1.4 mclust_6.1.1
## [5] patchwork_1.3.1 Seurat_5.3.0 SeuratObject_5.1.0 sp_2.2-0
## [9] dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] RcppAnnoy_0.0.22 splines_4.4.2 later_1.4.2
## [4] bitops_1.0-9 tibble_3.3.0 cellranger_1.1.0
## [7] polyclip_1.10-7 fastDummies_1.7.5 deSolve_1.40
## [10] lifecycle_1.0.4 globals_0.18.0 lattice_0.22-7
## [13] MASS_7.3-65 magrittr_2.0.3 limma_3.62.2
## [16] plotly_4.11.0 sass_0.4.10 rmarkdown_2.29
## [19] jquerylib_0.1.4 yaml_2.3.10 httpuv_1.6.16
## [22] sctransform_0.4.2 spam_2.11-1 askpass_1.2.1
## [25] spatstat.sparse_3.1-0 reticulate_1.42.0 gld_2.6.7
## [28] cowplot_1.2.0 pbapply_1.7-2 RColorBrewer_1.1-3
## [31] abind_1.4-8 expm_1.0-0 Rtsne_0.17
## [34] purrr_1.1.0 RCurl_1.98-1.17 pracma_2.4.4
## [37] data.tree_1.1.0 ggrepel_0.9.6 irlba_2.3.5.1
## [40] listenv_0.9.1 spatstat.utils_3.1-4 goftest_1.2-3
## [43] RSpectra_0.16-2 spatstat.random_3.4-1 fitdistrplus_1.2-4
## [46] parallelly_1.45.0 pkgdown_2.1.3 codetools_0.2-20
## [49] tidyselect_1.2.1 farver_2.1.2 matrixStats_1.5.0
## [52] spatstat.explore_3.4-3 jsonlite_2.0.0 e1071_1.7-16
## [55] ks_1.15.1 progressr_0.15.1 ggridges_0.5.6
## [58] survival_3.8-3 systemfonts_1.2.3 tools_4.4.2
## [61] ragg_1.4.0 DescTools_0.99.60 ica_1.0-3
## [64] Rcpp_1.1.0 glue_1.8.0 gridExtra_2.3
## [67] xfun_0.52 withr_3.0.2 formatR_1.14
## [70] fastmap_1.2.0 boot_1.3-31 digest_0.6.37
## [73] R6_2.6.1 mime_0.13 qpdf_1.4.1
## [76] textshaping_1.0.1 colorspace_2.1-1 networkD3_0.4.1
## [79] scattermore_1.2 tensor_1.5.1 spatstat.data_3.1-6
## [82] tidyr_1.3.1 generics_0.1.4 data.table_1.17.8
## [85] class_7.3-23 httr_1.4.7 htmlwidgets_1.6.4
## [88] uwot_0.2.3 pkgconfig_2.0.3 gtable_0.3.6
## [91] Exact_3.3 lmtest_0.9-40 pcaPP_2.0-5
## [94] htmltools_0.5.8.1 dotCall64_1.2 scales_1.4.0
## [97] perturbR_0.1.3 lmom_3.2 png_0.1-8
## [100] spatstat.univar_3.1-4 knitr_1.50 rstudioapi_0.17.1
## [103] tzdb_0.5.0 reshape2_1.4.4 nlme_3.1-168
## [106] proxy_0.4-27 cachem_1.1.0 zoo_1.8-14
## [109] stringr_1.5.1 rootSolve_1.8.2.4 KernSmooth_2.23-26
## [112] parallel_4.4.2 miniUI_0.1.2 fda_6.3.0
## [115] desc_1.4.3 pillar_1.11.0 grid_4.4.2
## [118] vctrs_0.6.5 RANN_2.6.2 promises_1.3.3
## [121] xtable_1.8-4 cluster_2.1.8.1 evaluate_1.0.4
## [124] readr_2.1.5 mvtnorm_1.3-3 cli_3.6.5
## [127] compiler_4.4.2 rlang_1.1.6 fdatest_2.1.1
## [130] future.apply_1.20.0 labeling_0.4.3 fds_1.8
## [133] plyr_1.8.9 forcats_1.0.0 fs_1.6.6
## [136] stringi_1.8.7 rainbow_3.8 viridisLite_0.4.2
## [139] deldir_2.0-4 BiocParallel_1.40.2 hdrcde_3.4
## [142] lazyeval_0.2.2 spatstat.geom_3.4-1 Matrix_1.7-3
## [145] RcppHNSW_0.6.0 hms_1.1.3 ggplot2_3.5.2
## [148] statmod_1.5.0 shiny_1.11.1 haven_2.5.5
## [151] ROCR_1.0-11 bslib_0.9.0 readxl_1.4.5