Source file: notebooks/annotation/annotation_bmc.Rmd
Last updated: 2018-05-29
Code version: 3ea6d7c
library(ggplot2)
library(ggrastr)
library(dplyr)
library(parallel)
library(reshape2)
library(pagoda2)
library(dropestr)
library(dropEstAnalysis)
theme_set(theme_bw() + theme_pdf(show.ticks=F, legend.pos=c(0, 1)))
set.seed(42)
kDataPath <- '../../data/'
kDropEstData <- paste0(kDataPath, 'dropest/SCG71/est_11_14_poisson_real/')
kAnnotationData <- paste0(kDataPath, 'annotation/')
kOutputPath <- '../../output/'
holder <- readRDS(paste0(kDropEstData, 'SCG71.rds'))
est_cell_num <- dropestr::EstimateCellsNumber(holder$aligned_umis_per_cell)
real_cbs <- sort(holder$aligned_umis_per_cell, decreasing=T)[1:est_cell_num$expected] %>%
names()
scores <- ScorePipelineCells(holder, mit.chromosome.name='chrM', predict.all=T)
PlotCellScores(scores, cells.number=est_cell_num)
cm <- holder$cm_raw[, names(scores)[scores > 0.9]]
cm <- cm[Matrix::rowSums(cm > 0) > 10, ]
pgd <- GetPagoda(cm, n.cores=30, tsne.iter.num=5000)
4810 cells, 11571 genes; normalizing ... using plain model winsorizing ... log scale ... done.
calculating variance fit ... using gam 208 overdispersed genes ... 208 persisting ... done.
running PCA using 1000 OD genes .... done
calculating distance ... pearson ...running tSNE using 30 cores:
# clusters <- pgd$clusters$PCA$infomap
# write.csv(clusters, paste0(kAnnotationData, 'indrop_bmc_clusters.csv'))
clusters <- read.csv(paste0(kAnnotationData, 'indrop_bmc_clusters.csv'), row.names=1)
clusters <- setNames(clusters$x, rownames(clusters))
log_mtx <- log10(1e-3 + as.matrix(pgd$counts[names(clusters), ]))
Pagoda embeding:
PlotClustering(pgd, clusters)
Heatmap for marker genes:
type_ids <- lst(
`Maturing neutrophils` = c(1:4, 12:13, 15, 19, 22),
`Maturing macrophages` = c(5, 7),
`Cycling cells` = c(9),
`B cells, mature` = c(6, 8),
`B cells, immature` = c(14),
`pre-B cells` = c(17, 20),
`T cells` = c(16),
`NK cells` = c(18),
`Mast cells` = c(21, 23),
`Progenitors` = c(10, 11)
)
heatmap_genes <- c(
'Mmp9', 'Srgn', 'Cxcr2',
'Cd14', 'Ly6c2',
'Cd79a', 'Cd79b',
'Cd74', 'Cd83',
'Vpreb3', 'Vpreb1',
'Cd7',
'Nkg7', 'Il2rb', 'Thy1',
'Cd34', 'Kit',
'Lyz1', 'Lyz2',
'Cd38',
'Ccl3', 'Ccl4')
heatmap_clusters <- clusters
# heatmap_clusters <- heatmap_clusters[heatmap_clusters > 16]
PlotExpressionHeatmap(log_mtx, heatmap_clusters, heatmap_genes)
markers_df <- data.frame(
Type = c(
"Maturing neutrophils", "Maturing macrophages", "T cells",
"B cells", "B cells, mature", "B cells, immature", "pre-B cells",
"Progenitors", "NK cells"),
Markers = c(
"Mmp9, Srgn, Cxcr2", "Cd14, Ly6c2", "Cd7",
"Cd79a, Cd79b", "Cd74, Cd83", "Vpreb1", "Vpreb3",
"Cd34, Kit", "Nkg7, Il2rb, Thy1")
)
markers_df$Clusters <- sapply(type_ids, paste, collapse=", ")[as.character(markers_df$Type)]
write.csv(markers_df[c('Type', 'Markers')],
paste0(kOutputPath, 'tables/annotation_bmc_markers.csv'), row.names=F)
markers_df
Type | Markers | Clusters |
---|---|---|
Maturing neutrophils | Mmp9, Srgn, Cxcr2 | 1, 2, 3, 4, 12, 13, 15, 19, 22 |
Maturing macrophages | Cd14, Ly6c2 | 5, 7 |
T cells | Cd7 | 16 |
B cells | Cd79a, Cd79b | NA |
B cells, mature | Cd74, Cd83 | 6, 8 |
B cells, immature | Vpreb1 | 14 |
pre-B cells | Vpreb3 | 17, 20 |
Progenitors | Cd34, Kit | 10, 11 |
NK cells | Nkg7, Il2rb, Thy1 | 18 |
clusters_annotated <- AnnotateClusters(clusters, type_ids)
write.csv(data.frame(Barcode=names(clusters_annotated),
Type=as.vector(clusters_annotated)),
paste0(kAnnotationData, 'indrop_bmc_clusters_annotated.csv'))
raster_width <- 8 / 3
raster_height <- 7 / 3
raster_dpi <- 150
long_type_names <- c("Maturing neutrophils", "Maturing macrophages", "Cycling cells",
"B cells, immature", "B cells, mature", "pre-B cells")
for (type in long_type_names) {
clusters_annotated[clusters_annotated == type] <- stringi::stri_replace_last(type, "\n", regex=" ")
}
gg_annotation <- PlotClustering(pgd, clusters_annotated, lineheight=0.9, size=0.3,
raster=T, raster.width=raster_width,
raster.height=raster_height, raster.dpi=raster_dpi) +
theme_pdf(show.ticks=F) + ggpubr::rremove("xylab") +
scale_size_continuous(range=c(3, 3)) +
theme(plot.margin=margin())
gg_annotation
plot_mtx <- apply(log_mtx, 2, function(vec) scales::rescale(rank(vec)))
metacluster_borders <- list(
b = c('Maturing macrophages', 'Progenitors', 'T cells', 'Mast cells'),
l = c('Maturing neutrophils', 'Cycling cells'),
r = c('NK cells', 'B cells, immature', 'B cells, mature', 'pre-B cells')
) %>%
lapply(BordersOfClusterUnion, clusters, type_ids, pgd$embeddings$PCA$tSNE)
plot_genes <- c('Vpreb3', 'Il2rb', 'Mmp9', 'Srgn', 'Cxcr2', 'Cd7', 'Cd34', 'Lyz1')
plot_borders <- lapply(c("r", "r", "l", "l", "l", "b", "b", "b"),
function(n) metacluster_borders[[n]])
gene_plots <- mapply(function(g, b)
PlotGeneFraction(g, pgd, plot_mtx, limits=b, title.x=0.04, title.y=0.99,
legend.position="none", size=0.3,
class.label.layer=gg_annotation$layers[[3]],
raster=T, raster.width=8/3, raster.height=7/3, raster.dpi=150),
plot_genes, plot_borders, SIMPLIFY=F)
gg_fig <- cowplot::plot_grid(plotlist=c(list(gg_annotation), gene_plots), ncol=3)
gg_fig
ggsave(paste0(kOutputPath, 'figures/supp_annotation_bmc.pdf'), width=8, height=7)
# # Web app
# go_env <- p2.generate.mouse.go(pgd)
# pgd$testPathwayOverdispersion(setenv = go_env, verbose = T, correlation.distance.threshold = 0.9,
# recalculate.pca = F, min.pathway.size = 100, max.pathway.size = 1000)
#
# go_sets <- names(go_env) %>% setNames(names(go_env)) %>% lapply(function(x) {
# list(properties = list(locked = T, genesetname = x,
# shortdescription = GO.db::GOTERM[[x]]@Term), genes = c(go_env[[x]]))
# })
#
# de_sets <- get.de.geneset(pgd, groups = pgd$clusters$PCA$infomap, prefix = 'de_')
# go_sets <- c(go_sets, de_sets)
#
# additional_metadata <- list()
# additional_metadata$altCluster <- as.factor(clusters_annotated) %>%
# p2.metadata.from.factor(displayname='Annotated', s=0.7, v=0.8, start=0, end=0.5)
#
# pgd_web_object <- make.p2.app(pgd, dendrogramCellGroups = pgd$clusters$PCA$infomap,
# additionalMetadata = additional_metadata,
# geneSets = go_sets, show.clusters = T)
#
# pgd_web_object$serializeToStaticFast(binary.filename = paste0(kDropEstData, 'bmc_pagoda_annotated.bin'))
# saveRDS(pgd_web_object, paste0(kDropEstData, 'pagoda_annotation_web.rds'))
#
# show.app(pgd_web_object, "bmc")
value | |
---|---|
version | R version 3.4.1 (2017-06-30) |
os | Ubuntu 14.04.5 LTS |
system | x86_64, linux-gnu |
ui | X11 |
language | (EN) |
collate | en_US.UTF-8 |
tz | America/New_York |
date | 2018-05-29 |
package | loadedversion | date | source | |
---|---|---|---|---|
1 | AnnotationDbi | 1.32.3 | 2016-01-28 | Bioconductor |
2 | assertthat | 0.2.0 | 2017-04-11 | CRAN (R 3.4.1) |
3 | backports | 1.1.2 | 2017-12-13 | CRAN (R 3.4.1) |
5 | base64enc | 0.1-3 | 2015-07-28 | cran (@0.1-3) |
6 | bindr | 0.1 | 2016-11-13 | CRAN (R 3.4.1) |
7 | bindrcpp | 0.2 | 2017-06-17 | CRAN (R 3.4.1) |
8 | Biobase | 2.30.0 | 2016-01-28 | Bioconductor |
9 | BiocGenerics | 0.16.1 | 2016-01-28 | Bioconductor |
10 | bit | 1.1-12 | 2014-04-09 | CRAN (R 3.4.1) |
11 | bit64 | 0.9-7 | 2017-05-08 | CRAN (R 3.4.1) |
12 | blob | 1.1.0 | 2017-06-17 | CRAN (R 3.4.1) |
13 | brew | 1.0-6 | 2011-04-13 | CRAN (R 3.4.1) |
14 | Cairo | 1.5-9 | 2015-09-26 | CRAN (R 3.4.1) |
15 | clisymbols | 1.2.0 | 2017-05-21 | CRAN (R 3.4.1) |
16 | colorspace | 1.3-2 | 2016-12-14 | CRAN (R 3.4.1) |
18 | cowplot | 0.9.2 | 2017-12-17 | CRAN (R 3.4.1) |
20 | DBI | 1.0.0 | 2018-05-02 | CRAN (R 3.4.1) |
21 | dendsort | 0.3.3 | 2015-12-14 | cran (@0.3.3) |
22 | digest | 0.6.15 | 2018-01-28 | cran (@0.6.15) |
23 | dplyr | 0.7.4 | 2017-09-28 | CRAN (R 3.4.1) |
24 | dropEstAnalysis | 0.6.0 | 2018-05-16 | local (VPetukhov/dropEstAnalysis@NA) |
25 | dropestr | 0.7.7 | 2018-03-17 | local (@0.7.7) |
26 | evaluate | 0.10.1 | 2017-06-24 | CRAN (R 3.4.1) |
27 | ggplot2 | 2.2.1 | 2016-12-30 | CRAN (R 3.4.1) |
28 | ggpubr | 0.1.6 | 2017-11-14 | CRAN (R 3.4.1) |
29 | ggrastr | 0.1.5 | 2017-12-28 | Github (VPetukhov/ggrastr@cc56b45) |
30 | ggrepel | 0.7.0 | 2017-09-29 | CRAN (R 3.4.1) |
31 | git2r | 0.21.0 | 2018-01-04 | cran (@0.21.0) |
32 | glue | 1.2.0 | 2017-10-29 | CRAN (R 3.4.1) |
33 | GO.db | 3.2.2 | 2017-11-12 | Bioconductor |
37 | gtable | 0.2.0 | 2016-02-26 | CRAN (R 3.4.1) |
38 | highr | 0.6 | 2016-05-09 | CRAN (R 3.4.1) |
39 | htmltools | 0.3.6 | 2017-04-28 | CRAN (R 3.4.1) |
40 | igraph | 1.2.1 | 2018-03-10 | cran (@1.2.1) |
41 | IRanges | 2.4.8 | 2016-09-15 | Bioconductor |
42 | irlba | 2.3.2 | 2018-01-11 | cran (@2.3.2) |
43 | KernSmooth | 2.23-15 | 2015-06-29 | CRAN (R 3.4.0) |
44 | knitr | 1.20 | 2018-02-20 | cran (@1.20) |
45 | ks | 1.11.0 | 2018-01-16 | local (VPetukhov/ks@NA) |
46 | labeling | 0.3 | 2014-08-23 | CRAN (R 3.4.1) |
47 | lattice | 0.20-35 | 2017-03-25 | CRAN (R 3.4.1) |
48 | lazyeval | 0.2.1 | 2017-10-29 | CRAN (R 3.4.1) |
49 | magrittr | 1.5 | 2014-11-22 | CRAN (R 3.4.1) |
50 | MASS | 7.3-47 | 2017-04-21 | CRAN (R 3.4.0) |
51 | Matrix | 1.2-12 | 2017-11-16 | CRAN (R 3.4.1) |
52 | mclust | 5.4 | 2017-11-22 | CRAN (R 3.4.1) |
53 | memoise | 1.1.0 | 2017-04-21 | CRAN (R 3.4.1) |
55 | mgcv | 1.8-22 | 2017-09-19 | CRAN (R 3.4.1) |
56 | munsell | 0.4.3 | 2016-02-13 | CRAN (R 3.4.1) |
57 | mvtnorm | 1.0-7 | 2018-01-26 | cran (@1.0-7) |
58 | nlme | 3.1-131 | 2017-02-06 | CRAN (R 3.4.0) |
59 | pagoda2 | 0.0.0.9002 | 2018-04-08 | local (hms-dbmi/pagoda2@NA) |
61 | pcaMethods | 1.60.0 | 2017-11-12 | Bioconductor |
62 | pcaPP | 1.9-73 | 2018-01-14 | cran (@1.9-73) |
63 | pheatmap | 1.0.8 | 2015-12-11 | CRAN (R 3.4.1) |
64 | pkgconfig | 2.0.1 | 2017-03-21 | CRAN (R 3.4.1) |
65 | plyr | 1.8.4 | 2016-06-08 | CRAN (R 3.4.1) |
66 | R6 | 2.2.2 | 2017-06-17 | CRAN (R 3.4.1) |
67 | RColorBrewer | 1.1-2 | 2014-12-07 | CRAN (R 3.4.1) |
68 | Rcpp | 0.12.17 | 2018-05-18 | cran (@0.12.17) |
69 | reshape2 | 1.4.3 | 2017-12-11 | CRAN (R 3.4.1) |
70 | rjson | 0.2.19 | 2018-05-18 | cran (@0.2.19) |
71 | rlang | 0.1.4 | 2017-11-05 | CRAN (R 3.4.1) |
72 | rmarkdown | 1.9 | 2018-03-01 | CRAN (R 3.4.1) |
73 | Rook | 1.1-1 | 2014-10-20 | CRAN (R 3.4.1) |
74 | rprojroot | 1.3-2 | 2018-01-03 | cran (@1.3-2) |
75 | RSQLite | 2.0 | 2017-06-19 | CRAN (R 3.4.1) |
76 | Rtsne | 0.14 | 2017-11-12 | Github (jkrijthe/Rtsne@1d0f926) |
77 | S4Vectors | 0.8.11 | 2016-01-30 | Bioconductor |
78 | scales | 0.5.0 | 2017-08-24 | CRAN (R 3.4.1) |
79 | sessioninfo | 1.0.0 | 2017-06-21 | CRAN (R 3.4.1) |
82 | stringi | 1.1.7 | 2018-03-12 | cran (@1.1.7) |
83 | stringr | 1.3.0 | 2018-02-19 | cran (@1.3.0) |
84 | tibble | 1.3.4 | 2017-08-22 | CRAN (R 3.4.1) |
86 | triebeard | 0.3.0 | 2016-08-04 | cran (@0.3.0) |
87 | urltools | 1.7.0 | 2018-01-20 | cran (@1.7.0) |
89 | withr | 2.1.2 | 2018-03-15 | cran (@2.1.2) |
90 | yaml | 2.1.18 | 2018-03-08 | cran (@2.1.18) |
This R Markdown site was created with workflowr