Source file: notebooks/low_quality_cells/validation_bmc.Rmd
Last updated: 2018-05-16
Code version: dae257a
library(ggplot2)
library(ggpubr)
library(ggrastr)
library(dplyr)
library(parallel)
library(Seurat)
library(dropestr)
library(dropEstAnalysis)
theme_set(theme_base)
set.seed(42)
kOutputFolder <- '../../output/'
kDataPath <- '../../data/'
kEstDataPath <- paste0(kDataPath, 'dropest/SCG71/est_11_14_poisson_real/')
kAnnotationDataPath <- paste0(kDataPath, 'annotation/')
holder <- readRDS(paste0(kEstDataPath, 'SCG71.rds'))
est_cell_num <- EstimateCellsNumber(holder$aligned_umis_per_cell)
umis_per_cell <- sort(holder$aligned_umis_per_cell, decreasing=T)
scores <- ScorePipelineCells(holder, mit.chromosome.name='chrM', predict.all=T,
verbose=T)[names(umis_per_cell)]
Explained variance after PCA: 96.78%; used 3 PCs.
Used features: ReadsPerUmi, UmiPerGene, LowExpressedGenesFrac.
PlotCellScores(scores)
intersect_cbs <- names(scores[1:est_cell_num$expected])
intersect_cbs <- intersect_cbs[scores[intersect_cbs] > 0.9]
unknown_cell_scores <- scores[(est_cell_num$expected+1):length(scores)]
rescued_cbs <- names(unknown_cell_scores)[unknown_cell_scores > 0.9]
unknown_cell_scores <- scores[1:est_cell_num$expected]
filtered_cbs <- names(unknown_cell_scores)[unknown_cell_scores < 0.1]
c(Unchanged=length(intersect_cbs),
Rescued=length(rescued_cbs), Filtered=length(filtered_cbs))
Unchanged Rescued Filtered
4684 126 86
r_cm_rescued <- holder$cm_raw[, c(names(umis_per_cell)[1:est_cell_num$expected],
rescued_cbs)]
# You need to run "annotation/annotation_bmc.Rmd" first
clusters_annotated <- paste0(kAnnotationDataPath, 'indrop_bmc_clusters_annotated.csv') %>%
read.csv() %>% (function(x) setNames(as.character(x$Type), x$Barcode))
r_rescued <- GetPagoda(r_cm_rescued, n.cores=30)
5558 cells, 11856 genes; normalizing ... using plain model winsorizing ... log scale ... done.
calculating variance fit ... using gam 198 overdispersed genes ... 198 persisting ... done.
running PCA using 1000 OD genes .... done
calculating distance ... pearson ...running tSNE using 30 cores:
intersect_clusters <- clusters_annotated[intersect(names(clusters_annotated),
intersect_cbs)]
notannotated_cells <- setdiff(colnames(r_cm_rescued), names(clusters_annotated))
clusters_annotated_resc <- AnnotateClustersByGraph(r_rescued$graphs$PCA,
clusters_annotated, notannotated_cells,
max.iter=100, mc.cores=10)
rescued_clusters <- clusters_annotated_resc[rescued_cbs]
intersect_clusters <- clusters_annotated[intersect_cbs]
plot_cbs <- names(clusters_annotated) %>% setdiff(rescued_cbs) %>% setdiff(filtered_cbs)
plot_clusters <- clusters_annotated[plot_cbs]
plot_rescued_clusters <- rescued_clusters
for (type in c("Maturing neutrophils", "Maturing macrophages", "Cycling cells")) {
plot_clusters[plot_clusters == type] <- gsub(" ", "\n", type)
plot_rescued_clusters[plot_rescued_clusters == type] <- gsub(" ", "\n", type)
}
gg_tsne <- PlotFiltrationResults(r_rescued, plot_clusters, filtered.cbs=filtered_cbs,
rescued.clusters=plot_rescued_clusters,
raster.width=4, raster.height=4.8, lineheight=0.9) +
ylim(-35, 33) +
theme_pdf(legend.pos=c(0, 1), show.ticks = F)
gg_tsne
rescued_table <- TableOfRescuedCells(clusters_annotated_resc[c(intersect_cbs, rescued_cbs)],
rescued_cbs)
write.csv(rescued_table, paste0(kOutputFolder, "tables/rescued_cbc_bmc.csv"), row.names=F)
rescued_table
Cell type | Total num. of cells | Num. of rescued | Fraction of rescued, % |
---|---|---|---|
B cells, immature | 99 | 0 | 0.00 |
B cells, mature | 457 | 1 | 0.22 |
Cycling cells | 192 | 5 | 2.60 |
Mast cells | 48 | 1 | 2.08 |
Maturing macrophages | 653 | 1 | 0.15 |
Maturing neutrophils | 2876 | 113 | 3.93 |
NK cells | 61 | 0 | 0.00 |
pre-B cells | 120 | 0 | 0.00 |
Progenitors | 222 | 4 | 1.80 |
T cells | 82 | 1 | 1.22 |
seurat_cm <- r_cm_rescued[Matrix::rowSums(r_cm_rescued) > 200, ]
srt <- CreateSeuratObject(raw.data=r_cm_rescued, project="BMC", display.progress=F)
srt <- NormalizeData(object=srt, normalization.method="LogNormalize", scale.factor=10000)
srt <- FindVariableGenes(object=srt, mean.function=ExpMean, dispersion.function=LogVMR,
x.low.cutoff = 0.0125, x.high.cutoff = 3, y.cutoff = 1, do.plot=F)
srt <- ScaleData(object = srt, vars.to.regress = "nUMI", display.progress=F)
srt@ident <- as.factor(clusters_annotated_resc[colnames(srt@raw.data)])
names(srt@ident) <- colnames(srt@raw.data)
compared_clusters <- unique(srt@ident)
cluster_markers <- mclapply(compared_clusters, function(i)
mclapply(setdiff(compared_clusters, i), FindClusterMarkers, i, srt, mc.cores=4),
mc.cores=11)
overexpressed_genes <- GetOverexpressedGenes(srt, compared_clusters, cluster_markers)
clusters_info <- list(clusters=srt@ident, marks=cluster_markers,
overexpressed_genes=overexpressed_genes)
tested_clusts <- sort(c(intersect_clusters, rescued_clusters))
separation <- c(setNames(rep('rescued', length(rescued_cbs)), rescued_cbs),
setNames(rep('real', length(intersect_clusters)), names(intersect_clusters)))
umis_per_cb_subset <- log10(Matrix::colSums(r_cm_rescued[, names(tested_clusts)]))
tested_clusts <- tested_clusts[order(tested_clusts, -umis_per_cb_subset)]
de_genes <- clusters_info$overexpressed_genes
plot_df <- ExpressionMatrixToDataFrame(r_rescued$counts[names(tested_clusts), de_genes],
umis_per_cb_subset, as.factor(tested_clusts),
filtration.type=separation)
plot_df <- plot_df %>% filter(UmisPerCb < 2.83)
plot_dfs <- split(plot_df, plot_df$FiltrationType)
ggs <- lapply(plot_dfs, HeatmapAnnotGG, umi.per.cell.limits=range(plot_df$UmisPerCb))
legend_guides <- list(HeatmapLegendGuide('Expression'),
HeatmapLegendGuide('Cell type', guide=guide_legend, ncol=3),
HeatmapLegendGuide('log10(#molecules)'))
gg_legends <- mapply(`+`, ggs$real, legend_guides, SIMPLIFY=F) %>%
lapply(`+`, theme(legend.margin=margin(l=4, r=4, unit='pt'))) %>% lapply(get_legend)
ggs$real$heatmap <- ggs$real$heatmap + rremove('xlab') + ylab('Cells')
ggs$rescued$heatmap <- ggs$rescued$heatmap + labs(x = 'Genes', y = 'Cells')
ggs_annot <- ggs %>%
lapply(function(gg) cowplot::plot_grid(
plotlist=lapply(gg, `+`, theme(legend.position="none", plot.margin=margin())),
nrow=1, rel_widths=c(1.5, 0.1, 0.1), align='h'))
gg_left <- cowplot::plot_grid(ggs_annot$real, ggs_annot$rescued, nrow=2, labels=c('A', 'B'))
gg_right <- gg_tsne + theme(plot.margin=margin(l=0.1, unit='in'),
axis.text=element_blank(), axis.ticks=element_blank())
gg_bottom <- cowplot::plot_grid(plotlist=gg_legends[c(1, 3, 2)], ncol=3,
rel_widths=c(1, 1, 2.2))
gg_fig <- cowplot::plot_grid(gg_left, gg_right, labels=c('', 'C'), ncol=2) %>%
cowplot::plot_grid(gg_bottom, nrow=2, rel_heights=c(1, 0.25), align='v') +
theme(plot.margin=margin(1, 1, 1, 1))
gg_fig
ggsave(paste0(kOutputFolder, 'figures/fig_bmc_lq_cells.pdf'), gg_fig, width=8, height=6)
value | |
---|---|
version | R version 3.4.1 (2017-06-30) |
os | Ubuntu 14.04.5 LTS |
system | x86_64, linux-gnu |
ui | X11 |
language | (EN) |
collate | en_US.UTF-8 |
tz | America/New_York |
date | 2018-05-16 |
package | loadedversion | date | source | |
---|---|---|---|---|
1 | acepack | 1.4.1 | 2016-10-29 | CRAN (R 3.4.1) |
2 | AnnotationDbi | 1.32.3 | 2016-01-28 | Bioconductor |
3 | ape | 5.0 | 2017-10-30 | CRAN (R 3.4.1) |
4 | assertthat | 0.2.0 | 2017-04-11 | CRAN (R 3.4.1) |
5 | backports | 1.1.2 | 2017-12-13 | CRAN (R 3.4.1) |
7 | base64enc | 0.1-3 | 2015-07-28 | cran (@0.1-3) |
8 | bindr | 0.1 | 2016-11-13 | CRAN (R 3.4.1) |
9 | bindrcpp | 0.2 | 2017-06-17 | CRAN (R 3.4.1) |
10 | Biobase | 2.30.0 | 2016-01-28 | Bioconductor |
11 | BiocGenerics | 0.16.1 | 2016-01-28 | Bioconductor |
12 | bit | 1.1-12 | 2014-04-09 | CRAN (R 3.4.1) |
13 | bit64 | 0.9-7 | 2017-05-08 | CRAN (R 3.4.1) |
14 | bitops | 1.0-6 | 2013-08-17 | CRAN (R 3.4.1) |
15 | blob | 1.1.0 | 2017-06-17 | CRAN (R 3.4.1) |
16 | brew | 1.0-6 | 2011-04-13 | CRAN (R 3.4.1) |
17 | broom | 0.4.3 | 2017-11-20 | CRAN (R 3.4.1) |
18 | Cairo | 1.5-9 | 2015-09-26 | CRAN (R 3.4.1) |
19 | caret | 6.0-78 | 2017-12-10 | CRAN (R 3.4.1) |
20 | caTools | 1.17.1 | 2014-09-10 | CRAN (R 3.4.1) |
21 | checkmate | 1.8.5 | 2017-10-24 | CRAN (R 3.4.1) |
22 | class | 7.3-14 | 2015-08-30 | CRAN (R 3.4.0) |
23 | clisymbols | 1.2.0 | 2017-05-21 | CRAN (R 3.4.1) |
24 | cluster | 2.0.6 | 2017-03-16 | CRAN (R 3.4.0) |
25 | codetools | 0.2-15 | 2016-10-05 | CRAN (R 3.4.1) |
26 | colorspace | 1.3-2 | 2016-12-14 | CRAN (R 3.4.1) |
28 | cowplot | 0.9.2 | 2017-12-17 | CRAN (R 3.4.1) |
29 | CVST | 0.2-1 | 2013-12-10 | CRAN (R 3.4.1) |
30 | data.table | 1.10.4-3 | 2017-10-27 | CRAN (R 3.4.1) |
32 | DBI | 0.7 | 2017-06-18 | CRAN (R 3.4.1) |
33 | ddalpha | 1.3.1 | 2017-09-27 | CRAN (R 3.4.1) |
34 | dendsort | 0.3.3 | 2015-12-14 | cran (@0.3.3) |
35 | DEoptimR | 1.0-8 | 2016-11-19 | CRAN (R 3.4.1) |
36 | diffusionMap | 1.1-0 | 2014-02-20 | CRAN (R 3.4.1) |
37 | digest | 0.6.15 | 2018-01-28 | cran (@0.6.15) |
38 | dimRed | 0.1.0 | 2017-05-04 | CRAN (R 3.4.1) |
39 | diptest | 0.75-7 | 2016-12-05 | CRAN (R 3.4.1) |
40 | doParallel | 1.0.11 | 2017-09-28 | CRAN (R 3.4.1) |
41 | dplyr | 0.7.4 | 2017-09-28 | CRAN (R 3.4.1) |
42 | dropEstAnalysis | 0.6.0 | 2018-05-16 | local (VPetukhov/dropEstAnalysis@NA) |
43 | dropestr | 0.7.7 | 2018-03-17 | local (@0.7.7) |
44 | DRR | 0.0.2 | 2016-09-15 | CRAN (R 3.4.1) |
45 | dtw | 1.18-1 | 2015-09-01 | CRAN (R 3.4.1) |
46 | evaluate | 0.10.1 | 2017-06-24 | CRAN (R 3.4.1) |
47 | flexmix | 2.3-14 | 2017-04-28 | CRAN (R 3.4.1) |
48 | FNN | 1.1 | 2013-07-31 | CRAN (R 3.4.1) |
49 | foreach | 1.4.4 | 2017-12-12 | CRAN (R 3.4.1) |
50 | foreign | 0.8-69 | 2017-06-21 | CRAN (R 3.4.0) |
51 | Formula | 1.2-2 | 2017-07-10 | CRAN (R 3.4.1) |
52 | fpc | 2.1-10 | 2015-08-14 | CRAN (R 3.4.1) |
53 | gdata | 2.18.0 | 2017-06-06 | CRAN (R 3.4.1) |
54 | ggjoy | 0.4.0 | 2017-09-15 | CRAN (R 3.4.1) |
55 | ggplot2 | 2.2.1 | 2016-12-30 | CRAN (R 3.4.1) |
56 | ggpubr | 0.1.6 | 2017-11-14 | CRAN (R 3.4.1) |
57 | ggrastr | 0.1.5 | 2017-12-28 | Github (VPetukhov/ggrastr@cc56b45) |
58 | ggrepel | 0.7.0 | 2017-09-29 | CRAN (R 3.4.1) |
59 | ggridges | 0.4.1 | 2017-09-15 | CRAN (R 3.4.1) |
60 | git2r | 0.21.0 | 2018-01-04 | cran (@0.21.0) |
61 | glue | 1.2.0 | 2017-10-29 | CRAN (R 3.4.1) |
62 | GO.db | 3.2.2 | 2017-11-12 | Bioconductor |
63 | gower | 0.1.2 | 2017-02-23 | CRAN (R 3.4.1) |
64 | gplots | 3.0.1 | 2016-03-30 | CRAN (R 3.4.1) |
68 | gridBase | 0.4-7 | 2014-02-24 | CRAN (R 3.4.1) |
69 | gridExtra | 2.3 | 2017-09-09 | CRAN (R 3.4.1) |
70 | gtable | 0.2.0 | 2016-02-26 | CRAN (R 3.4.1) |
71 | gtools | 3.5.0 | 2015-05-29 | CRAN (R 3.4.1) |
72 | highr | 0.6 | 2016-05-09 | CRAN (R 3.4.1) |
73 | Hmisc | 4.0-3 | 2017-05-02 | CRAN (R 3.4.1) |
74 | htmlTable | 1.11.0 | 2017-12-01 | CRAN (R 3.4.1) |
75 | htmltools | 0.3.6 | 2017-04-28 | CRAN (R 3.4.1) |
76 | htmlwidgets | 1.0 | 2018-01-20 | cran (@1.0) |
77 | ica | 1.0-1 | 2015-08-25 | CRAN (R 3.4.1) |
78 | igraph | 1.2.1 | 2018-03-10 | cran (@1.2.1) |
79 | ipred | 0.9-6 | 2017-03-01 | CRAN (R 3.4.1) |
80 | IRanges | 2.4.8 | 2016-09-15 | Bioconductor |
81 | irlba | 2.3.2 | 2018-01-11 | cran (@2.3.2) |
82 | iterators | 1.0.9 | 2017-12-12 | CRAN (R 3.4.1) |
83 | kernlab | 0.9-25 | 2016-10-03 | CRAN (R 3.4.1) |
84 | KernSmooth | 2.23-15 | 2015-06-29 | CRAN (R 3.4.0) |
85 | knitr | 1.20 | 2018-02-20 | cran (@1.20) |
86 | ks | 1.11.0 | 2018-01-16 | local (VPetukhov/ks@NA) |
87 | labeling | 0.3 | 2014-08-23 | CRAN (R 3.4.1) |
88 | lars | 1.2 | 2013-04-24 | CRAN (R 3.4.1) |
89 | lattice | 0.20-35 | 2017-03-25 | CRAN (R 3.4.1) |
90 | latticeExtra | 0.6-28 | 2016-02-09 | CRAN (R 3.4.1) |
91 | lava | 1.5.1 | 2017-09-27 | CRAN (R 3.4.1) |
92 | lazyeval | 0.2.1 | 2017-10-29 | CRAN (R 3.4.1) |
93 | lubridate | 1.7.1 | 2017-11-03 | CRAN (R 3.4.1) |
94 | magrittr | 1.5 | 2014-11-22 | CRAN (R 3.4.1) |
95 | MASS | 7.3-47 | 2017-04-21 | CRAN (R 3.4.0) |
96 | Matrix | 1.2-12 | 2017-11-16 | CRAN (R 3.4.1) |
97 | mclust | 5.4 | 2017-11-22 | CRAN (R 3.4.1) |
98 | memoise | 1.1.0 | 2017-04-21 | CRAN (R 3.4.1) |
100 | mgcv | 1.8-22 | 2017-09-19 | CRAN (R 3.4.1) |
101 | mixtools | 1.1.0 | 2017-03-10 | CRAN (R 3.4.1) |
102 | mnormt | 1.5-5 | 2016-10-15 | CRAN (R 3.4.1) |
103 | ModelMetrics | 1.1.0 | 2016-08-26 | CRAN (R 3.4.1) |
104 | modeltools | 0.2-21 | 2013-09-02 | CRAN (R 3.4.1) |
105 | munsell | 0.4.3 | 2016-02-13 | CRAN (R 3.4.1) |
106 | mvtnorm | 1.0-7 | 2018-01-26 | cran (@1.0-7) |
107 | nlme | 3.1-131 | 2017-02-06 | CRAN (R 3.4.0) |
108 | NMF | 0.20.6 | 2015-05-26 | CRAN (R 3.4.1) |
109 | nnet | 7.3-12 | 2016-02-02 | CRAN (R 3.4.0) |
110 | numDeriv | 2016.8-1 | 2016-08-27 | CRAN (R 3.4.1) |
111 | pagoda2 | 0.0.0.9002 | 2018-04-08 | local (hms-dbmi/pagoda2@NA) |
113 | pbapply | 1.3-3 | 2017-07-04 | CRAN (R 3.4.1) |
114 | pcaMethods | 1.60.0 | 2017-11-12 | Bioconductor |
115 | pcaPP | 1.9-73 | 2018-01-14 | cran (@1.9-73) |
116 | pkgconfig | 2.0.1 | 2017-03-21 | CRAN (R 3.4.1) |
117 | pkgmaker | 0.22 | 2014-05-14 | CRAN (R 3.4.1) |
118 | plyr | 1.8.4 | 2016-06-08 | CRAN (R 3.4.1) |
119 | prabclus | 2.2-6 | 2015-01-14 | CRAN (R 3.4.1) |
120 | prodlim | 1.6.1 | 2017-03-06 | CRAN (R 3.4.1) |
121 | proxy | 0.4-20 | 2017-12-12 | CRAN (R 3.4.1) |
122 | psych | 1.7.8 | 2017-09-09 | CRAN (R 3.4.1) |
123 | purrr | 0.2.4 | 2017-10-18 | CRAN (R 3.4.1) |
124 | R.methodsS3 | 1.7.1 | 2016-02-16 | CRAN (R 3.4.1) |
125 | R.oo | 1.21.0 | 2016-11-01 | CRAN (R 3.4.1) |
126 | R.utils | 2.6.0 | 2017-11-05 | CRAN (R 3.4.1) |
127 | R6 | 2.2.2 | 2017-06-17 | CRAN (R 3.4.1) |
128 | ranger | 0.8.0 | 2017-06-20 | CRAN (R 3.4.1) |
129 | RColorBrewer | 1.1-2 | 2014-12-07 | CRAN (R 3.4.1) |
130 | Rcpp | 0.12.16 | 2018-03-13 | cran (@0.12.16) |
131 | RcppRoll | 0.2.2 | 2015-04-05 | CRAN (R 3.4.1) |
132 | recipes | 0.1.1 | 2017-11-20 | CRAN (R 3.4.1) |
133 | registry | 0.5 | 2017-12-03 | CRAN (R 3.4.1) |
134 | reshape2 | 1.4.3 | 2017-12-11 | CRAN (R 3.4.1) |
135 | rjson | 0.2.15 | 2014-11-03 | CRAN (R 3.4.1) |
136 | rlang | 0.1.4 | 2017-11-05 | CRAN (R 3.4.1) |
137 | rmarkdown | 1.9 | 2018-03-01 | CRAN (R 3.4.1) |
138 | rngtools | 1.2.4 | 2014-03-06 | CRAN (R 3.4.1) |
139 | robustbase | 0.92-8 | 2017-11-01 | CRAN (R 3.4.1) |
140 | ROCR | 1.0-7 | 2015-03-26 | CRAN (R 3.4.1) |
141 | Rook | 1.1-1 | 2014-10-20 | CRAN (R 3.4.1) |
142 | rpart | 4.1-11 | 2017-04-21 | CRAN (R 3.4.0) |
143 | rprojroot | 1.3-2 | 2018-01-03 | cran (@1.3-2) |
144 | RSQLite | 2.0 | 2017-06-19 | CRAN (R 3.4.1) |
145 | rstudioapi | 0.7 | 2017-09-07 | CRAN (R 3.4.1) |
146 | Rtsne | 0.14 | 2017-11-12 | Github (jkrijthe/Rtsne@1d0f926) |
147 | S4Vectors | 0.8.11 | 2016-01-30 | Bioconductor |
148 | scales | 0.5.0 | 2017-08-24 | CRAN (R 3.4.1) |
149 | scatterplot3d | 0.3-40 | 2017-04-22 | CRAN (R 3.4.1) |
150 | SDMTools | 1.1-221 | 2014-08-05 | CRAN (R 3.4.1) |
151 | segmented | 0.5-3.0 | 2017-11-30 | CRAN (R 3.4.1) |
152 | sessioninfo | 1.0.0 | 2017-06-21 | CRAN (R 3.4.1) |
153 | Seurat | 2.1.0 | 2017-10-12 | CRAN (R 3.4.1) |
154 | sfsmisc | 1.1-1 | 2017-06-08 | CRAN (R 3.4.1) |
155 | sn | 1.5-1 | 2017-11-23 | CRAN (R 3.4.1) |
159 | stringi | 1.1.7 | 2018-03-12 | cran (@1.1.7) |
160 | stringr | 1.3.0 | 2018-02-19 | cran (@1.3.0) |
161 | survival | 2.41-3 | 2017-04-04 | CRAN (R 3.4.0) |
162 | tclust | 1.3-1 | 2017-08-24 | CRAN (R 3.4.1) |
163 | tibble | 1.3.4 | 2017-08-22 | CRAN (R 3.4.1) |
164 | tidyr | 0.7.2 | 2017-10-16 | CRAN (R 3.4.1) |
165 | tidyselect | 0.2.3 | 2017-11-06 | CRAN (R 3.4.1) |
166 | timeDate | 3042.101 | 2017-11-16 | CRAN (R 3.4.1) |
168 | triebeard | 0.3.0 | 2016-08-04 | cran (@0.3.0) |
169 | trimcluster | 0.1-2 | 2012-10-29 | CRAN (R 3.4.1) |
170 | tsne | 0.1-3 | 2016-07-15 | CRAN (R 3.4.1) |
171 | urltools | 1.7.0 | 2018-01-20 | cran (@1.7.0) |
173 | VGAM | 1.0-4 | 2017-07-25 | CRAN (R 3.4.1) |
174 | withr | 2.1.2 | 2018-03-15 | cran (@2.1.2) |
175 | xtable | 1.8-2 | 2016-02-05 | CRAN (R 3.4.1) |
176 | yaml | 2.1.18 | 2018-03-08 | cran (@2.1.18) |
This R Markdown site was created with workflowr