Source file: notebooks/cell_barcode_correction/size_gain.Rmd

Last updated: 2018-02-04

Code version: d5a26c1

Initialization

library(ggplot2)
library(ggrastr)
library(dplyr)
library(parallel)
library(dropestr)
library(dropEstAnalysis)
library(Matrix)

# Make `theme_base` the default ggplot2 theme for every plot drawn below.
# NOTE(review): `theme_base` is not defined in this file; presumably it comes
# from one of the packages attached above - confirm which one.
theme_set(theme_base)

# Folder the figure is written to, and the root folder of the input .rds files.
kPlotsFolder <- '../../output/figures/'
kDataPath <- '../../data/dropest/'

# Labels for the two runs of each protocol. They are applied positionally:
# the first path of every pair below is 'precise', the second is 'unmerged'.
dataset_names <- c('precise', 'unmerged')

# Paths of the two runs of each protocol, relative to kDataPath.
kDropSeqPath <- paste0(
  'dropseq/thousand/',
  c('est_01_16_precise/', 'est_01_16_unmerged/'),
  'thousand.rds'
)
kInDropPath <- paste0(
  'SCG71/',
  c('est_11_16_poisson_simple/', 'est_11_16_unmerged/'),
  'SCG71.rds'
)
k10xPath <- paste0(
  '10x/pbmc33k/',
  c('est_11_17_poisson/', 'est_11_17_unmerged/'),
  'pbmc33k_no_umis.rds'
)

# Named list (one entry per protocol) of full paths, each entry being a
# character vector named by dataset_names.
kDataPaths <- lapply(
  list(dropseq = kDropSeqPath, indrop = kInDropPath, `10x` = k10xPath),
  function(rel_paths) setNames(paste0(kDataPath, rel_paths), dataset_names)
)

# Read every .rds file listed in kDataPaths. The outer mclapply runs one
# worker per protocol, the inner one runs one worker per file of that protocol.
holders <- mclapply(
  kDataPaths,
  function(paths) mclapply(paths, readRDS, mc.cores = length(paths)),
  mc.cores = length(kDataPaths)
)

# For each protocol keep only the `cm` element of every loaded object,
# stored under `cms` and still named by run ('precise' / 'unmerged').
validation_data <- lapply(holders, function(protocol_holders) {
  list(cms = lapply(protocol_holders, function(holder) holder[['cm']]))
})

# Number of top-ranked cell barcodes (by total count in the 'precise' run)
# that are treated as real cells in each dataset.
validation_data$dropseq$cell_number <- 1000
validation_data$indrop$cell_number <- 5200
validation_data$`10x`$cell_number <- 30000

for (n in names(validation_data)) {
  cn <- validation_data[[n]]$cell_number
  # Per-barcode totals (column sums of each count matrix), largest first.
  validation_data[[n]]$umi_per_cb <- lapply(validation_data[[n]]$cms, function(cm)
    sort(Matrix::colSums(cm), decreasing=TRUE))
  # Names of the `cn` largest barcodes of the 'precise' run. head() is used
  # instead of `[1:cn]` so that a matrix with fewer than `cn` barcodes yields
  # only the existing names rather than being padded with NA.
  validation_data[[n]]$real_cbs <- head(names(validation_data[[n]]$umi_per_cb$precise), cn)
}

# Relative change of the per-barcode total between the 'precise' and the
# 'unmerged' run, (precise - unmerged) / unmerged, for the barcodes listed
# in `real_cbs`. One named numeric vector per dataset.
size_increase <- lapply(validation_data, function(d) {
  precise_size <- d$umi_per_cb$precise[d$real_cbs]
  unmerged_size <- d$umi_per_cb$unmerged[d$real_cbs]
  (precise_size - unmerged_size) / unmerged_size
})

Figure

# Replace the dataset keys with display labels. The labels are assigned by
# position, so they rely on size_increase keeping its existing element order.
names(size_increase) <- c('Drop-seq, mixture', 'inDrop, BMCs', '10x, 33k PBMCs')

# Long-format table: one row per barcode, with its relative increase and the
# label of the dataset it belongs to.
plot_df <- bind_rows(Map(
  function(increase, label) tibble(Increase = increase, Dataset = label),
  size_increase,
  names(size_increase)
))

# Overlaid histograms of the relative size increase (in percent), one per
# dataset. The y aesthetic is density * bin width (0.75) * 100, i.e. the
# percentage of plotted barcodes that falls into each bin.
gg_size <- ggplot(plot_df) +
  geom_histogram(
    aes(x = 100 * Increase, fill = Dataset, y = 100 * 0.75 * ..density..),
    binwidth = 0.75,
    color = alpha('black', 0.05),
    position = 'identity',
    alpha = 0.5
  ) +
  labs(x = 'Increase in #molecules per CB, %', y = 'Number of CBs, %') +
  scale_x_continuous(expand = c(0, 0), limits = c(0.0, 20)) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 35)) +
  theme_pdf(legend.pos = c(1, 1)) +
  theme(panel.grid.minor = element_blank())

# Write the figure to the plots folder, then print it so it is also rendered.
ggsave(
  filename = paste0(kPlotsFolder, "merge_size_increase.pdf"),
  plot = gg_size,
  width = 3.5,
  height = 2.3
)
gg_size

Session information

value
version R version 3.4.1 (2017-06-30)
os Ubuntu 14.04.5 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
tz America/New_York
date 2018-02-04
package loadedversion date source
1 assertthat 0.2.0 2017-04-11 CRAN (R 3.4.1)
2 backports 1.1.2 2017-12-13 CRAN (R 3.4.1)
4 bindr 0.1 2016-11-13 CRAN (R 3.4.1)
5 bindrcpp 0.2 2017-06-17 CRAN (R 3.4.1)
6 clisymbols 1.2.0 2017-05-21 CRAN (R 3.4.1)
7 colorspace 1.3-2 2016-12-14 CRAN (R 3.4.1)
10 digest 0.6.14 2018-01-14 cran (@0.6.14)
11 dplyr 0.7.4 2017-09-28 CRAN (R 3.4.1)
12 dropEstAnalysis 0.6.0 2018-02-01 local (VPetukhov/dropEstAnalysis@NA)
13 dropestr 0.7.5 2018-01-31 local (@0.7.5)
14 evaluate 0.10.1 2017-06-24 CRAN (R 3.4.1)
15 ggplot2 2.2.1 2016-12-30 CRAN (R 3.4.1)
16 ggrastr 0.1.5 2017-12-28 Github (VPetukhov/ggrastr@cc56b45)
17 git2r 0.21.0 2018-01-04 cran (@0.21.0)
18 glue 1.2.0 2017-10-29 CRAN (R 3.4.1)
22 gtable 0.2.0 2016-02-26 CRAN (R 3.4.1)
23 highr 0.6 2016-05-09 CRAN (R 3.4.1)
24 htmltools 0.3.6 2017-04-28 CRAN (R 3.4.1)
25 knitr 1.18 2017-12-27 cran (@1.18)
26 labeling 0.3 2014-08-23 CRAN (R 3.4.1)
27 lattice 0.20-35 2017-03-25 CRAN (R 3.4.1)
28 lazyeval 0.2.1 2017-10-29 CRAN (R 3.4.1)
29 magrittr 1.5 2014-11-22 CRAN (R 3.4.1)
30 Matrix 1.2-12 2017-11-16 CRAN (R 3.4.1)
32 munsell 0.4.3 2016-02-13 CRAN (R 3.4.1)
34 pkgconfig 2.0.1 2017-03-21 CRAN (R 3.4.1)
35 plyr 1.8.4 2016-06-08 CRAN (R 3.4.1)
36 R6 2.2.2 2017-06-17 CRAN (R 3.4.1)
37 Rcpp 0.12.15 2018-01-20 cran (@0.12.15)
38 rlang 0.1.4 2017-11-05 CRAN (R 3.4.1)
39 rmarkdown 1.8 2017-11-17 CRAN (R 3.4.1)
40 rprojroot 1.3-2 2018-01-03 cran (@1.3-2)
41 scales 0.5.0 2017-08-24 CRAN (R 3.4.1)
42 sessioninfo 1.0.0 2017-06-21 CRAN (R 3.4.1)
44 stringi 1.1.6 2017-11-17 CRAN (R 3.4.1)
45 stringr 1.2.0 2017-02-18 CRAN (R 3.4.1)
46 tibble 1.3.4 2017-08-22 CRAN (R 3.4.1)
49 withr 2.1.1 2017-12-19 cran (@2.1.1)
50 yaml 2.1.16 2017-12-12 CRAN (R 3.4.1)

This R Markdown site was created with workflowr