Source file: notebooks/umi_correction/trimmed_collisions.Rmd

Last updated: 2018-02-18

Code version: e50352c

library(ggplot2)
library(ggsci)
library(ggridges)
library(ggrastr)
library(dplyr)
library(parallel)
library(reshape2)
library(dropestr)
library(dropEstAnalysis)

theme_set(theme_base)
kPlotsFolder <- '../../output/figures/'
kDataPath <- '../../data/dropest/'
# reads_per_umi_per_cell <- holder$reads_per_umi_per_cell
# reads_per_umi_per_cell$reads_per_umi <- FilterNUmis(holder$reads_per_umi_per_cell$reads_per_umi)

reads_per_umi_per_cell <- kDataPath %>% 
  paste0('10x/aml035_post_transplant/est_10_20_umi_quality/reads_per_umi_per_cell.rds') %>% 
  readRDS()

Collisions

Trimming:

umi_lengths <- rep(6:10, 2)
is_reverse <- c(rep(F, 5), rep(T, 5))

trimmed <- mcmapply(function(i, b) TrimUmisSummary(reads_per_umi_per_cell, i, b),
                    umi_lengths, is_reverse, SIMPLIFY = F, mc.cores=10)
collisions_infos <- lapply(trimmed, function(d) d$collisions.info)
collision_df <- mapply(function(s, l, r) data.frame(Observed=1:length(s), Adjusted=s, 
                                                    UmiLength=l, IsReverse=r),
              collisions_infos, umi_lengths, is_reverse, SIMPLIFY = F) %>% 
  bind_rows() %>% 
  mutate(AdjustedUniform=mapply(AdjustGeneExpressionUniform, Observed, 4^UmiLength))

collision_dfs <- split(collision_df, collision_df$IsReverse) %>% 
  setNames(c('Reverse', 'Direct'))
rm(trimmed, reads_per_umi_per_cell, collisions_infos, collision_df)
invisible(gc())

Main figure

plt_guide <- guides(color=guide_legend(title='UMI length', nrow = 2, order=1),
                    linetype=guide_legend(title='UMI distribution', order=2, keywidth = 1.5))

expand <- c(0.01, 0)

fig_collisions <- ggplot(collision_dfs$Direct, aes(x=Observed, col=as.factor(UmiLength))) + 
  geom_line(aes(y=Adjusted - Observed, linetype='Empirical'), size=0.9) +
  geom_line(aes(y=AdjustedUniform - Observed, linetype='Uniform'), size=0.9) +
  scale_x_continuous(expand = expand, breaks=seq(4000, 12000, 4000)) + 
  scale_y_continuous(expand = expand) +
  labs(x='#Observed UMIs', y='#Collisions') +
  theme_pdf(legend.pos=c(1, 1)) + plt_guide

fig_collisions <- cowplot::plot_grid(fig_collisions, labels='C')

ggsave(paste0(kPlotsFolder, '/fig_collisions.pdf'), fig_collisions, height=5.5, width=3.5)
print(fig_collisions)

rm(fig_collisions);

Ratio

gg_collisions_ext <- ggplot(mapping=aes(x=Observed, col=as.factor(UmiLength), 
                                        y=Adjusted / AdjustedUniform)) + 
  geom_smooth(data=collision_dfs$Direct, mapping=aes(linetype='Empirical, back trim'), 
              size=0.9, se=F) +
  geom_smooth(data=collision_dfs$Reverse, mapping=aes(linetype='Empirical, front trim'), 
              size=0.9, se=F) +
  scale_x_continuous(expand = expand) + scale_y_continuous(expand = expand) +
  labs(x='#Observed UMIs', y='#Collisions empirical / #Collisions uniform') +
  theme_pdf(legend.pos=c(1, 1)) + 
  plt_guide

rm(collision_dfs)
gg_collisions_ext

Estimated number of adjacent UMIs

holder <- readRDS(paste0(kDataPath, 'SCG71/est_01_15_umi_quality/SCG71.rds'))
umi_distribution <- GetUmisDistribution(holder$reads_per_umi_per_cell$reads_per_umi)
umi_probs <- umi_distribution / sum(umi_distribution)
rm(holder, umi_distribution)
invisible(gc())
gene_sizes <- list(2:10, 11:50, seq(60, 300, 10), seq(300, 500, 25), seq(600, 4090, 200), 
                   c(4050, 4095))
sample_nums <- unlist(mapply(rep, c(100000, 100000, 15000, 1000, 500, 500), 
                             sapply(gene_sizes, length)))
gene_sizes_f <- unlist(gene_sizes)

adjacent_umis_num <- mcmapply(function(s, n) 
  SampleNumbersOfAdjacentUmis(s, umi_probs, n, uniform=F), 
  gene_sizes_f, sample_nums, mc.cores=30)
adjacent_umis_num_unif <- mcmapply(function(s, n) 
  SampleNumbersOfAdjacentUmis(s, umi_probs, n, uniform=T), 
  gene_sizes_f, sample_nums, mc.cores=30)
plot_df <- data.frame(GeneSize=c(1, unlist(gene_sizes))) %>% 
  mutate(ProbOfAdjacentUMIs=c(0, sapply(adjacent_umis_num, mean)) / GeneSize^2,
         ProbOfAdjacentUMIsUnif=c(0, sapply(adjacent_umis_num_unif, mean)) / GeneSize^2)

axis_ratio <- 200
axis_offset <- 0.001

gg_prob_ratio <- ggplot(plot_df, aes(x=GeneSize)) + 
  geom_line(aes(y=ProbOfAdjacentUMIs, color='Empirical'), size=2, alpha=0.8) + 
  geom_line(aes(y=ProbOfAdjacentUMIsUnif, color='Uniform'), size=2, alpha=0.78) +
  geom_smooth(aes(y=ProbOfAdjacentUMIs / ProbOfAdjacentUMIsUnif / axis_ratio - axis_offset, 
                  linetype='Empirical / Uniform'), size=1.5, color='black', se=FALSE) +
  labs(x = '#UMIs per gene', y='Adjacent UMI probability') +
  scale_x_log10(expand=c(0, 0), limits=c(2, 4100)) + annotation_logticks(side='b') +
  scale_y_continuous(expand=c(1e-5, 1e-5), limits=c(0.002, 0.0052), 
                     sec.axis=sec_axis(trans=~.*axis_ratio + axis_offset*axis_ratio, 
                                       name='Probabilities ratio')) +
  scale_linetype_manual(values='dashed', name='Probabilities ratio') +
  guides(color=guide_legend(title='UMI distribution')) + 
  guides(linetype=guide_legend(override.aes=list(linetype=6))) +
  theme_pdf(legend.pos=c(0.5, 0)) + 
  theme(legend.box='horizontal', legend.key.width=unit(0.3, 'in'))
gg_prob_ratio

Complete figure

gg_figure <- cowplot::plot_grid(gg_collisions_ext, gg_prob_ratio, ncol=2, labels="AUTO", 
                                align='h', rel_widths=c(3.5, 4.5), label_x=0.03)

ggsave(paste0(kPlotsFolder, 'supp_impact_on_collisions.pdf'), gg_figure, width=8, height=4)
gg_figure

Session information

value
version R version 3.4.1 (2017-06-30)
os Ubuntu 14.04.5 LTS
system x86_64, linux-gnu
ui X11
language (EN)
collate en_US.UTF-8
tz America/New_York
date 2018-02-18
package loadedversion date source
1 assertthat 0.2.0 2017-04-11 CRAN (R 3.4.1)
2 backports 1.1.2 2017-12-13 CRAN (R 3.4.1)
4 bindr 0.1 2016-11-13 CRAN (R 3.4.1)
5 bindrcpp 0.2 2017-06-17 CRAN (R 3.4.1)
6 clisymbols 1.2.0 2017-05-21 CRAN (R 3.4.1)
7 colorspace 1.3-2 2016-12-14 CRAN (R 3.4.1)
9 cowplot 0.9.2 2017-12-17 CRAN (R 3.4.1)
11 digest 0.6.14 2018-01-14 cran (@0.6.14)
12 dplyr 0.7.4 2017-09-28 CRAN (R 3.4.1)
13 dropEstAnalysis 0.6.0 2018-02-18 local (VPetukhov/dropEstAnalysis@NA)
14 dropestr 0.7.5 2018-02-18 local (@0.7.5)
15 evaluate 0.10.1 2017-06-24 CRAN (R 3.4.1)
16 ggplot2 2.2.1 2016-12-30 CRAN (R 3.4.1)
17 ggrastr 0.1.5 2017-12-28 Github (VPetukhov/ggrastr@cc56b45)
18 ggridges 0.4.1 2017-09-15 CRAN (R 3.4.1)
19 ggsci 2.8 2017-09-30 CRAN (R 3.4.1)
20 git2r 0.21.0 2018-01-04 cran (@0.21.0)
21 glue 1.2.0 2017-10-29 CRAN (R 3.4.1)
25 gtable 0.2.0 2016-02-26 CRAN (R 3.4.1)
26 highr 0.6 2016-05-09 CRAN (R 3.4.1)
27 htmltools 0.3.6 2017-04-28 CRAN (R 3.4.1)
28 knitr 1.18 2017-12-27 cran (@1.18)
29 labeling 0.3 2014-08-23 CRAN (R 3.4.1)
30 lattice 0.20-35 2017-03-25 CRAN (R 3.4.1)
31 lazyeval 0.2.1 2017-10-29 CRAN (R 3.4.1)
32 magrittr 1.5 2014-11-22 CRAN (R 3.4.1)
33 Matrix 1.2-12 2017-11-16 CRAN (R 3.4.1)
35 mgcv 1.8-22 2017-09-19 CRAN (R 3.4.1)
36 munsell 0.4.3 2016-02-13 CRAN (R 3.4.1)
37 nlme 3.1-131 2017-02-06 CRAN (R 3.4.0)
39 pkgconfig 2.0.1 2017-03-21 CRAN (R 3.4.1)
40 plyr 1.8.4 2016-06-08 CRAN (R 3.4.1)
41 R6 2.2.2 2017-06-17 CRAN (R 3.4.1)
42 Rcpp 0.12.15 2018-01-20 cran (@0.12.15)
43 reshape2 1.4.3 2017-12-11 CRAN (R 3.4.1)
44 rlang 0.1.4 2017-11-05 CRAN (R 3.4.1)
45 rmarkdown 1.8 2017-11-17 CRAN (R 3.4.1)
46 rprojroot 1.3-2 2018-01-03 cran (@1.3-2)
47 scales 0.5.0 2017-08-24 CRAN (R 3.4.1)
48 sessioninfo 1.0.0 2017-06-21 CRAN (R 3.4.1)
50 stringi 1.1.6 2017-11-17 CRAN (R 3.4.1)
51 stringr 1.2.0 2017-02-18 CRAN (R 3.4.1)
52 tibble 1.3.4 2017-08-22 CRAN (R 3.4.1)
55 withr 2.1.1 2017-12-19 cran (@2.1.1)
56 yaml 2.1.16 2017-12-12 CRAN (R 3.4.1)

This R Markdown site was created with workflowr