Source file: notebooks/human_mouse/hm_10x_6k.Rmd
Last updated: 2018-02-04
Code version: 0306e1c
library(ggplot2)
library(ggrastr)
library(dropestr)
library(dropEstAnalysis)
library(Matrix)
library(dplyr)
theme_set(theme_base)
Here bam file was filtered by removing all reads, which were aligned on both mouse and human chromosomes at the same time.
# holder <- readRDS('../../data/dropest/10x/hgmm_6k/est_2018_01_25_filtered/hgmm_6k.rds')
# holder_filt <- list()
# holder_filt$cm_raw <- holder$cm_raw
# holder_filt$reads_per_chr_per_cells <- holder$reads_per_chr_per_cells$Exon
# saveRDS(holder_filt, '../../data/dropest/10x/hgmm_6k/est_2018_01_25_filtered/hgmm_6k_filt.rds')
holder <- readRDS('../../data/dropest/10x/hgmm_6k/est_2018_01_25_filtered/hgmm_6k_filt.rds')
kPlotDir <- '../../output/figures/'
cm_real <- holder$cm_raw
cell_number <- 6500
gene_species <- ifelse(substr(rownames(cm_real), 1, 2) == "hg", 'Human', 'Mouse') %>%
as.factor()
umi_by_species <- lapply(levels(gene_species), function(l) cm_real[gene_species == l,] %>%
Matrix::colSums()) %>% as.data.frame() %>%
`colnames<-`(levels(gene_species)) %>% tibble::rownames_to_column('CB') %>%
as_tibble() %>%
mutate(Total = Human + Mouse, Organism=ifelse(Human > Mouse, "Human", "Mouse"),
IsReal=rank(Total) >= length(Total) - cell_number) %>%
filter(Total > 20)
reads_per_chr <- FillNa(holder$reads_per_chr_per_cells$Exon[umi_by_species$CB,])
umi_by_species <- umi_by_species %>%
mutate(
MitReads = reads_per_chr$mm10_MT + reads_per_chr$hg19_MT,
TotalReads = rowSums(reads_per_chr),
MitochondrionFraction = MitReads / TotalReads
)
umi_by_species$Type <- ifelse(umi_by_species$IsReal, umi_by_species$Organism, "Background")
umi_by_species$Type[umi_by_species$Mouse > 2e3 & umi_by_species$Human > 2e3] <- 'Dublets'
gg_template <- ggplot(umi_by_species, aes(x=Mouse, y=Human)) +
geom_abline(aes(slope=1, intercept=0), linetype='dashed', alpha=0.5) +
scale_x_log10(limits=c(1, 2e5), name="#Mouse molecules") +
scale_y_log10(name="#Human molecules") + annotation_logticks() +
theme_pdf(legend.pos=c(0.97, 0.05)) + theme(legend.margin=margin(l=3, r=3, unit="pt"))
gg_left <- gg_template + geom_point(aes(color=IsReal), size=0.1, alpha=0.15) +
guides(color=guide_legend(override.aes=list(size=1.5, alpha=1)))
gg_right <- gg_template + geom_point(aes(color=MitochondrionFraction), size=0.1, alpha=0.15) +
scale_color_gradientn(colours=c("#1200ba", "#347fff", "#cc4000", "#ff3333"),
values=scales::rescale(c(0, 0.1, 0.3, 0.8)),
breaks=seq(0, 1.0, 0.2)) +
guides(color=guide_colorbar(direction="horizontal", title.position="top",
title="Mitochondrial\nfraction",
barwidth=unit(1.2, units="in")))
cowplot::plot_grid(gg_left, gg_right)
ggplot(umi_by_species) +
geom_point(aes(x=Total, y=pmin(Human, Mouse) / Total, color=Organism), size=0.1,
alpha=0.1) +
scale_x_log10(name='#Real UMIs', limits=c(10, 2e5)) + annotation_logticks() +
ylab('Fraction of mixed UMIs') +
guides(color=guide_legend(override.aes=list(size=1.5, alpha=1))) +
theme_pdf(legend.pos=c(1, 1))
Background cells have constant fraction of mouse and human reads:
mouse_frac <- umi_by_species %>% filter(IsReal) %>%
summarise(Mouse=sum(Mouse[Organism == 'Mouse']), Human=sum(Human[Organism == 'Human']),
MF=Mouse / (Mouse + Human)) %>% .$MF
ggplot(umi_by_species) +
geom_histogram(aes(x=Mouse / Total, y=..density.., fill=IsReal), binwidth=0.005,
position="identity") +
geom_vline(xintercept=mouse_frac) +
xlab("Fraction of mouse reads") +
theme_pdf(legend.pos=c(1, 1))
Distribution of total number of molecules by background cells:
gg <- ggplot(umi_by_species %>% filter(!IsReal)) +
geom_histogram(aes(x=Total), bins=100) +
scale_x_continuous(limits=c(0, 600), expand=c(0, 0), name="Total #UMIs") +
scale_y_continuous(limits=c(0, 6000), expand=c(0, 0), name="#Cells") +
theme_pdf()
gg
arrows_df <- umi_by_species %>% group_by(Type) %>%
summarise(MouseEnd=median(Mouse), HumanEnd=median(Human)) %>%
mutate(Mouse=c(1e1, 2e4, 7e1, 6e4),
Human=c(1e3, 5e3, 7e4, 1.5e2))
gg_fig <- gg_template +
geom_point_rast(aes(color=MitochondrionFraction), size=0.1, alpha=0.15, width=6,
height=4, dpi=200) +
scale_color_gradientn(colours=c("#1200ba", "#347fff", "#cc4000", "#ff3333"),
values=scales::rescale(c(0, 0.1, 0.3, 0.8)),
breaks=seq(0, 1.0, 0.2)) +
guides(color=guide_colorbar(direction="horizontal", title.position="top",
title="Mitochondrial\nfraction",
barwidth=unit(1.2, units="in"))) +
stat_ellipse(aes(group=Type), level=0.9999) +
geom_segment(aes(xend=MouseEnd, yend=HumanEnd, group=Type), data=arrows_df,
arrow=arrow(length = unit(0.03, "npc"))) +
geom_label(aes(label=Type), data=arrows_df, fill=alpha('white', 1)) +
theme(plot.margin=margin(1, 1, 1, 1))
try(invisible(dev.off()), silent=T)
ggsave(paste0(kPlotDir, 'supp_human_mouse.pdf'), gg_fig, width=6, height=4)
gg_fig
value | |
---|---|
version | R version 3.4.1 (2017-06-30) |
os | Ubuntu 14.04.5 LTS |
system | x86_64, linux-gnu |
ui | X11 |
language | (EN) |
collate | en_US.UTF-8 |
tz | America/New_York |
date | 2018-02-04 |
package | loadedversion | date | source | |
---|---|---|---|---|
1 | assertthat | 0.2.0 | 2017-04-11 | CRAN (R 3.4.1) |
2 | backports | 1.1.2 | 2017-12-13 | CRAN (R 3.4.1) |
4 | bindr | 0.1 | 2016-11-13 | CRAN (R 3.4.1) |
5 | bindrcpp | 0.2 | 2017-06-17 | CRAN (R 3.4.1) |
6 | Cairo | 1.5-9 | 2015-09-26 | CRAN (R 3.4.1) |
7 | clisymbols | 1.2.0 | 2017-05-21 | CRAN (R 3.4.1) |
8 | colorspace | 1.3-2 | 2016-12-14 | CRAN (R 3.4.1) |
10 | cowplot | 0.9.2 | 2017-12-17 | CRAN (R 3.4.1) |
12 | digest | 0.6.14 | 2018-01-14 | cran (@0.6.14) |
13 | dplyr | 0.7.4 | 2017-09-28 | CRAN (R 3.4.1) |
14 | dropEstAnalysis | 0.6.0 | 2018-02-01 | local (VPetukhov/dropEstAnalysis@NA) |
15 | dropestr | 0.7.5 | 2018-01-31 | local (@0.7.5) |
16 | evaluate | 0.10.1 | 2017-06-24 | CRAN (R 3.4.1) |
17 | ggplot2 | 2.2.1 | 2016-12-30 | CRAN (R 3.4.1) |
18 | ggrastr | 0.1.5 | 2017-12-28 | Github (VPetukhov/ggrastr@cc56b45) |
19 | git2r | 0.21.0 | 2018-01-04 | cran (@0.21.0) |
20 | glue | 1.2.0 | 2017-10-29 | CRAN (R 3.4.1) |
24 | gtable | 0.2.0 | 2016-02-26 | CRAN (R 3.4.1) |
25 | highr | 0.6 | 2016-05-09 | CRAN (R 3.4.1) |
26 | htmltools | 0.3.6 | 2017-04-28 | CRAN (R 3.4.1) |
27 | knitr | 1.18 | 2017-12-27 | cran (@1.18) |
28 | labeling | 0.3 | 2014-08-23 | CRAN (R 3.4.1) |
29 | lattice | 0.20-35 | 2017-03-25 | CRAN (R 3.4.1) |
30 | lazyeval | 0.2.1 | 2017-10-29 | CRAN (R 3.4.1) |
31 | magrittr | 1.5 | 2014-11-22 | CRAN (R 3.4.1) |
32 | MASS | 7.3-47 | 2017-04-21 | CRAN (R 3.4.0) |
33 | Matrix | 1.2-12 | 2017-11-16 | CRAN (R 3.4.1) |
35 | munsell | 0.4.3 | 2016-02-13 | CRAN (R 3.4.1) |
36 | pkgconfig | 2.0.1 | 2017-03-21 | CRAN (R 3.4.1) |
37 | plyr | 1.8.4 | 2016-06-08 | CRAN (R 3.4.1) |
38 | R6 | 2.2.2 | 2017-06-17 | CRAN (R 3.4.1) |
39 | Rcpp | 0.12.15 | 2018-01-20 | cran (@0.12.15) |
40 | rlang | 0.1.4 | 2017-11-05 | CRAN (R 3.4.1) |
41 | rmarkdown | 1.8 | 2017-11-17 | CRAN (R 3.4.1) |
42 | rprojroot | 1.3-2 | 2018-01-03 | cran (@1.3-2) |
43 | scales | 0.5.0 | 2017-08-24 | CRAN (R 3.4.1) |
44 | sessioninfo | 1.0.0 | 2017-06-21 | CRAN (R 3.4.1) |
46 | stringi | 1.1.6 | 2017-11-17 | CRAN (R 3.4.1) |
47 | stringr | 1.2.0 | 2017-02-18 | CRAN (R 3.4.1) |
48 | tibble | 1.3.4 | 2017-08-22 | CRAN (R 3.4.1) |
51 | withr | 2.1.1 | 2017-12-19 | cran (@2.1.1) |
52 | yaml | 2.1.16 | 2017-12-12 | CRAN (R 3.4.1) |
This R Markdown site was created with workflowr