Source file: notebooks/human_mouse/hm_10x_1k.Rmd
Last updated: 2018-01-27
Code version: 1485196
library(ggplot2)
library(ggrastr)
library(dropestr)
library(dropEstAnalysis)
library(Matrix)
library(dplyr)
# Make theme_base the default ggplot2 theme for every plot drawn below.
# NOTE(review): theme_base is not defined in this file; presumably it is
# provided by dropEstAnalysis -- confirm.
theme_set(theme_base)
Here the BAM file was filtered by realigning it with kallisto 0.43
separately against the mouse and human genomes. Only reads that aligned to exactly one of the two genomes were used in dropEst.
# Load the saved dropEst result and take its `cm_raw` matrix
# (rows are split by species below, columns are summed per barcode).
holder <- readRDS('../../data/dropest/10x/hgmm_1k/est_2018_01_27_kallisto/hgmm_1k.rds')
cm_real <- holder$cm_raw

# Number of top barcodes (ranked by total molecule count) flagged as real cells.
cell_number <- 1100

# Rows whose name starts with "hg" are labelled Human, all others Mouse.
gene_species <- ifelse(substr(rownames(cm_real), 1, 2) == "hg", "Human", "Mouse") %>%
  as.factor()

# One row per barcode (CB) with the per-species molecule totals, the overall
# total, the dominant organism and the real-cell flag.
umi_by_species <- lapply(
  levels(gene_species),
  # drop = FALSE keeps a matrix even if a species has a single row, so that
  # Matrix::colSums still receives a two-dimensional object.
  function(l) Matrix::colSums(cm_real[gene_species == l, , drop = FALSE])
) %>%
  as.data.frame() %>%
  `colnames<-`(levels(gene_species)) %>%
  tibble::rownames_to_column("CB") %>%
  as_tibble() %>%
  mutate(
    Total = Human + Mouse,
    Organism = ifelse(Human > Mouse, "Human", "Mouse"),
    # Strict `>`: ranks (n - cell_number + 1)..n are exactly `cell_number`
    # barcodes. The previous `>=` also admitted rank n - cell_number, i.e.
    # cell_number + 1 barcodes. The rank is taken over all barcodes, before
    # the low-count filter below.
    IsReal = rank(Total) > length(Total) - cell_number
  ) %>%
  # Discard barcodes with 20 or fewer molecules in total.
  filter(Total > 20)
# Per-chromosome exonic read counts for the barcodes kept in umi_by_species,
# passed through FillNa (helper not defined in this file).
reads_per_chr <- FillNa(
  holder$reads_per_chr_per_cells$Exon[umi_by_species$CB, ]
)

# Attach mitochondrial reads (mouse + human MT), total reads and their ratio
# to every barcode.
umi_by_species <- mutate(
  umi_by_species,
  MitReads = reads_per_chr[["mm10_MT"]] + reads_per_chr[["hg19_MT"]],
  TotalReads = rowSums(reads_per_chr),
  MitochondrionFraction = MitReads / TotalReads
)
# Shared scaffold for both scatter plots: mouse vs. human molecules per
# barcode on log10 axes, with a dashed y = x reference line.
gg <- ggplot(umi_by_species, aes(x = Mouse, y = Human)) +
  geom_abline(aes(slope = 1, intercept = 0), linetype = "dashed", alpha = 0.5) +
  scale_x_log10(limits = c(1, 2e5), name = "#Mouse molecules") +
  scale_y_log10(name = "#Human molecules") +
  annotation_logticks() +
  theme_pdf(legend.pos = c(0.97, 0.05)) +
  theme(legend.margin = margin(l = 3, r = 3, unit = "pt"))

# Left panel: points coloured by the real-cell flag. The legend keys are
# enlarged and made opaque so they stay readable despite the tiny points.
real_cell_legend <- guide_legend(override.aes = list(size = 1.5, alpha = 1))
gg_left <- gg +
  geom_point(aes(color = IsReal), size = 0.1, alpha = 0.15) +
  guides(color = real_cell_legend)

# Right panel: points coloured by mitochondrial read fraction on a
# four-colour gradient, with a horizontal colour bar.
mit_colours <- c("#1200ba", "#347fff", "#cc4000", "#ff3333")
mit_stops <- scales::rescale(c(0, 0.1, 0.3, 0.8))
mit_colourbar <- guide_colorbar(
  direction = "horizontal",
  title.position = "top",
  title = "Mitochondrial\nfraction",
  barwidth = unit(1.2, units = "in")
)
gg_right <- gg +
  geom_point(aes(color = MitochondrionFraction), size = 0.1, alpha = 0.15) +
  scale_color_gradientn(
    colours = mit_colours,
    values = mit_stops,
    breaks = seq(0, 1.0, 0.2)
  ) +
  guides(color = mit_colourbar)

# Show the two panels side by side.
cowplot::plot_grid(gg_left, gg_right)
# Share of molecules from the minority species (the smaller of the Human and
# Mouse counts divided by the total) against the total per barcode, coloured
# by the dominant organism.
ggplot(umi_by_species) +
  geom_point(
    aes(x = Total, y = pmin(Human, Mouse) / Total, color = Organism),
    size = 0.1,
    alpha = 0.1
  ) +
  scale_x_log10(name = '#Real UMIs', limits = c(10, 2e5)) +
  annotation_logticks() +
  labs(y = 'Fraction of mixed UMIs') +
  # Enlarged, opaque legend keys: the plotted points are too faint to read.
  guides(color = guide_legend(override.aes = list(size = 1.5, alpha = 1))) +
  theme_pdf(legend.pos = c(1, 1))
Background cells have a constant fraction of mouse and human reads:
# Mouse share among real cells: mouse molecules summed over mouse-dominated
# cells, divided by that sum plus human molecules summed over human-dominated
# cells.
mouse_frac <- with(filter(umi_by_species, IsReal), {
  mouse_total <- sum(Mouse[Organism == 'Mouse'])
  human_total <- sum(Human[Organism == 'Human'])
  mouse_total / (mouse_total + human_total)
})

# Density histograms of the per-barcode mouse fraction, split by the real-cell
# flag and overlaid (position = "identity"); the vertical line marks
# mouse_frac.
ggplot(umi_by_species) +
  geom_histogram(
    aes(x = Mouse / Total, y = ..density.., fill = IsReal),
    binwidth = 0.005,
    position = "identity"
  ) +
  geom_vline(xintercept = mouse_frac) +
  labs(x = "Fraction of mouse reads") +
  theme_pdf(legend.pos = c(1, 1))
Distribution of the total number of molecules per background cell:
# Histogram of total molecule counts over the barcodes not flagged as real,
# with fixed axis ranges (x: 0-250, y: 0-9000) and no padding around them.
gg <- ggplot(filter(umi_by_species, !IsReal)) +
  geom_histogram(aes(x = Total), bins = 100) +
  scale_x_continuous(
    name = "Total #UMIs",
    limits = c(0, 250),
    expand = c(0, 0)
  ) +
  scale_y_continuous(
    name = "#Cells",
    limits = c(0, 9000),
    expand = c(0, 0)
  ) +
  theme_pdf()

gg
# Platform details of the current R session as a one-column data frame
# (row names come from the field names).
sessioninfo::platform_info() %>%
  unlist() %>%
  data.frame(value = .)
| | value |
---|---|
version | R version 3.4.1 (2017-06-30) |
os | Ubuntu 14.04.5 LTS |
system | x86_64, linux-gnu |
ui | X11 |
language | (EN) |
collate | en_US.UTF-8 |
tz | America/New_York |
date | 2018-01-27 |
# Loaded packages of the current session, reduced to name, loaded version,
# date and source.
sessioninfo::package_info() %>%
  as.data.frame() %>%
  .[c('package', 'loadedversion', 'date', 'source')]
| | package | loadedversion | date | source |
---|---|---|---|---|
1 | assertthat | 0.2.0 | 2017-04-11 | CRAN (R 3.4.1) |
2 | backports | 1.1.2 | 2017-12-13 | CRAN (R 3.4.1) |
4 | bindr | 0.1 | 2016-11-13 | CRAN (R 3.4.1) |
5 | bindrcpp | 0.2 | 2017-06-17 | CRAN (R 3.4.1) |
6 | clisymbols | 1.2.0 | 2017-05-21 | CRAN (R 3.4.1) |
7 | colorspace | 1.3-2 | 2016-12-14 | CRAN (R 3.4.1) |
9 | cowplot | 0.9.2 | 2017-12-17 | CRAN (R 3.4.1) |
11 | digest | 0.6.14 | 2018-01-14 | cran (@0.6.14) |
12 | dplyr | 0.7.4 | 2017-09-28 | CRAN (R 3.4.1) |
13 | dropEstAnalysis | 0.6.0 | 2018-01-27 | local (VPetukhov/dropEstAnalysis@NA) |
14 | dropestr | 0.6.0 | 2018-01-24 | local (@0.6.0) |
15 | evaluate | 0.10.1 | 2017-06-24 | CRAN (R 3.4.1) |
16 | ggplot2 | 2.2.1 | 2016-12-30 | CRAN (R 3.4.1) |
17 | ggrastr | 0.1.5 | 2017-12-28 | Github (VPetukhov/ggrastr@cc56b45) |
18 | git2r | 0.21.0 | 2018-01-04 | cran (@0.21.0) |
19 | glue | 1.2.0 | 2017-10-29 | CRAN (R 3.4.1) |
23 | gtable | 0.2.0 | 2016-02-26 | CRAN (R 3.4.1) |
24 | highr | 0.6 | 2016-05-09 | CRAN (R 3.4.1) |
25 | htmltools | 0.3.6 | 2017-04-28 | CRAN (R 3.4.1) |
26 | knitr | 1.18 | 2017-12-27 | cran (@1.18) |
27 | labeling | 0.3 | 2014-08-23 | CRAN (R 3.4.1) |
28 | lattice | 0.20-35 | 2017-03-25 | CRAN (R 3.4.1) |
29 | lazyeval | 0.2.1 | 2017-10-29 | CRAN (R 3.4.1) |
30 | magrittr | 1.5 | 2014-11-22 | CRAN (R 3.4.1) |
31 | Matrix | 1.2-12 | 2017-11-16 | CRAN (R 3.4.1) |
33 | munsell | 0.4.3 | 2016-02-13 | CRAN (R 3.4.1) |
34 | pkgconfig | 2.0.1 | 2017-03-21 | CRAN (R 3.4.1) |
35 | plyr | 1.8.4 | 2016-06-08 | CRAN (R 3.4.1) |
36 | R6 | 2.2.2 | 2017-06-17 | CRAN (R 3.4.1) |
37 | Rcpp | 0.12.15 | 2018-01-20 | cran (@0.12.15) |
38 | rlang | 0.1.4 | 2017-11-05 | CRAN (R 3.4.1) |
39 | rmarkdown | 1.8 | 2017-11-17 | CRAN (R 3.4.1) |
40 | rprojroot | 1.3-2 | 2018-01-03 | cran (@1.3-2) |
41 | scales | 0.5.0 | 2017-08-24 | CRAN (R 3.4.1) |
42 | sessioninfo | 1.0.0 | 2017-06-21 | CRAN (R 3.4.1) |
44 | stringi | 1.1.6 | 2017-11-17 | CRAN (R 3.4.1) |
45 | stringr | 1.2.0 | 2017-02-18 | CRAN (R 3.4.1) |
46 | tibble | 1.3.4 | 2017-08-22 | CRAN (R 3.4.1) |
49 | withr | 2.1.1 | 2017-12-19 | cran (@2.1.1) |
50 | yaml | 2.1.16 | 2017-12-12 | CRAN (R 3.4.1) |
This R Markdown site was created with workflowr