Source file: notebooks/cell_barcode_correction/merge_targets_mixture.Rmd
Last updated: 2018-05-31
Code version: 35d0d5a
library(ggplot2)
library(ggrastr)
library(dplyr)
library(parallel)
library(dropestr)
library(dropEstAnalysis)
library(Matrix)
theme_set(theme_base)
kDataPath <- '../../data/dropest/'
kTablesPath <- '../../output/tables/'
k10xSubfolders <- c(poisson='est_01_14_precise/', real='est_01_14_barcodes/',
unmerged='est_01_14_unmerged/', merge_all='est_01_16_merge_all/')
kDropSeqSubolders <- c(poisson='est_01_16_precise/', unmerged='est_01_16_unmerged/',
merge_all='est_01_16_merge_all/')
kDataFiles <- list(
`10x`=paste0(kDataPath, '10x/hgmm_6k/', k10xSubfolders, "hgmm6k.rds") %>%
setNames(names(k10xSubfolders)),
drop_seq=paste0(kDataPath, 'dropseq/thousand/', kDropSeqSubolders, "thousand.rds") %>%
setNames(names(kDropSeqSubolders))
)
holders <- mclapply(kDataFiles, function(paths) mclapply(paths, readRDS, mc.cores=4),
mc.cores=2)
validation_data <- mclapply(holders, function(hs) list(
merge_targets = lapply(hs, function(holder) unlist(holder$merge_targets)),
cms_raw = lapply(hs, `[[`, 'cm_raw'),
cms = lapply(hs, `[[`, 'cm')
), mc.cores=8)
validation_data$`10x`$cms_raw <- lapply(validation_data$`10x`$cms_raw,
function(cm) cm[grep("^[^;]+$", rownames(cm)),])
validation_data$`10x`$cms <- lapply(validation_data$`10x`$cms,
function(cm) cm[grep("^[^;]+$", rownames(cm)),])
rm(holders)
invisible(gc())
# saveRDS(validation_data, paste0(kDataPath, 'human_mouse_mixture_validation_data.rds'))
# validation_data <- readRDS(paste0(kDataPath, 'human_mouse_mixture_validation_data.rds'))
umis_per_cb <- Matrix::colSums(validation_data$`10x`$cms$unmerged) %>% sort(decreasing=T)
real_cbs <- names(umis_per_cb)[1:6000]
PlotCellsNumberLine(umis_per_cb[1:10000])
GeneSpeciesFromMixture <- function(cm, org1.marker, org1.name, org2.name) {
res <- ifelse(substr(rownames(cm), 1, nchar(org1.marker)) == org1.marker, org1.name, org2.name)
return(as.factor(res))
}
CellSpeciesFromMixture <- function(gene.species, cm) {
res <- levels(gene.species) %>%
lapply(function(l) cm[gene.species == l,] %>% Matrix::colSums())
res <- levels(gene.species)[as.integer(res[[1]] < res[[2]]) + 1] %>%
setNames(colnames(cm)) %>% as.factor()
return(res)
}
gene_species <- GeneSpeciesFromMixture(validation_data$`10x`$cms_raw$unmerged,
'hg', 'Human', 'Mouse')
cell_species <- CellSpeciesFromMixture(gene_species,
validation_data$`10x`$cms_raw$unmerged)
table(cell_species[real_cbs])
Human Mouse
3260 2740
table(cell_species) / sum(table(cell_species))
cell_species
Human Mouse
0.7844979 0.2155021
merge_targets <- lapply(validation_data$`10x`$merge_targets,
function(mt) mt[mt %in% real_cbs])
comparison_10x <- MergeComparisonSummary(merge_targets, cell_species, dataset="10x hgmm6k")
comparison_10x$`Merge type` <- c('Poisson', 'Known barcodes', 'Simple')
comparison_10x
Dataset | Merge type | #Merges | Fraction of mixed merges | Similarity to merge with barcodes |
---|---|---|---|---|
10x hgmm6k | Poisson | 8999 | 0.58% | 99.74% |
10x hgmm6k | Known barcodes | 8985 | 0.62% | 100% |
10x hgmm6k | Simple | 21827 | 32.96% | 20.67% |
umis_per_cb <- Matrix::colSums(validation_data$drop_seq$cms$unmerged) %>%
sort(decreasing=T)
real_cbs <- names(umis_per_cb)[1:1000]
PlotCellsNumberLine(umis_per_cb[1:5000])
gene_species <- GeneSpeciesFromMixture(validation_data$drop_seq$cms_raw$unmerged,
'HUMAN', 'Human', 'Mouse')
cell_species <- CellSpeciesFromMixture(gene_species,
validation_data$drop_seq$cms_raw$unmerged)
table(cell_species[real_cbs])
Human Mouse
586 414
table(cell_species)
cell_species
Human Mouse
52885 9670
merge_targets <- lapply(validation_data$drop_seq$merge_targets,
function(mt) mt[mt %in% real_cbs])
comparison_drop_seq <- MergeComparisonSummary(merge_targets, cell_species,
dataset='Drop-seq thousand')
comparison_drop_seq$`Merge type` <- c('Poisson', 'Simple')
comparison_drop_seq
Dataset | Merge type | #Merges | Fraction of mixed merges | Similarity to merge with barcodes |
---|---|---|---|---|
Drop-seq thousand | Poisson | 15186 | 0.83% | NA% |
Drop-seq thousand | Simple | 26154 | 8.74% | NA% |
complete_table <- rbind(comparison_10x, comparison_drop_seq)
write.csv(complete_table, paste0(kTablesPath, 'merge_comparison.csv'), row.names=F)
value | |
---|---|
version | R version 3.4.1 (2017-06-30) |
os | Ubuntu 14.04.5 LTS |
system | x86_64, linux-gnu |
ui | X11 |
language | (EN) |
collate | en_US.UTF-8 |
tz | America/New_York |
date | 2018-05-31 |
package | loadedversion | date | source | |
---|---|---|---|---|
1 | assertthat | 0.2.0 | 2017-04-11 | CRAN (R 3.4.1) |
2 | backports | 1.1.2 | 2017-12-13 | CRAN (R 3.4.1) |
4 | bindr | 0.1 | 2016-11-13 | CRAN (R 3.4.1) |
5 | bindrcpp | 0.2 | 2017-06-17 | CRAN (R 3.4.1) |
6 | clisymbols | 1.2.0 | 2017-05-21 | CRAN (R 3.4.1) |
7 | colorspace | 1.3-2 | 2016-12-14 | CRAN (R 3.4.1) |
10 | digest | 0.6.15 | 2018-01-28 | cran (@0.6.15) |
11 | dplyr | 0.7.4 | 2017-09-28 | CRAN (R 3.4.1) |
12 | dropEstAnalysis | 0.6.0 | 2018-05-16 | local (VPetukhov/dropEstAnalysis@NA) |
13 | dropestr | 0.7.7 | 2018-03-17 | local (@0.7.7) |
14 | evaluate | 0.10.1 | 2017-06-24 | CRAN (R 3.4.1) |
15 | ggplot2 | 2.2.1 | 2016-12-30 | CRAN (R 3.4.1) |
16 | ggrastr | 0.1.5 | 2017-12-28 | Github (VPetukhov/ggrastr@cc56b45) |
17 | git2r | 0.21.0 | 2018-01-04 | cran (@0.21.0) |
18 | glue | 1.2.0 | 2017-10-29 | CRAN (R 3.4.1) |
22 | gtable | 0.2.0 | 2016-02-26 | CRAN (R 3.4.1) |
23 | highr | 0.6 | 2016-05-09 | CRAN (R 3.4.1) |
24 | htmltools | 0.3.6 | 2017-04-28 | CRAN (R 3.4.1) |
25 | knitr | 1.20 | 2018-02-20 | cran (@1.20) |
26 | labeling | 0.3 | 2014-08-23 | CRAN (R 3.4.1) |
27 | lattice | 0.20-35 | 2017-03-25 | CRAN (R 3.4.1) |
28 | lazyeval | 0.2.1 | 2017-10-29 | CRAN (R 3.4.1) |
29 | magrittr | 1.5 | 2014-11-22 | CRAN (R 3.4.1) |
30 | Matrix | 1.2-12 | 2017-11-16 | CRAN (R 3.4.1) |
32 | munsell | 0.4.3 | 2016-02-13 | CRAN (R 3.4.1) |
34 | pkgconfig | 2.0.1 | 2017-03-21 | CRAN (R 3.4.1) |
35 | plyr | 1.8.4 | 2016-06-08 | CRAN (R 3.4.1) |
36 | R6 | 2.2.2 | 2017-06-17 | CRAN (R 3.4.1) |
37 | Rcpp | 0.12.17 | 2018-05-18 | cran (@0.12.17) |
38 | rlang | 0.1.4 | 2017-11-05 | CRAN (R 3.4.1) |
39 | rmarkdown | 1.9 | 2018-03-01 | CRAN (R 3.4.1) |
40 | rprojroot | 1.3-2 | 2018-01-03 | cran (@1.3-2) |
41 | scales | 0.5.0 | 2017-08-24 | CRAN (R 3.4.1) |
42 | sessioninfo | 1.0.0 | 2017-06-21 | CRAN (R 3.4.1) |
44 | stringi | 1.1.7 | 2018-03-12 | cran (@1.1.7) |
45 | stringr | 1.3.0 | 2018-02-19 | cran (@1.3.0) |
46 | tibble | 1.3.4 | 2017-08-22 | CRAN (R 3.4.1) |
49 | withr | 2.1.2 | 2018-03-15 | cran (@2.1.2) |
50 | yaml | 2.1.18 | 2018-03-08 | cran (@2.1.18) |
This R Markdown site was created with workflowr