Benchmark read alignments to transposable elements

Getting started

We start the workflow by loading Repsc and the human hg38 BSgenome object into our R environment:

devtools::load_all('/net/mraid14/export/tgdata/users/davidbr/src/Reputils/')
devtools::load_all('/net/mraid14/export/tgdata/users/davidbr/src/Repsc/')

library(BSgenome.Hsapiens.UCSC.hg38)

Introduce sequence errors

Import reads

bams  <- grep('error', dir('~/davidbr/proj/epitherapy/data/h1299/10x/dacsb/aligned/', pattern = 'bam$', full.names = TRUE), value = TRUE)

reads <- importBAM(bams,
                  paired  = TRUE, 
                  mate    = 'first',
                  anchor  = 'fiveprime',
                  multi   = FALSE,
                  what    = 'qname',
                  barcode = c('0.25', '0.5', '0', '1', '2', '4', '8'))

# add overlapping TE locus
reads_anno <- plyranges::join_overlap_left_directed(reads, tes)  

Benchmarking

Percent correctly mapped

reads_dt <- reads_anno %>% as.data.table()

p_n_reads <- reads_dt %>% count(barcode) %>% ggplot(aes(barcode, n)) + geom_bar(stat = ‘identity’)

fam_counts <- reads_dt[which(barcode == 0), ] %>% count(name)

bla = reads_dt[, same := abs(start - start[barcode == 0]) < 10, by = ‘qname’ ][, .(perc_cor_map = sum(same, na.rm = TRUE) / length(same) * 100, repclass = repclass[barcode == 0]), by = c(‘barcode’, ‘name’)] bla %>% filter(name %in% (fam_counts %>% filter(n > 100) %>% pull(name))) %>% ggplot(aes(x = as.numeric(barcode), y = perc_cor_map, group = name, col = repclass, alpha = 0.25)) + geom_point() + geom_line() + facet_wrap(~repclass) + theme(legend.position = ‘none’) + scale_color_brewer(palette = ‘Set1’) ```