• 1 DNA methyltransferases 3A and 3B target specific sequences during mouse gastrulation
    • 1.1 Run the notebooks
    • 1.2 Download the UMI matrices
    • 1.3 Notebook order
  • 2 Methylation trends in EB days 0-6
    • 2.0.1 initialize definitions
    • 2.0.2 Load data
    • 2.0.3 Cluster
    • 2.0.4 Project DNMT1 data
    • 2.1 Plot global EB trends
      • 2.1.1 Figure 4A
      • 2.1.2 Extended Data Figure 8K
    • 2.2 cluster methylation in days 5 and 6 with DKO
      • 2.2.1 Project DKO on day 1-4 clusters
      • 2.2.2 Figure 4C
      • 2.2.3 Figure 4B
  • 3 A/B sequence model
    • 3.0.1 initialize definitions
    • 3.0.2 Get A/B meth data
    • 3.1 Calculate models
      • 3.1.1 Compute models
    • 3.2 Plot models vs preditions
      • 3.2.1 Figure 4F
      • 3.2.2 Plot GLM model parameters
      • 3.2.3 Figure 4D
    • 3.3 plot logo
      • 3.3.1 Figure 4E
    • 3.4 Test models with closest CpG methylation
      • 3.4.1 Compute model with CG and GC content
      • 3.4.2 Compute model with TOR
      • 3.4.3 Compute model with closest CpG
      • 3.4.4 Compute model with all enhancer methylation
      • 3.4.5 Figure 7A
    • 3.5 Estimate prediction noise
      • 3.5.1 Kinteics per time and score bin
      • 3.5.2 Figure 4G
      • 3.5.3 Scores within enhancers
      • 3.5.4 Figure 7B
      • 3.5.5 Extract proA/proB enhancers
      • 3.5.6 No difference in methylation of full enhancers vs shuffled
      • 3.5.7 Toatal enh methylation prediction
      • 3.5.8 Figure 7C
  • 4 Validation from external data
    • 4.0.1 initialize definitions
    • 4.0.2 Get A/B meth data
    • 4.0.3 Get Meissner data
    • 4.0.4 Figure 4H
    • 4.0.5 Figure 4I
  • 5 Mallona et al. NAR 2020
    • 5.0.1 initialize definitions
    • 5.1 Model ES
      • 5.1.1 Figure 5B,C
      • 5.1.2 Figure 5D
      • 5.1.3 Figure 5E
      • 5.1.4 Figure 5F,G
      • 5.1.5 Figure 5H
      • 5.1.6 Figure 5I
      • 5.1.7 Figure 5J
  • 6 Yagi et al. 
    • 6.0.1 initialize definitions
    • 6.0.2 DMRs
    • 6.0.3 Get Meissner data
    • 6.0.4 Get MEEB data
    • 6.0.5 Extended Data Figure 8I
  • 7 Single cell methylation
    • 7.0.1 initialize definitions
    • 7.1 Generate cell-cycle ordering
      • 7.1.1 In-vivo
      • 7.1.2 EB
    • 7.2 Plot in-vivo cell-cycle
      • 7.2.1 Figure 6B
      • 7.2.2 Figure 6A
      • 7.2.3 Figure 6C
      • 7.2.4 Extended Data Figure 9G
    • 7.3 Plot EBs cell cycle
      • 7.3.1 Figure 6D,E
      • 7.3.2 Figure 6F,G, Extended Data Figure 9I
      • 7.3.3 Figure 6D
    • 7.4 Plot other EB batches
      • 7.4.1 Extended Data Figure 9H
  • 8 Differential methylation in EB day 6
    • 8.1 initialize definitions
    • 8.2 Extract data
    • 8.3 Plot global differences
      • 8.3.1 Figure 7D
    • 8.4 Extract DNMT3A targets
    • 8.5 Plot distribution of AB score on the differential regions
      • 8.5.1 Figure 7E
    • 8.6 Plot examples
      • 8.6.1 Figure 7G, Extended Data Figure 10
    • 8.7 Test regions enrichment vs gene expression
      • 8.7.1 Figure 7F
  • 9 QC - bulk methylation
    • 9.0.1 initialize definitions
    • 9.1 Probe design
    • 9.2 Breakdown of probes
      • 9.2.1 Extended Data Figure 8B
    • 9.3 WGBS E8.5
      • 9.3.1 Coverage
      • 9.3.2 Extended Data Figure 8A
      • 9.3.3 Methylation distribution on-target vs off-target
      • 9.3.4 Extended Data Figure 8C
    • 9.4 Distribution of coverage per condition
      • 9.4.1 Extended Data Figure 8D
    • 9.5 Distribution of non-CpGs methylation per condition
      • 9.5.1 Extended Data Figure 8H
    • 9.6 “On target” statistics
      • 9.6.1 Extended Data Figure 8E
    • 9.7 Global methylation trend
      • 9.7.1 Extended Data Figure 8G
  • 10 QC - sc methylation
    • 10.0.1 initialize definitions
    • 10.1 Distribution of %CHH
      • 10.1.1 Extended Data Figure 9B
    • 10.2 Number of cells
      • 10.2.1 Extended Data Figure 9A
    • 10.3 Number of CpGs
      • 10.3.1 Extended Data Figure 9C
    • 10.4 Sorting plots
      • 10.4.1 Extended Data Figure 9D
      • 10.4.2 Extended Data Figure 9E
    • 10.5 Early/Late coverage per cell (e7.5)
      • 10.5.1 Extended Data Figure 9F

DNA methyltransferases 3A and 3B target specific sequences during mouse gastrulation

2 Methylation trends in EB days 0-6

2.0.1 initialize definitions

suppressMessages(suppressWarnings(source(here::here("code/init.R"))))

2.0.2 Load data

Load the data of methylation day 0-4:

cpg_meth <- calc_eb_day0_to_day4_cpg_meth(min_cov = 10, max_na  = 5)
m <- intervs_to_mat(cpg_meth)
dim(m)
## [1] 132052     24
colnames(m)
##  [1] "d0_3a"   "d0S_3a"  "d1_3a"   "d2_3a"   "d3_3a"   "d4_3a"   "d0_3b"  
##  [8] "d0S_3b"  "d1_3b"   "d2_3b"   "d3_3b"   "d4_3b"   "d0_tko"  "d0S_tko"
## [15] "d1_tko"  "d2_tko"  "d3_tko"  "d4_tko"  "d0_wt"   "d0S_wt"  "d1_wt"  
## [22] "d2_wt"   "d3_wt"   "d4_wt"

2.0.3 Cluster

colnames(m)
##  [1] "d0_3a"   "d0S_3a"  "d1_3a"   "d2_3a"   "d3_3a"   "d4_3a"   "d0_3b"  
##  [8] "d0S_3b"  "d1_3b"   "d2_3b"   "d3_3b"   "d4_3b"   "d0_tko"  "d0S_tko"
## [15] "d1_tko"  "d2_tko"  "d3_tko"  "d4_tko"  "d0_wt"   "d0S_wt"  "d1_wt"  
## [22] "d2_wt"   "d3_wt"   "d4_wt"
m_column_order <- c(
    "d0_3a", "d1_3a", "d2_3a", "d3_3a", "d4_3a",
    "d0_3b", "d1_3b", "d2_3b", "d3_3b", "d4_3b",
    "d0_wt", "d1_wt", "d2_wt", "d3_wt", "d4_wt",    
    "d0_tko", "d1_tko", "d2_tko", "d3_tko", "d4_tko",
    "d0S_3a", "d0S_3b", "d0S_wt", "d0S_tko"
)
m <- m[, m_column_order]
colnames(m)
##  [1] "d0_3a"   "d1_3a"   "d2_3a"   "d3_3a"   "d4_3a"   "d0_3b"   "d1_3b"  
##  [8] "d2_3b"   "d3_3b"   "d4_3b"   "d0_wt"   "d1_wt"   "d2_wt"   "d3_wt"  
## [15] "d4_wt"   "d0_tko"  "d1_tko"  "d2_tko"  "d3_tko"  "d4_tko"  "d0S_3a" 
## [22] "d0S_3b"  "d0S_wt"  "d0S_tko"
K <- 60
km <- tglkmeans::TGL_kmeans(m, K, id_column=FALSE, seed=19) %cache_rds% here("output/meth_capt_clust.rds")
enframe(km$size[c(1, 9, 19, 24, 30, 32, 40, 44, 53, 60)])
## # A tibble: 10 x 2
##   name value
## 1    1  1179
## 2    9  5459
## 3   19  2023
## 4   24  1573
## 5   30  1295
## 6   32  2810
## # ... with 4 more rows
km_m_3a <- rowSums(km$centers[,c("d1_3a","d2_3a","d3_3a","d4_3a")])
km_m_3b <- rowSums(km$centers[,c("d1_3b","d2_3b","d3_3b","d4_3b")])
km_m_wt <- rowSums(km$centers[,c("d1_wt","d2_wt","d3_wt","d4_wt")])
cent_dlt <- km$centers[,c("d1_3a","d2_3a","d3_3a","d4_3a")]-
km$centers[,c("d1_3b","d2_3b","d3_3b","d4_3b")]
dlt24 <- cent_dlt[,2]-cent_dlt[,4]

clst_mod <- rep(0, K)
clst_mod[(km_m_3a-km_m_3b) < -0.3 & dlt24 > -0.05] <- 3
clst_mod[(km_m_3a-km_m_3b) < -0.2 & dlt24 < -0.05] <- 2
clst_mod[(km_m_3a-km_m_3b) > 0.2] <- 1
smooth_n = floor(nrow(m)/1000)+1
km_mean_e = rowSums(km$centers)
tot_cent_dlt = rowSums(cent_dlt)

k_ord = order(ifelse(clst_mod==0, 
                    km_mean_e, 
                    abs(tot_cent_dlt)) + clst_mod*20)
clst_map <- 1:K
names(clst_map) <- as.character(k_ord)
clst <- km$cluster
clst_sorted <- clst_map[as.character(clst)]
km$cluster <- clst_sorted
km$centers <- km$centers[k_ord,]
rownames(km$centers) <- 1:K
m_ord <- order(km$cluster)
m_s5 = apply(m[m_ord,-1],2, zoo::rollmean, smooth_n, fill=c('extend',NA,'extend'), na.rm=T)

colnames(m_s5) = colnames(m)[-1]
dim(m_s5)
## [1] 132052     23
options(repr.plot.width = 8, repr.plot.height = 20)
plot_clust <- function() {
    shades <- colorRampPalette(c("white", "#635547", "#DABE99","#C594BF","#B51D8D","darkred", "yellow"))(1000)
    image(t(as.matrix(m_s5)),col=shades, xaxt='n', yaxt='n'); 
    N = length(m_ord)

    m_y = tapply((1:N)/N, km$cluster[m_ord], mean)
    mtext(1:K, at = m_y, las=2, side= 2)
    mtext(1:K, at = m_y, las=2, side= 4)
    abline(h=tapply((1:N)/N, km$cluster[m_ord], max))
}
plot_clust()

options(repr.plot.width = 3, repr.plot.height = 10)
plot_delta <- function(){
    delta = m_s5[,c("d1_3a","d2_3a","d3_3a","d4_3a")]-
            m_s5[,c("d1_3b","d2_3b","d3_3b","d4_3b")]

    dshades <- colorRampPalette(c("red","darkred","white","darkblue","blue"))(1000)
    image(pmin(pmax(t(as.matrix(delta)),-0.3),0.3),col=dshades,zlim=c(-0.3,0.3))
}
plot_delta()

2.0.4 Project DNMT1 data

clust_intervs <- m %>% mat_to_intervs() %>% select(chrom:end)
all_clust_data <- calc_eb_cpg_meth(from = 0, to = 5, min_cov = 10, max_na = NULL, intervals = clust_intervs, iterator = clust_intervs, cache_fn = here("output/all_clust_data.tsv"), rm_meth_cov = TRUE)
colnames(all_clust_data)
##  [1] "chrom"     "start"     "end"       "d0_d13ako" "d3_d13ako" "d4_d13ako"
##  [7] "d0_d13bko" "d3_d13bko" "d4_d13bko" "d5_dko"    "d0_ko1"    "d0S_ko1"  
## [13] "d1_ko1"    "d2_ko1"    "d3_ko1"    "d4_ko1"    "d5_ko1"    "d0_3a"    
## [19] "d0S_3a"    "d1_3a"     "d2_3a"     "d3_3a"     "d4_3a"     "d5_3a"    
## [25] "d0_3b"     "d0S_3b"    "d1_3b"     "d2_3b"     "d3_3b"     "d4_3b"    
## [31] "d5_3b"     "d0_tko"    "d0S_tko"   "d1_tko"    "d2_tko"    "d3_tko"   
## [37] "d4_tko"    "d0_wt"     "d0S_wt"    "d1_wt"     "d2_wt"     "d3_wt"    
## [43] "d4_wt"     "d5_wt"
m_column_order <- c(
    "d0_3a", "d1_3a", "d2_3a", "d3_3a", "d4_3a", 
    "d0_3b", "d1_3b", "d2_3b", "d3_3b", "d4_3b", 
    "d0_wt", "d1_wt", "d2_wt", "d3_wt", "d4_wt", 
    "d0_tko", "d1_tko", "d2_tko", "d3_tko", "d4_tko",
    "d0S_3a", "d0S_3b", "d0S_wt", "d0S_tko", "d0S_ko1",
    "d0_ko1", "d1_ko1", "d2_ko1", "d3_ko1", "d4_ko1", 
    "d0_d13ako", "d3_d13ako", "d4_d13ako",
    "d0_d13bko", "d3_d13bko", "d4_d13bko"
)
setdiff(colnames(all_clust_data)[-1:-3], m_column_order)
## [1] "d5_dko" "d5_ko1" "d5_3a"  "d5_3b"  "d5_wt"
m_all <- intervs_to_mat(all_clust_data)[, m_column_order]
colnames(m_all)
##  [1] "d0_3a"     "d1_3a"     "d2_3a"     "d3_3a"     "d4_3a"     "d0_3b"    
##  [7] "d1_3b"     "d2_3b"     "d3_3b"     "d4_3b"     "d0_wt"     "d1_wt"    
## [13] "d2_wt"     "d3_wt"     "d4_wt"     "d0_tko"    "d1_tko"    "d2_tko"   
## [19] "d3_tko"    "d4_tko"    "d0S_3a"    "d0S_3b"    "d0S_wt"    "d0S_tko"  
## [25] "d0S_ko1"   "d0_ko1"    "d1_ko1"    "d2_ko1"    "d3_ko1"    "d4_ko1"   
## [31] "d0_d13ako" "d3_d13ako" "d4_d13ako" "d0_d13bko" "d3_d13bko" "d4_d13bko"
m_all_s5 = apply(m_all[m_ord, ],2, zoo::rollmean, smooth_n, fill=c('extend',NA,'extend'), na.rm=T)
head(m_all_s5)
##           d0_3a      d1_3a       d2_3a      d3_3a      d4_3a       d0_3b
## [1,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668 0.005717633
## [2,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668 0.005717633
## [3,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668 0.005717633
## [4,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668 0.005717633
## [5,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668 0.005717633
## [6,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668 0.005717633
##            d1_3b       d2_3b      d3_3b      d4_3b       d0_wt       d1_wt
## [1,] 0.007146658 0.008921023 0.01298582 0.01649393 0.006180905 0.006556652
## [2,] 0.007146658 0.008921023 0.01298582 0.01649393 0.006180905 0.006556652
## [3,] 0.007146658 0.008921023 0.01298582 0.01649393 0.006180905 0.006556652
## [4,] 0.007146658 0.008921023 0.01298582 0.01649393 0.006180905 0.006556652
## [5,] 0.007146658 0.008921023 0.01298582 0.01649393 0.006180905 0.006556652
## [6,] 0.007146658 0.008921023 0.01298582 0.01649393 0.006180905 0.006556652
##           d2_wt      d3_wt      d4_wt      d0_tko      d1_tko      d2_tko
## [1,] 0.01132803 0.01854894 0.02603882 0.005965149 0.004823037 0.004019872
## [2,] 0.01132803 0.01854894 0.02603882 0.005965149 0.004823037 0.004019872
## [3,] 0.01132803 0.01854894 0.02603882 0.005965149 0.004823037 0.004019872
## [4,] 0.01132803 0.01854894 0.02603882 0.005965149 0.004823037 0.004019872
## [5,] 0.01132803 0.01854894 0.02603882 0.005965149 0.004823037 0.004019872
## [6,] 0.01132803 0.01854894 0.02603882 0.005965149 0.004823037 0.004019872
##           d3_tko      d4_tko     d0S_3a     d0S_3b     d0S_wt     d0S_tko
## [1,] 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627 0.007268556
## [2,] 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627 0.007268556
## [3,] 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627 0.007268556
## [4,] 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627 0.007268556
## [5,] 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627 0.007268556
## [6,] 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627 0.007268556
##         d0S_ko1      d0_ko1      d1_ko1      d2_ko1     d3_ko1      d4_ko1
## [1,] 0.01001541 0.004047823 0.006723765 0.007574284 0.01410948 0.007942855
## [2,] 0.01001541 0.004047823 0.006723765 0.007574284 0.01410948 0.007942855
## [3,] 0.01001541 0.004047823 0.006723765 0.007574284 0.01410948 0.007942855
## [4,] 0.01001541 0.004047823 0.006723765 0.007574284 0.01410948 0.007942855
## [5,] 0.01001541 0.004047823 0.006723765 0.007574284 0.01410948 0.007942855
## [6,] 0.01001541 0.004047823 0.006723765 0.007574284 0.01410948 0.007942855
##        d0_d13ako d3_d13ako  d4_d13ako   d0_d13bko  d3_d13bko   d4_d13bko
## [1,] 0.008525676 0.0166642 0.01150588 0.006178614 0.01029716 0.005339577
## [2,] 0.008525676 0.0166642 0.01150588 0.006178614 0.01029716 0.005339577
## [3,] 0.008525676 0.0166642 0.01150588 0.006178614 0.01029716 0.005339577
## [4,] 0.008525676 0.0166642 0.01150588 0.006178614 0.01029716 0.005339577
## [5,] 0.008525676 0.0166642 0.01150588 0.006178614 0.01029716 0.005339577
## [6,] 0.008525676 0.0166642 0.01150588 0.006178614 0.01029716 0.005339577
days <- strsplit(colnames(m_all_s5), split = "_") %>% map_chr(~ .x[1]) %>% gsub("d", "", .) 
days[grep("0S", days)] <- c("A", "B", "WT", "TKO", "KO1")
options(repr.plot.width = 20, repr.plot.height = 20)
shades <- colorRampPalette(c("white", "#635547", "#DABE99","#C594BF","#B51D8D","darkred", "yellow"))(1000)
image(t(as.matrix(m_all_s5)),col=shades, xaxt='n', yaxt='n'); 
N = length(m_ord)

m_y = tapply((1:N)/N, km$cluster[m_ord], mean)
mtext(1:K, at = m_y, las=2, side= 2)
mtext(1:K, at = m_y, las=2, side= 4)
axis(3, at = seq(0, 1, length = ncol(m_all_s5)), labels = days)
abline(h=tapply((1:N)/N, km$cluster[m_ord], max))

options(repr.plot.width = 3, repr.plot.height = 20)
delta = m_s5[,c("d1_3a","d2_3a","d3_3a","d4_3a")]-
        m_s5[,c("d1_3b","d2_3b","d3_3b","d4_3b")]

dshades <- colorRampPalette(c("red","darkred","white","darkblue","blue"))(1000)
image(pmin(pmax(t(as.matrix(delta)),-0.3),0.3),col=dshades,zlim=c(-0.3,0.3))

2.0.4.1 Trajectory per cluster

chosen_clusters <- c(1, 9, 19, 24, 30, 32, 40, 44, 53, 60)
line_colors <- c("wt" = "darkblue", "3a" = "purple", "3b" = "orange", "tko" = "darkgray", "ko1" = "darkgreen", "d13ako" = "turquoise4", "d13bko" = "orangered4")
df <- all_clust_data %>%         
    mutate(clust = km$cluster) %>% 
    group_by(clust) %>% 
    summarise_at(vars(starts_with("d")), mean, na.rm=TRUE) %>% 
    gather("samp", "meth", -clust) %>% 
    separate(samp, c("day", "line"), sep="_") %>% 
    filter(day != "d0S", day != "d5") %>% 
    mutate(day = gsub("d", "", day)) %>% 
    mutate(line = factor(line, levels = names(line_colors)))
options(repr.plot.width = 5, repr.plot.height = 15)
df %>% 
    filter(clust %in% chosen_clusters) %>% 
    mutate(clust = factor(clust), clust = forcats::fct_rev(clust)) %>% 
    ggplot(aes(x=day, y=meth, color=line, group=line)) + 
        geom_line() + 
        geom_point(size=0.5) + 
        xlab("EB day") + 
        ylab("Meth.") + 
        theme(aspect.ratio = 1) + 
        facet_grid(clust~.) + 
        scale_color_manual(name = "", values = line_colors) + 
        scale_y_continuous(limits=c(0,1))

2.1 Plot global EB trends

2.1.1 Figure 4A

cpg_meth1 <- gextract.left_join("seq.CG_500_mean", intervals=cpg_meth, iterator=cpg_meth, colnames="cg_cont") %>% select(-(chrom1:end1)) %>% select(chrom, start, end, cg_cont, everything()) %>% as_tibble()
line_colors <- c("wt" = "darkblue", "3a" = "purple", "3b" = "orange", "tko" = "darkgray")
df <- cpg_meth1 %>% 
    mutate(cg_cont = cut(cg_cont, c(0,0.03,0.08,0.15), include.lowest=TRUE, labels=c("low", "mid", "high"))) %>% 
    group_by(cg_cont) %>% 
    summarise_at(vars(starts_with("d")), mean, na.rm=TRUE) %>% 
    gather("samp", "meth", -cg_cont) %>% 
    separate(samp, c("day", "line"), sep="_") %>% 
    filter(day != "d0S") %>% 
    mutate(day = gsub("d", "", day)) %>% 
    mutate(line = factor(line, levels = names(line_colors)))
options(repr.plot.width = 14, repr.plot.height = 7)
p_high <- df %>% filter(cg_cont == "high") %>% ggplot(aes(x=day, y=meth, color=line, group=line)) + geom_line() + geom_point(size=0.5) + ggtitle("High CpG content") + xlab("EB day") + ylab("Meth.") + theme(aspect.ratio = 1) + scale_color_manual(name = "", values = line_colors) + scale_y_continuous(limits=c(0,0.8))
p_low <- df %>% filter(cg_cont == "low") %>% ggplot(aes(x=day, y=meth, color=line, group=line)) + geom_line() + geom_point(size=0.5) + ggtitle("Low CpG content") + xlab("EB day") + ylab("Meth.") + theme(aspect.ratio = 1) + scale_color_manual(name = "", values = line_colors) + scale_y_continuous(limits=c(0,0.8))
p_high + p_low

2.1.2 Extended Data Figure 8K

Same for X chromosome

line_colors <- c("wt" = "darkblue", "3a" = "purple", "3b" = "orange", "tko" = "darkgray")
df <- cpg_meth1 %>% 
    mutate(type = case_when(chrom == "chrX" ~ "X", TRUE ~ "autosome")) %>% 
    mutate(cg_cont = cut(cg_cont, c(0,0.03,0.08,0.15), include.lowest=TRUE, labels=c("low", "mid", "high"))) %>% 
    group_by(type, cg_cont) %>% 
    summarise_at(vars(starts_with("d")), mean, na.rm=TRUE) %>% 
    gather("samp", "meth", -cg_cont, -type) %>% 
    separate(samp, c("day", "line"), sep="_") %>% 
    filter(day != "d0S") %>% 
    mutate(day = gsub("d", "", day)) %>% 
    mutate(line = factor(line, levels = names(line_colors)))
options(repr.plot.width = 14, repr.plot.height = 7)
p_high_x <- df %>% filter(type == "X", cg_cont == "high") %>% ggplot(aes(x=day, y=meth, color=line, group=line)) + geom_line() + geom_point(size=0.5) + ggtitle("X chromosome:\nHigh CpG content") + xlab("EB day") + ylab("Meth.") + theme(aspect.ratio = 1) + scale_color_manual(name = "", values = line_colors) + scale_y_continuous(limits=c(0,0.8))
p_low_x <- df %>% filter(type == "X", cg_cont == "low") %>% ggplot(aes(x=day, y=meth, color=line, group=line)) + geom_line() + geom_point(size=0.5) + ggtitle("X chromosome:\nLow CpG content") + xlab("EB day") + ylab("Meth.") + theme(aspect.ratio = 1) + scale_color_manual(name = "", values = line_colors) + scale_y_continuous(limits=c(0,0.8))
p_high_x + p_low_x

2.2 cluster methylation in days 5 and 6 with DKO

cpg_meth_56 <- calc_eb_day5_to_day6_cpg_meth()

2.2.1 Project DKO on day 1-4 clusters

all_clust_data <- calc_eb_cpg_meth(from = 0, to = 6, min_cov = 10, max_na = NULL, intervals = clust_intervs, iterator = clust_intervs, cache_fn = here("output/all_clust_data_day1_to_6.tsv"), rm_meth_cov = TRUE, tracks_key = NULL)
m_column_order <- c(
    "d0_3a", "d1_3a", "d2_3a", "d3_3a", "d4_3a",  "d5_3a", "d6_3a", 
    "d0_3b", "d1_3b", "d2_3b", "d3_3b", "d4_3b",  "d5_3b", "d6_3b",
    "d0_wt", "d1_wt", "d2_wt", "d3_wt", "d4_wt",  "d5_wt", "d6_wt",
    "d5_dko", "d6_dko",
    "d0_tko", "d1_tko", "d2_tko", "d3_tko", "d4_tko", 
    "d0S_3a", "d0S_3b", "d0S_wt", "d0S_tko"
)
setdiff(colnames(all_clust_data)[-1:-3], m_column_order)
##  [1] "d0_d13ako" "d3_d13ako" "d4_d13ako" "d0_d13bko" "d3_d13bko" "d4_d13bko"
##  [7] "d0_ko1"    "d0S_ko1"   "d1_ko1"    "d2_ko1"    "d3_ko1"    "d4_ko1"   
## [13] "d5_ko1"
setdiff(m_column_order, colnames(all_clust_data))
## character(0)
m_all <- intervs_to_mat(all_clust_data)[, m_column_order]
colnames(m_all)
##  [1] "d0_3a"   "d1_3a"   "d2_3a"   "d3_3a"   "d4_3a"   "d5_3a"   "d6_3a"  
##  [8] "d0_3b"   "d1_3b"   "d2_3b"   "d3_3b"   "d4_3b"   "d5_3b"   "d6_3b"  
## [15] "d0_wt"   "d1_wt"   "d2_wt"   "d3_wt"   "d4_wt"   "d5_wt"   "d6_wt"  
## [22] "d5_dko"  "d6_dko"  "d0_tko"  "d1_tko"  "d2_tko"  "d3_tko"  "d4_tko" 
## [29] "d0S_3a"  "d0S_3b"  "d0S_wt"  "d0S_tko"
m_all_s5 = apply(m_all[m_ord, ],2, zoo::rollmean, smooth_n, fill=c('extend',NA,'extend'), na.rm=T)
head(m_all_s5)
##           d0_3a      d1_3a       d2_3a      d3_3a      d4_3a d5_3a      d6_3a
## [1,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668     0 0.02408283
## [2,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668     0 0.02408283
## [3,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668     0 0.02408283
## [4,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668     0 0.02408283
## [5,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668     0 0.02408283
## [6,] 0.01001037 0.01079793 0.004898041 0.01102023 0.01277668     0 0.02408283
##            d0_3b       d1_3b       d2_3b      d3_3b      d4_3b      d5_3b
## [1,] 0.005717633 0.007146658 0.008921023 0.01298582 0.01649393 0.01666667
## [2,] 0.005717633 0.007146658 0.008921023 0.01298582 0.01649393 0.01666667
## [3,] 0.005717633 0.007146658 0.008921023 0.01298582 0.01649393 0.01666667
## [4,] 0.005717633 0.007146658 0.008921023 0.01298582 0.01649393 0.01666667
## [5,] 0.005717633 0.007146658 0.008921023 0.01298582 0.01649393 0.01666667
## [6,] 0.005717633 0.007146658 0.008921023 0.01298582 0.01649393 0.01666667
##           d6_3b       d0_wt       d1_wt      d2_wt      d3_wt      d4_wt
## [1,] 0.01485334 0.006180905 0.006556652 0.01132803 0.01854894 0.02603882
## [2,] 0.01485334 0.006180905 0.006556652 0.01132803 0.01854894 0.02603882
## [3,] 0.01485334 0.006180905 0.006556652 0.01132803 0.01854894 0.02603882
## [4,] 0.01485334 0.006180905 0.006556652 0.01132803 0.01854894 0.02603882
## [5,] 0.01485334 0.006180905 0.006556652 0.01132803 0.01854894 0.02603882
## [6,] 0.01485334 0.006180905 0.006556652 0.01132803 0.01854894 0.02603882
##           d5_wt      d6_wt      d5_dko      d6_dko      d0_tko      d1_tko
## [1,] 0.03191584 0.02506428 0.004145408 0.004717813 0.005965149 0.004823037
## [2,] 0.03191584 0.02506428 0.004145408 0.004717813 0.005965149 0.004823037
## [3,] 0.03191584 0.02506428 0.004145408 0.004717813 0.005965149 0.004823037
## [4,] 0.03191584 0.02506428 0.004145408 0.004717813 0.005965149 0.004823037
## [5,] 0.03191584 0.02506428 0.004145408 0.004717813 0.005965149 0.004823037
## [6,] 0.03191584 0.02506428 0.004145408 0.004717813 0.005965149 0.004823037
##           d2_tko      d3_tko      d4_tko     d0S_3a     d0S_3b     d0S_wt
## [1,] 0.004019872 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627
## [2,] 0.004019872 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627
## [3,] 0.004019872 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627
## [4,] 0.004019872 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627
## [5,] 0.004019872 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627
## [6,] 0.004019872 0.005696785 0.006240729 0.01627394 0.01726621 0.02124627
##          d0S_tko
## [1,] 0.007268556
## [2,] 0.007268556
## [3,] 0.007268556
## [4,] 0.007268556
## [5,] 0.007268556
## [6,] 0.007268556
days <- strsplit(colnames(m_all_s5), split = "_") %>% map_chr(~ .x[1]) %>% gsub("d", "", .)
days
##  [1] "0"  "1"  "2"  "3"  "4"  "5"  "6"  "0"  "1"  "2"  "3"  "4"  "5"  "6"  "0" 
## [16] "1"  "2"  "3"  "4"  "5"  "6"  "5"  "6"  "0"  "1"  "2"  "3"  "4"  "0S" "0S"
## [31] "0S" "0S"
days[grep("0S", days)] <- c("A", "B", "WT", "TKO")

2.2.2 Figure 4C

options(repr.plot.width = 20, repr.plot.height = 20)
shades <- colorRampPalette(c("white", "#635547", "#DABE99","#C594BF","#B51D8D","darkred", "yellow"))(1000)
image(t(as.matrix(m_all_s5)),col=shades, xaxt='n', yaxt='n'); 
N = length(m_ord)

m_y = tapply((1:N)/N, km$cluster[m_ord], mean)
mtext(1:K, at = m_y, las=2, side= 2)
mtext(1:K, at = m_y, las=2, side= 4)
axis(3, at = seq(0, 1, length = ncol(m_all_s5)), labels = days)
abline(h=tapply((1:N)/N, km$cluster[m_ord], max))

2.2.3 Figure 4B

options(repr.plot.width = 3, repr.plot.height = 20)
delta = m_s5[,c("d1_3a","d2_3a","d3_3a","d4_3a")]-
        m_s5[,c("d1_3b","d2_3b","d3_3b","d4_3b")]

dshades <- colorRampPalette(c("red","darkred","white","darkblue","blue"))(1000)
image(pmin(pmax(t(as.matrix(delta)),-0.3),0.3),col=dshades,zlim=c(-0.3,0.3))