11 X dosage compensation
source(here::here("scripts/init.R"))
We start by looking at the CNA data
options(repr.plot.width = 8, repr.plot.height = 4)
p1 <- cna %>% ggplot(aes(x=cna_val)) + geom_density() + xlim(0,5) + geom_vline(xintercept=c(0,0.5, 1.5, 2.5, 3.5, 4.5), color="red")
p2 <- cna %>% ggplot(aes(x=cna_val, color=factor(cna_round))) + geom_density() + xlim(0,5) + guides(color=FALSE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
p1 + p2
## Warning: Removed 12192 rows containing non-finite values (stat_density).
## Warning: Removed 12192 rows containing non-finite values (stat_density).
11.1 X analysis
We then go to extract loci that are associated with xist expression
xist_loci <- get_xist_loci()
xist_loci_meth <- get_xist_loci_meth()
## Joining, by = c("chrom", "start", "end")
xist_meth <- xist_loci_meth %>%
group_by(samp) %>%
summarise(meth = mean(meth, na.rm = TRUE))
xist_expr <- get_gene_expression_mat() %>%
filter(name == "XIST") %>%
gather("samp", "expr", -(chrom:name3.chr))
xist_loci_expr <- get_xist_loci_expr()
## Joining, by = c("chrom", "start", "end")
df <- xist_meth %>%
left_join(xist_expr %>% select(samp, expr)) %>%
left_join(samp_data %>% select(samp, ER = ER1)) %>%
filter(!is.na(ER))
## Joining, by = "samp"
## Joining, by = "samp"
head(xist_loci)
## # A tibble: 6 x 3
## chrom start end
## 1 chrX 9983293 9983844
## 2 chrX 11776199 11776750
## 3 chrX 14891025 14891576
## 4 chrX 16964312 16964863
## 5 chrX 17393041 17393592
## 6 chrX 18443223 18443774
count(xist_loci, chrom)
## # A tibble: 1 x 2
## chrom n
## 1 chrX 615
We show that Xist expression is correlated with XIST loci methylation
head(df)
## # A tibble: 6 x 4
## samp meth expr ER
## 1 MB_0006 0.3131481 8.088802 ER+
## 2 MB_0028 0.1560928 6.482577 ER+
## 3 MB_0030 0.3030659 NA ER+
## 4 MB_0035 0.1877706 6.406843 ER-
## 5 MB_0040 0.2148237 NA ER+
## 6 MB_0046 0.1631259 6.652839 ER+
11.1.0.1 Extended Data Figure 9B
options(repr.plot.width = 7, repr.plot.height = 7)
df %>% group_by(ER) %>% summarise(cor = cor(meth, expr, use="pairwise.complete.obs"))
## # A tibble: 3 x 2
## ER cor
## 1 ER- 0.6021123
## 2 ER+ 0.5171663
## 3 normal 0.4914330
p_xist_meth_cor <- df %>%
arrange(sample(samp)) %>%
ggplot(aes(x = meth, y = expr, color = ER)) +
geom_point(size=0.2) +
scale_color_manual(values = annot_colors$ER1, guide = FALSE) +
theme(aspect.ratio = 1) +
xlab("Methylation") +
ylab("Xist expression") +
annotate("text", x = 0.45, y = 6, label = sprintf("~ rho == %0.2f", cor(df$meth, df$expr, method = "spearman", use = "pairwise.complete.obs")), parse = TRUE, size = 2, family = "Arial")
p_xist_meth_cor
## Warning: Removed 263 rows containing missing values (geom_point).
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
Moving on to look at the methylation in different CNA regimes
get_xist_cna
## function ()
## {
## xist_cna <- get_xist_loci() %>% gintervals.neighbors1(cna %>%
## mutate(end = ifelse(start == end, start + 1, end)) %>%
## filter(chrom == "chrX"), maxneighbors = nrow(samp_data)) %>%
## filter(dist == 0) %>% select(chrom, start, end, samp,
## cna = cna_round)
## xist_cna <- xist_cna %>% mutate(cna_grp = cut(cna, breaks = c(-1,
## 0, 1, 2, 10), labels = c("0N", "1N", "2N", ">=3N"))) %>%
## filter(cna_grp != "0N") %>% left_join(samp_data %>% select(samp,
## ER = ER1))
## return(xist_cna)
## }
xist_cna <- get_xist_cna()
## Joining, by = "samp"
xist_cna %>% count(cna) %>% ggplot(aes(x=factor(cna), y=n)) + geom_col() + theme_bw()
Plotting the distribution of loci
11.1.0.2 Extended Data Figure 9D
options(repr.plot.width = 8, repr.plot.height = 5)
df <- xist_cna %>%
left_join(get_xist_loci_meth()) %>%
group_by(cna_grp, ER, chrom, start, end) %>%
summarise(meth = mean(meth, na.rm = TRUE)) %>%
filter(!is.na(ER)) %>%
mutate(cna_grp = factor(cna_grp, levels = c("1N", "2N", ">=3N"))) %>%
mutate(ER = factor(ER, levels = c("ER+", "ER-")))
## Joining, by = c("chrom", "start", "end")
## Joining, by = c("chrom", "start", "end", "samp")
p_boxp_meth_cna <- df %>%
ggplot(aes(x = cna_grp, y = meth, fill = ER, group = cna_grp)) +
# ggrastr::geom_boxplot_jitter(outlier.size = 0.1, outlier.jitter.width = 0.01, raster=TRUE) +
geom_boxplot(linewidth=0.1, fatten=0.5, outlier.size=0.1) +
scale_fill_manual(values = annot_colors$ER1, guide = FALSE) +
xlab("") +
ylab("Methylation in Xist\nassociated promoters") +
facet_grid(. ~ ER) +
ylim(0, 1.1) +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)
## Warning: Ignoring unknown parameters: linewidth
p_boxp_meth_cna + ggpubr::stat_compare_means(label = "p.signif", comparisons = list(c("1N", "2N"), c("2N", ">=3N")))+ theme_bw()
## Warning: Removed 2016 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2016 rows containing non-finite values (stat_signif).
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
p_boxp_meth_cna + ggpubr::stat_compare_means(comparisons = list(c("1N", "2N"), c("2N", ">=3N")))+ theme_bw()
## Warning: Removed 2016 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2016 rows containing non-finite values (stat_signif).
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
df %>% ungroup() %>% distinct(chrom, start, end, ER, cna_grp) %>% count(ER, cna_grp)
## # A tibble: 6 x 3
## ER cna_grp n
## 1 ER+ 1N 615
## 2 ER+ 2N 615
## 3 ER+ >=3N 615
## 4 ER- 1N 615
## 5 ER- 2N 615
## 6 ER- >=3N 615
xist_cna %>% distinct(cna_grp, samp, ER) %>% count(cna_grp, ER)
## # A tibble: 6 x 3
## cna_grp ER n
## 1 1N ER- 181
## 2 1N ER+ 444
## 3 2N ER- 269
## 4 2N ER+ 988
## 5 >=3N ER- 135
## 6 >=3N ER+ 413
We now move to show scatters of loci in different CNA regimes
get_xist_meth_cna
## function ()
## {
## get_xist_cna() %>% inner_join(get_xist_loci_meth()) %>% group_by(chrom,
## start, end, ER, cna_grp) %>% summarise(meth = mean(meth,
## na.rm = TRUE)) %>% ungroup() %>% spread(cna_grp, meth)
## }
xist_meth_cna <- get_xist_meth_cna()
## Joining, by = "samp"
## Joining, by = c("chrom", "start", "end")
## Joining, by = c("chrom", "start", "end", "samp")
head(xist_meth_cna)
## # A tibble: 6 x 7
## chrom start end ER 1N 2N >=3N
## 1 chrX 9983293 9983844 ER- 0.2078918 0.3067645 0.3205268
## 2 chrX 9983293 9983844 ER+ 0.1944013 0.3302692 0.3235067
## 3 chrX 11683770 11684321 ER- 0.2001634 0.2802105 0.2939960
## 4 chrX 11683770 11684321 ER+ 0.2885676 0.3238455 0.3123392
## 5 chrX 11776199 11776750 ER- 0.3316496 0.4977368 0.5313945
## 6 chrX 11776199 11776750 ER+ 0.3403706 0.5048260 0.5023405
11.1.0.3 Extended Data Figure 9E
p_scatter_meth_cna_2_3 <- xist_meth_cna %>%
ggplot(aes(x = `2N`, y = `>=3N`, color = ER)) +
# ggrastr::geom_point_rast(size = 0.2, raster.width = 1, raster.height = 1) +
geom_point(size=0.2) +
geom_abline(color = "black", linetype = "dashed") +
scale_color_manual(values = annot_colors$ER1, guide = FALSE) +
theme(aspect.ratio = 1) +
xlim(0, 0.65) +
ylim(0, 0.65) #+
# annotate("text", x = 0.5, y = 0.1, label = paste("rho = ", round(cor(xist_meth_cna$`2N`, xist_meth_cna$`>=3N`, use = "pairwise.complete.obs"), digits = 3)), size = 2, family = "Arial")
p_scatter_meth_cna_1_2 <- xist_meth_cna %>%
ggplot(aes(x = `1N`, y = `2N`, color = ER)) +
# ggrastr::geom_point_rast(size = 0.2, raster.width = 1, raster.height = 1) +
geom_point(size=0.2) +
geom_abline(color = "black", linetype = "dashed") +
scale_color_manual(values = annot_colors$ER1, guide = FALSE) +
theme(aspect.ratio = 1) +
xlim(0, 0.65) +
ylim(0, 0.65) #+
# annotate("text", x = 0.5, y = 0.1, label = paste("rho = ", round(cor(xist_meth_cna$`1N`, xist_meth_cna$`2N`, use = "pairwise.complete.obs"), digits = 3)), size = 2, family = "Arial")
(p_scatter_meth_cna_1_2 + theme_bw() + theme(aspect.ratio = 1)) + (p_scatter_meth_cna_2_3 + theme_bw() + theme(aspect.ratio = 1))
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
p_scatter_meth_cna_2_3 + facet_wrap(~ER) + theme_bw() + theme(aspect.ratio = 1)
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
Same with expression:
get_xist_expr_cna
## function ()
## {
## get_xist_cna() %>% inner_join(get_xist_loci_expr()) %>% group_by(chrom,
## start, end, ER, cna_grp) %>% summarise(expr = mean(expr,
## na.rm = TRUE)) %>% ungroup() %>% spread(cna_grp, expr)
## }
xist_expr_cna <- get_xist_expr_cna()
## Joining, by = "samp"
## Joining, by = c("chrom", "start", "end")
## Joining, by = c("chrom", "start", "end", "samp")
head(xist_expr_cna)
## # A tibble: 6 x 7
## chrom start end ER 1N 2N >=3N
## 1 chrX 9983293 9983844 ER- 7.880753 7.976930 8.193818
## 2 chrX 9983293 9983844 ER+ 7.770670 7.811101 8.114064
## 3 chrX 14891025 14891576 ER- 7.175153 7.258638 7.427273
## 4 chrX 14891025 14891576 ER+ 7.268545 7.233742 7.451081
## 5 chrX 14891133 14891684 ER- 6.111207 6.018312 6.172959
## 6 chrX 14891133 14891684 ER+ 5.900340 5.859142 5.912923
11.1.0.4 Extended Data Figure 9F
p_scatter_expr_cna_2_3 <- xist_expr_cna %>%
ggplot(aes(x = `2N`, y = `>=3N`, color = ER)) +
geom_point(size=0.2) +
geom_abline(color = "black", linetype = "dashed") +
scale_color_manual(values = annot_colors$ER1, guide = FALSE) +
theme(aspect.ratio = 1) +
xlim(5, 14) +
ylim(5, 14)
p_scatter_expr_cna_1_2 <- xist_expr_cna %>% ggplot(aes(x = `1N`, y = `2N`, color = ER)) +
geom_point(size=0.2) +
geom_abline(color = "black", linetype = "dashed") +
scale_color_manual(values = annot_colors$ER1, guide = FALSE) +
theme(aspect.ratio = 1) +
xlim(5, 14) +
ylim(5, 14)
p_scatter_expr_cna_1_2 + p_scatter_expr_cna_2_3
## Warning: Removed 60 rows containing missing values (geom_point).
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: Removed 60 rows containing missing values (geom_point).
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
11.2 autosome analysis
get_autosome_loci
## function ()
## {
## get_promoter_avg_meth() %>% select(chrom, start, end) %>%
## filter(chrom != "chrX", chrom != "chrY") %>% anti_join(get_xist_loci(),
## by = c("chrom", "start", "end"))
## }
autosome_loci <- get_autosome_loci()
get_autosome_cna
## function ()
## {
## {
## autosome_cna <- get_autosome_loci() %>% gintervals.neighbors1(cna %>%
## mutate(end = ifelse(start == end, start + 1, end)) %>%
## filter(chrom != "chrX"), maxneighbors = nrow(samp_data)) %>%
## filter(dist == 0) %>% select(chrom, start, end, samp,
## cna = cna_round)
## autosome_cna <- autosome_cna %>% mutate(cna_grp = cut(cna,
## breaks = c(-1, 0, 1, 2, 10), labels = c("0N", "1N",
## "2N", ">=3N"))) %>% filter(cna_grp != "0N") %>%
## left_join(samp_data %>% select(samp, ER = ER1))
## autosome_cna
## } %cache_df% here("data/autosome_cna.tsv") %>% as_tibble()
## }
autosome_cna <- get_autosome_cna()
head(autosome_cna)
## # A tibble: 6 x 7
## chrom start end samp cna cna_grp ER
## 1 chr1 762469 763020 MB_0006 2 2N ER+
## 2 chr1 762469 763020 MB_0028 1 1N ER+
## 3 chr1 762469 763020 MB_0035 2 2N ER-
## 4 chr1 762469 763020 MB_0046 2 2N ER+
## 5 chr1 762469 763020 MB_0050 2 2N ER+
## 6 chr1 762469 763020 MB_0053 1 1N ER+
get_autosome_loci_meth
## function ()
## {
## get_autosome_loci() %>% inner_join(get_promoter_avg_meth()) %>%
## gather("samp", "meth", -(chrom:end))
## }
autosome_loci_meth <- get_autosome_loci_meth()
## Joining, by = c("chrom", "start", "end")
head(autosome_loci_meth)
## chrom start end samp meth
## 1 chr1 762469 763020 MB_0006 0.000000000
## 2 chr1 762676 763227 MB_0006 0.021929825
## 3 chr1 860619 861170 MB_0006 0.001870324
## 4 chr1 895465 896016 MB_0006 0.014367816
## 5 chr1 901375 901926 MB_0006 0.024253731
## 6 chr1 948345 948896 MB_0006 0.000000000
Plotting the distribution of loci
11.2.0.1 Extended Data Figure 9G
options(repr.plot.width = 8, repr.plot.height = 5)
df <- autosome_cna %>%
left_join(autosome_loci_meth) %>%
group_by(cna_grp, ER, chrom, start, end) %>%
summarise(meth = mean(meth, na.rm = TRUE)) %>%
filter(!is.na(ER)) %>%
mutate(cna_grp = factor(cna_grp, levels = c("1N", "2N", ">=3N"))) %>%
mutate(ER = factor(ER, levels = c("ER+", "ER-")))
## Joining, by = c("chrom", "start", "end", "samp")
p_boxp_meth_cna_autosome <- df %>%
ggplot(aes(x = cna_grp, y = meth, fill = ER, group = cna_grp)) +
# ggrastr::geom_boxplot_jitter(outlier.size = 0.1, outlier.jitter.width = 0.01, raster=TRUE) +
geom_boxplot(linewidth=0.1, fatten=0.5, outlier.size = 0.1) +
scale_fill_manual(values = annot_colors$ER1, guide = FALSE) +
xlab("") +
ylab("Methylation in autosomes") +
facet_grid(. ~ ER) +
ylim(0, 1.1) +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)
## Warning: Ignoring unknown parameters: linewidth
p_boxp_meth_cna_autosome + ggpubr::stat_compare_means(comparisons = list(c("1N", "2N"), c("2N", ">=3N")))
## Warning: Removed 6 rows containing missing values (geom_signif).
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
p_boxp_meth_cna_autosome +
ggpubr::stat_compare_means(label = "p.signif", comparisons = list(c("1N", "2N"), c("2N", ">=3N")))
## Warning: Removed 6 rows containing missing values (geom_signif).
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
p_boxp_meth_cna_autosome
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
df %>% ungroup() %>% distinct(chrom, start, end, ER) %>% count(ER)
## # A tibble: 2 x 2
## ER n
## 1 ER+ 15207
## 2 ER- 15207
autosome_cna %>% distinct(cna_grp, samp, ER) %>% count(ER, cna_grp)
## # A tibble: 6 x 3
## ER cna_grp n
## 1 ER- >=3N 296
## 2 ER- 1N 278
## 3 ER- 2N 293
## 4 ER+ >=3N 1077
## 5 ER+ 1N 1060
## 6 ER+ 2N 1085
We now move to show scatters of loci in different CNA regimes
get_autosome_meth_cna
## function ()
## {
## {
## auto_df <- get_autosome_cna() %>% inner_join(get_autosome_loci_meth()) %>%
## group_by(chrom, start, end, ER, cna_grp) %>% summarise(meth = mean(meth,
## na.rm = TRUE)) %>% ungroup() %>% spread(cna_grp,
## meth)
## auto_df
## } %cache_df% here("data/autosome_meth_cna.tsv") %>% as_tibble()
## }
autosome_meth_cna <- get_autosome_meth_cna()
head(autosome_meth_cna)
## # A tibble: 6 x 7
## chrom start end ER >=3N 1N 2N
## 1 chr1 762469 763020 ER- 0.007852706 0.008341878 0.007857689
## 2 chr1 762469 763020 ER+ 0.006506879 0.008089050 0.008125402
## 3 chr1 762676 763227 ER- 0.028412915 0.021358769 0.026941446
## 4 chr1 762676 763227 ER+ 0.028590166 0.070967669 0.042346185
## 5 chr1 762851 763402 ER- 0.029766663 0.022340914 0.029069381
## 6 chr1 762851 763402 ER+ 0.032413643 0.078736553 0.047128295
11.2.0.2 Extended Data Figure 9H
options(repr.plot.width = 5, repr.plot.height = 3)
p_scatter_meth_cna_2_3_autosome <- autosome_meth_cna %>%
ggplot(aes(x = `2N`, y = `>=3N`, color = ER)) +
geom_point(size=0.2) +
geom_abline(color = "black", linetype = "dashed") +
scale_color_manual(values = annot_colors$ER1, guide = FALSE) +
geom_abline(color = "red", linetype = "dashed", intercept = 0.1) +
geom_abline(color = "red", linetype = "dashed", intercept = -0.1) +
theme(aspect.ratio = 1)
p_scatter_meth_cna_1_2_autosome <- autosome_meth_cna %>%
ggplot(aes(x = `1N`, y = `2N`, color = ER)) +
geom_point(size=0.2) +
geom_abline(color = "black", linetype = "dashed") +
scale_color_manual(values = annot_colors$ER1, guide = FALSE) +
geom_abline(color = "red", linetype = "dashed", intercept = 0.1) +
geom_abline(color = "red", linetype = "dashed", intercept = -0.1) +
theme(aspect.ratio = 1)
p_scatter_meth_cna_1_2_autosome + p_scatter_meth_cna_2_3_autosome
## Warning: Removed 5 rows containing missing values (geom_point).
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
Same with expression:
get_autosome_expr_cna
## function ()
## {
## {
## auto_df <- get_autosome_cna() %>% inner_join(get_autosome_loci_expr()) %>%
## group_by(chrom, start, end, ER, cna_grp) %>% summarise(expr = mean(expr,
## na.rm = TRUE)) %>% ungroup() %>% spread(cna_grp,
## expr)
## auto_df
## } %cache_df% here("data/autosome_expr_cna.tsv") %>% as_tibble()
## }
autosome_expr_cna <- get_autosome_expr_cna()
head(autosome_expr_cna)
## # A tibble: 6 x 7
## chrom start end ER >=3N 1N 2N
## 1 chr1 762469 763020 ER- NA NA NA
## 2 chr1 762469 763020 ER+ NA NA NA
## 3 chr1 762851 763402 ER- NA NA NA
## 4 chr1 762851 763402 ER+ NA NA NA
## 5 chr1 860619 861170 ER- 5.598424 5.685151 5.844717
## 6 chr1 860619 861170 ER+ 5.816428 5.804565 5.811422
p_scatter_expr_cna_2_3 <- autosome_expr_cna %>%
ggplot(aes(x = `2N`, y = `>=3N`, color = ER)) +
geom_point(size=0.2) +
geom_abline(color = "black", linetype = "dashed") +
scale_color_manual(values = annot_colors$ER1, guide = FALSE) +
theme(aspect.ratio = 1) +
xlim(5, 14) +
ylim(5, 14)
p_scatter_expr_cna_1_2 <- autosome_expr_cna %>% ggplot(aes(x = `1N`, y = `2N`, color = ER)) +
geom_point(size=0.2) +
geom_abline(color = "black", linetype = "dashed") +
scale_color_manual(values = annot_colors$ER1, guide = FALSE) +
theme(aspect.ratio = 1) +
xlim(5, 14) +
ylim(5, 14)
p_scatter_expr_cna_1_2 + p_scatter_expr_cna_2_3
## Warning: Removed 5366 rows containing missing values (geom_point).
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
## Warning: Removed 5369 rows containing missing values (geom_point).
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
Looking at the distribution of loci in 2N / >=3N (averaging the samples expression per ER and CNA)
get_xist_expr_cna
## function ()
## {
## get_xist_cna() %>% inner_join(get_xist_loci_expr()) %>% group_by(chrom,
## start, end, ER, cna_grp) %>% summarise(expr = mean(expr,
## na.rm = TRUE)) %>% ungroup() %>% spread(cna_grp, expr)
## }
get_autosome_expr_cna
## function ()
## {
## {
## auto_df <- get_autosome_cna() %>% inner_join(get_autosome_loci_expr()) %>%
## group_by(chrom, start, end, ER, cna_grp) %>% summarise(expr = mean(expr,
## na.rm = TRUE)) %>% ungroup() %>% spread(cna_grp,
## expr)
## auto_df
## } %cache_df% here("data/autosome_expr_cna.tsv") %>% as_tibble()
## }
df_fc <- get_xist_expr_cna() %>%
mutate(type = "X") %>%
bind_rows(get_autosome_expr_cna() %>% mutate(type = "auto")) %>%
mutate(diff = `>=3N` - `2N`, diff1 = `2N` - `1N`) %>%
# filter(ER == "ER+")
filter(ER == "ER-")
## Joining, by = "samp"
## Joining, by = c("chrom", "start", "end")
## Joining, by = c("chrom", "start", "end", "samp")
head(df_fc)
## # A tibble: 6 x 10
## chrom start end ER 1N 2N >=3N type diff
## 1 chrX 9983293 9983844 ER- 7.880753 7.976930 8.193818 X 0.216887405
## 2 chrX 14891025 14891576 ER- 7.175153 7.258638 7.427273 X 0.168634811
## 3 chrX 14891133 14891684 ER- 6.111207 6.018312 6.172959 X 0.154646592
## 4 chrX 15353625 15354176 ER- 7.884146 7.926416 8.264637 X 0.338220886
## 5 chrX 16964312 16964863 ER- 6.916974 7.004652 7.381009 X 0.376356425
## 6 chrX 17393041 17393592 ER- 6.256066 6.338770 6.347885 X 0.009114449
## diff1
## 1 0.09617691
## 2 0.08348508
## 3 -0.09289491
## 4 0.04227070
## 5 0.08767872
## 6 0.08270411
11.2.0.3 Extended Data Figure 9I
pval <- ks.test(df_fc$diff[df_fc$type == "X"], df_fc$diff[df_fc$type == "auto"])$p.value
message(pval)
## 2.47357689886485e-12
fc_p <- df_fc %>%
ggplot(aes(color = type, x = diff)) +
geom_density(size = 1) +
theme(aspect.ratio = 1) +
xlab("log expression fold change (>=3N/2N)") +
ylab("") +
scale_color_manual(values = c("X" = "darkblue", "auto" = "red")) +
coord_cartesian(xlim = c(-0.5, 1)) +
theme(aspect.ratio = 1)
# annotate("text", x = 0.5, y = 4, label = "p<<0.001 (KS)", size = 2, family = "Arial")
fc_p
## Warning: Removed 2710 rows containing non-finite values (stat_density).
pval <- ks.test(df_fc$diff1[df_fc$type == "X"], df_fc$diff1[df_fc$type == "auto"])$p.value
message(pval)
## 0
fc_p <- df_fc %>%
ggplot(aes(color = type, x = -diff1)) +
geom_density(size = 1) +
theme(aspect.ratio = 1) +
xlab("log expression fold change (1N/2N)") +
ylab("") +
scale_color_manual(values = c("X" = "darkblue", "auto" = "red")) +
coord_cartesian(xlim = c(-0.5, 1)) +
theme(aspect.ratio = 1)
# annotate("text", x = 0.5, y = 4, label = "p<<0.001 (KS)", size = 2, family = "Arial")
fc_p
## Warning: Removed 2710 rows containing non-finite values (stat_density).
head(df_fc)
## # A tibble: 6 x 10
## chrom start end ER 1N 2N >=3N type diff
## 1 chrX 9983293 9983844 ER- 7.880753 7.976930 8.193818 X 0.216887405
## 2 chrX 14891025 14891576 ER- 7.175153 7.258638 7.427273 X 0.168634811
## 3 chrX 14891133 14891684 ER- 6.111207 6.018312 6.172959 X 0.154646592
## 4 chrX 15353625 15354176 ER- 7.884146 7.926416 8.264637 X 0.338220886
## 5 chrX 16964312 16964863 ER- 6.916974 7.004652 7.381009 X 0.376356425
## 6 chrX 17393041 17393592 ER- 6.256066 6.338770 6.347885 X 0.009114449
## diff1
## 1 0.09617691
## 2 0.08348508
## 3 -0.09289491
## 4 0.04227070
## 5 0.08767872
## 6 0.08270411
thresh <- 0.1
autosome_cna_meth_expr <- get_autosome_cna_meth_expr()
dosage_cands <- autosome_cna_meth_expr %>%
filter(`>=3N` >= (`2N` + thresh) ) %>%
filter(`n_>=3N` >= 5, `n_2N` >= 5)
dosage_cands_1N <- autosome_cna_meth_expr %>%
filter(`1N` <= (`2N` - thresh) ) %>%
filter(`n_1N` >= 5, `n_2N` >= 5)
dosage_cands_expr <- dosage_cands %>% select(chrom, start, end, ER, `1N` = `expr_1N`, `2N` = `expr_2N`, `>=3N` = `expr_>=3N`)
dosage_cands_expr_1N <- dosage_cands_1N %>% select(chrom, start, end, ER, `1N` = `expr_1N`, `2N` = `expr_2N`)
df_fc <- get_xist_expr_cna() %>%
mutate(type = "X") %>%
bind_rows(autosome_expr_cna %>% anti_join(dosage_cands_expr) %>% mutate(type = "auto")) %>%
bind_rows(dosage_cands_expr %>% mutate(type = "auto-dosage")) %>%
mutate(ER = factor(ER, levels=c("ER+", "ER-"))) %>%
filter(!is.na(ER)) %>%
mutate(type = factor(type, levels=c("X", "auto", "auto-dosage"))) %>%
arrange(type) %>%
mutate(diff = `>=3N` - `2N`)
## Joining, by = "samp"
## Joining, by = c("chrom", "start", "end")
## Joining, by = c("chrom", "start", "end", "samp")
## Joining, by = c("chrom", "start", "end", "ER", ">=3N", "1N", "2N")
fc_p <- df_fc %>%
ggplot(aes(color = type, x = diff)) +
geom_density(size = 0.5) +
theme(aspect.ratio = 1) +
xlab("log expression fold change (>=3N/2N)") +
ylab("") +
scale_color_manual(values = c("auto" = "red", "auto-dosage" = "blue", "X" = "gray")) +
# ggsci::scale_color_npg() +
coord_cartesian(xlim = c(-0.6, 1)) +
theme(aspect.ratio = 1) +
facet_wrap(~ER, scale="free_y", nrow=1) +
guides(color = FALSE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
df_fc %>% group_by(ER) %>% summarise(p_x_auto = ks.test(diff[type == "X"], diff[type == "auto"])$p.value, p_auto_dosage = ks.test(diff[type == "auto-dosage"], diff[type == "auto"])$p.value) %>% print()
## # A tibble: 2 x 3
## ER p_x_auto p_auto_dosage
## 1 ER+ 5.535245e-03 0
## 2 ER- 1.049272e-12 0
df_fc_1N <- get_xist_expr_cna() %>%
mutate(type = "X") %>%
bind_rows(autosome_expr_cna %>% anti_join(dosage_cands_expr_1N) %>% mutate(type = "auto")) %>%
bind_rows(dosage_cands_expr_1N %>% mutate(type = "auto-dosage")) %>%
mutate(ER = factor(ER, levels=c("ER+", "ER-"))) %>%
filter(!is.na(ER)) %>%
mutate(type = factor(type, levels=c("X", "auto", "auto-dosage"))) %>%
arrange(type) %>%
mutate(diff = `1N` - `2N`)
## Joining, by = "samp"
## Joining, by = c("chrom", "start", "end")
## Joining, by = c("chrom", "start", "end", "samp")
## Joining, by = c("chrom", "start", "end", "ER", "1N", "2N")
fc_p_1N <- df_fc_1N %>%
ggplot(aes(color = type, x = diff)) +
geom_density(size = 0.5) +
theme(aspect.ratio = 1) +
xlab("log expression fold change (1N/2N)") +
ylab("") +
scale_color_manual(values = c("auto" = "red", "auto-dosage" = "blue", "X" = "gray")) +
coord_cartesian(xlim = c(-0.6, 1)) +
theme(aspect.ratio = 1) +
facet_wrap(~ER, scale="free_y", nrow=1) +
guides(color = FALSE)
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
df_fc_1N %>% group_by(ER) %>% summarise(p_x_auto = ks.test(diff[type == "X"], diff[type == "auto"])$p.value, p_auto_dosage = ks.test(diff[type == "auto-dosage"], diff[type == "auto"])$p.value) %>% print()
## # A tibble: 2 x 3
## ER p_x_auto p_auto_dosage
## 1 ER+ 0 0.025082963
## 2 ER- 0 0.006876647
fc_p
## Warning: Removed 5500 rows containing non-finite values (stat_density).
fc_p_1N
## Warning: Removed 5431 rows containing non-finite values (stat_density).
11.3 Checking dosage compensation on all X loci
get_xist_loci
## function ()
## {
## {
## xist_expr <- get_gene_expression_mat() %>% filter(name ==
## "XIST") %>% gather("samp", "expr", -(chrom:name3.chr)) %>%
## as_tibble()
## all_norm_meth <- fread(here("data/all_norm_meth.tsv")) %>%
## filter(chrom == "chrX") %>% as_tibble()
## ER_positive_mat <- all_norm_meth %>% select(chrom:end,
## any_of(ER_positive_samples)) %>% intervs_to_mat()
## ER_negative_mat <- all_norm_meth %>% select(chrom:end,
## any_of(ER_negative_samples)) %>% intervs_to_mat()
## xist_expr_vec <- xist_expr %>% select(samp, expr) %>%
## deframe()
## samples <- intersect(names(xist_expr_vec), colnames(ER_positive_mat))
## ER_pos_cors <- tgs_cor(t(ER_positive_mat[, samples]),
## as.matrix(xist_expr_vec[samples]), pairwise.complete.obs = TRUE)
## samples <- intersect(names(xist_expr_vec), colnames(ER_negative_mat))
## ER_neg_cors <- tgs_cor(t(ER_negative_mat[, samples]),
## as.matrix(xist_expr_vec[samples]), pairwise.complete.obs = TRUE)
## xist_loci <- cbind(ER_pos_cors, ER_neg_cors) %>% mat_to_intervs() %>%
## filter(V1 >= 0.2 | V2 >= 0.2) %>% distinct(chrom,
## start, end)
## } %cache_df% here("data/xist_loci.tsv") %>% as_tibble()
## }
## <bytecode: 0xfcb8860>
all_norm_meth_x <- fread(here("data/all_norm_meth.tsv")) %>%
filter(chrom == "chrX") %>% as_tibble()
X_loci <- all_norm_meth_x %>% distinct(chrom, start, end)
get_xist_cna
## function ()
## {
## xist_cna <- get_xist_loci() %>% gintervals.neighbors1(cna %>%
## mutate(end = ifelse(start == end, start + 1, end)) %>%
## filter(chrom == "chrX"), maxneighbors = nrow(samp_data)) %>%
## filter(dist == 0) %>% select(chrom, start, end, samp,
## cna = cna_round)
## xist_cna <- xist_cna %>% mutate(cna_grp = cut(cna, breaks = c(-1,
## 0, 1, 2, 10), labels = c("0N", "1N", "2N", ">=3N"))) %>%
## filter(cna_grp != "0N") %>% left_join(samp_data %>% select(samp,
## ER = ER1))
## return(xist_cna)
## }
## <bytecode: 0x1612c170>
X_cna <- X_loci %>% gintervals.neighbors1(cna %>%
mutate(end = ifelse(start == end, start + 1, end)) %>%
filter(chrom == "chrX"), maxneighbors = nrow(samp_data)) %>%
filter(dist == 0) %>% select(chrom, start, end, samp,
cna = cna_round)
X_cna <- X_cna %>% mutate(cna_grp = cut(cna, breaks = c(-1,
0, 1, 2, 10), labels = c("0N", "1N", "2N", ">=3N"))) %>%
filter(cna_grp != "0N") %>% left_join(samp_data %>% select(samp,
ER = ER1)) %cache_df% here("data/X_cna.tsv") %>% as_tibble()
head(X_cna)
## # A tibble: 6 x 7
## chrom start end samp cna cna_grp ER
## 1 chrX 192489 193040 MB_0006 2 2N ER+
## 2 chrX 192489 193040 MB_0028 1 1N ER+
## 3 chrX 192489 193040 MB_0046 1 1N ER+
## 4 chrX 192489 193040 MB_0050 1 1N ER+
## 5 chrX 192489 193040 MB_0053 1 1N ER+
## 6 chrX 192489 193040 MB_0054 2 2N ER+
get_xist_loci_meth
## function ()
## {
## get_xist_loci() %>% inner_join(get_promoter_avg_meth()) %>%
## gather("samp", "meth", -(chrom:end))
## }
## <bytecode: 0x498253b0>
X_loci_meth <- X_loci %>% inner_join(get_promoter_avg_meth()) %>%
gather("samp", "meth", -(chrom:end))
## Joining, by = c("chrom", "start", "end")
options(repr.plot.width = 5, repr.plot.height = 3)
p_boxp_meth_cna_all <- X_cna %>%
left_join(X_loci_meth) %>%
group_by(cna_grp, ER, chrom, start, end) %>%
summarise(meth = mean(meth, na.rm = TRUE)) %>%
filter(!is.na(ER)) %>%
mutate(cna_grp = factor(cna_grp, levels = c("1N", "2N", ">=3N"))) %>%
mutate(ER = factor(ER, levels = c("ER+", "ER-"))) %>%
ggplot(aes(x = cna_grp, y = meth, fill = ER, group = cna_grp)) +
geom_boxplot(linewidth=0.1, fatten=0.5, outlier.size = 0.1) +
scale_fill_manual(values = annot_colors$ER1, guide = FALSE) +
xlab("") +
ylab("Methylation in X promoters") +
facet_grid(. ~ ER) +
ylim(0, 1.1) +
ggpubr::stat_compare_means(label = "p.signif", comparisons = list(c("1N", "2N"), c("2N", ">=3N"))) +
theme(
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
)
## Joining, by = c("chrom", "start", "end", "samp")
## Warning: Ignoring unknown parameters: linewidth
p_boxp_meth_cna_all
## Warning: Removed 41928 rows containing non-finite values (stat_boxplot).
## Warning: Removed 41928 rows containing non-finite values (stat_signif).
## Warning: Removed 6 rows containing missing values (geom_signif).
## Warning: It is deprecated to specify `guide = FALSE` to remove a guide. Please
## use `guide = "none"` instead.
Looks very similar to the XIST associated version
11.4 % of X lost
11.4.0.1 Extended Data Figure 9C
chr_x_len <- gintervals.all() %>% filter(chrom == "chrX") %>% pull(end)
p_x_perc <- cna %>%
filter(chrom == "chrX") %>% mutate(l = end - start) %>%
mutate(cna = cut(cna_round, breaks = c(0, 1, 2, 20), include.lowest=TRUE, labels=c("Loss", "normal", "Gain"))) %>%
group_by(samp, cna) %>%
summarise(p = sum(l) / chr_x_len) %>%
filter(cna != "normal") %>%
ungroup() %>%
add_ER() %>%
filter(!is.na(ER)) %>%
ggplot(aes(x=p, color=ER, y=1-..y..)) +
stat_ecdf() +
facet_wrap(~cna) +
scale_color_manual(values=annot_colors$ER1) +
theme(aspect.ratio=1) +
xlab("% of X") +
ylab("% of samples") +
scale_x_continuous(labels=scales::percent) +
scale_y_continuous(labels=scales::percent)
p_x_perc
11.5 Plot Expression-Methylation correlation for X
meth_mat <- all_norm_meth_x %>% select(chrom:end, any_of(ER_positive_samples)) %>% intervs_to_mat()
expr_m <- get_gene_expression_mat() %>% select(-any_of(c("chrom", "start", "end", "name3.chr")))
expr_mat <- expr_m %>%
as.data.frame() %>%
column_to_rownames("name")
f <- rowSums(!is.na(expr_mat)) > 0
expr_mat <- expr_mat[f, ]
dim(expr_mat)
## [1] 17691 2124
em_cross <- em_cross_cor(meth_mat, expr_mat, meth_cor_thresh = 0.25, expr_cor_thresh = 0.25) %cache_rds% here("data/X_er_positive_em_cross_cor.rds")
em_cross_clust <- cluster_em_cross_cor(em_cross, k_meth = 32, k_expr = 32) %cache_rds% here("data/X_er_positive_em_cross_cor_clust.rds")
11.5.0.1 Extended Data Figure 9A
options(repr.plot.width = 8, repr.plot.height = 13)
plot_em_cross_cor(em_cross_clust)
## plotting em cross
em_cross_clust$em_cross[1:5, 1:5]
## AARS ABCA6 ABCA8 ABCB1
## chrX_12156083_12156634 -0.009473256 0.069027019 0.089264904 0.014470847
## chrX_14261885_14262436 -0.029288914 -0.007331325 -0.003098204 -0.054763278
## chrX_23017576_23018127 0.091659895 -0.029980425 -0.017798906 -0.016557969
## chrX_23800773_23801324 -0.142085957 0.028894237 0.062800268 -0.001669589
## chrX_23925621_23926172 -0.080076720 -0.025302800 0.012101469 0.012796371
## ABI3BP
## chrX_12156083_12156634 0.089861306
## chrX_14261885_14262436 -0.001929155
## chrX_23017576_23018127 -0.010672580
## chrX_23800773_23801324 0.016375597
## chrX_23925621_23926172 -0.010378608
dim(em_cross_clust$em_cross)
## [1] 1218 595