13 Export supplementary tables

source(here::here("scripts/init.R"))

13.1 Table 1: METABRIC samples metadata

METABRIC samples profiled in this study.

Metadata fields are:

  • samp
  • patient
  • batch
  • track
  • total_reads
  • mapped_reads
  • mapped_frac
  • cg_num
  • meth_calls
  • global_avg_meth
  • type
  • age
  • grade
  • stage
  • ER
  • IHC
  • iC10
  • PAM50
  • matched_normal
  • matched_tumor
  • digpath_lymph
  • digpath_stromal
  • IMC_Fibroblasts
  • IMC_Lymphocytes
  • ASCAT_cellularity
  • ASCAT_ploidy
  • MathScore
  • log10_global_epm
samp_qc <- fread(here("data/sample_qc.csv")) %>% as_tibble()
tab <- samp_data %>% left_join(samp_qc) %>% select(samp, patient, batch, track, total_reads, mapped_reads, mapped_frac, cg_num, meth_calls, global_avg_meth, type, age, grade, stage, ER, IHC, iC10, PAM50, matched_normal, matched_tumor, digpath_lymph, digpath_stromal, IMC_Fibroblasts, IMC_Lymphocytes, ASCAT_cellularity, MathScore, log10_global_epm)
## Joining, by = "track"
writexl::write_xlsx(tab, here("export/S1 - Sample Information.xlsx"))

13.2 Table 2: Immune expression signature

Table of genes that were used to define the Immune expression signature, separated by ER status.

immune_genes_tab <- map2_dfr(c("ER_positive", "ER_negative", "normal"), c("ER+", "ER-", "normal"), ~
        tibble(gene = get_TME_genes(readr::read_rds(here(glue("data/{.x}_norm_meth.rds")))$em_cross_clust, caf_gene = NULL)) %>% mutate(ER = .y) ) %cache_df% here("data/immune_genes_by_er.tsv")

immune_genes_tab %>% count(ER)
##       ER   n
## 1    ER- 345
## 2    ER+ 195
## 3 normal 864
head(immune_genes_tab)
##      gene  ER
## 1   ACAP1 ER+
## 2     ADA ER+
## 3   ADAM7 ER+
## 4    AIM2 ER+
## 5    AOAH ER+
## 6 APBB1IP ER+
writexl::write_xlsx(immune_genes_tab, here("export/S2 - Immune expression signature.xlsx"))

13.3 Table 3: CAF expression signature

Table of genes that were used to define the CAF expression signature, separated by ER status.

caf_genes_tab <- map2_dfr(c("ER_positive", "ER_negative", "normal"), c("ER+", "ER-", "normal"), ~
        tibble(gene = get_TME_genes(readr::read_rds(here(glue("data/{.x}_norm_meth.rds")))$em_cross_clust, immune_gene = NULL)) %>% mutate(ER = .y) ) %cache_df% here("data/caf_genes_by_er.tsv")

caf_genes_tab %>% count(ER)
##       ER   n
## 1    ER- 360
## 2    ER+ 207
## 3 normal 592
head(caf_genes_tab)
##      gene  ER
## 1   ABCA6 ER+
## 2   ABCA8 ER+
## 3   ABCB1 ER+
## 4  ACVRL1 ER+
## 5 ADAMTS9 ER+
## 6 ALDH1A2 ER+
writexl::write_xlsx(caf_genes_tab, here("export/S3 - CAF expression signature.xlsx"))

13.4 Table 4: Expression-Methylation correlation

Expression-Methylation correlation tables. Shown are pairs of tables of the following datasets: 1. Raw promoter methylation (Extended Data Fig. 2a,b). 2. Immune-CAF normalized methylation for loci that were correlated with MG/ML epigenomic instability (Fig. 2f). 3. X chromosome immune-CAF normalized methylation (Extended Data Fig 9a).

Each dataset is represented by an expression table, showing the mean correlation of each one of 30/32 methylation clusters in each gene, and a methylation table showing the mean correlation of every locus to 30/32 expression clusters.

em_raw <- list(
        `ER+` = readr::read_rds(here("data/ER_positive_norm_meth.rds"))$em_cross_clust,
        `ER-` = readr::read_rds(here("data/ER_negative_norm_meth.rds"))$em_cross_clust,
        `normal` = readr::read_rds(here("data/normal_norm_meth.rds"))$em_cross_clust
)
em_raw <- map(em_raw, parse_em_cors)
norm_em <- readr::read_rds(here("data/MG_ML_em_cross_cor_clust.rds"))
norm_em <- parse_em_cors(norm_em)
x_em <- readr::read_rds(here("data/X_er_positive_em_cross_cor_clust.rds"))
x_em <- parse_em_cors(x_em)
sheets <- list(
    "Raw promoters ER+ (Expression)" = em_raw[["ER+"]]$expr_tab, 
    "Raw promoters ER+ (Methylation)" = em_raw[["ER+"]]$meth_tab,
    "Raw promoters ER- (Expression)" = em_raw[["ER-"]]$expr_tab, 
    "Raw promoters ER- (Methylation)" = em_raw[["ER-"]]$meth_tab, 
    "MG,ML loci ER+ (Expression)" = norm_em$expr_tab,
    "MG,ML loci ER+ (Methylation)" = norm_em$meth_tab,
    "X chromosome ER+ (Expression)" = x_em$expr_tab, 
    "X chromosome ER+ (Methylation)" = x_em$meth_tab
)
writexl::write_xlsx(sheets, here("export/S4 - Expression-methylation correlations.xlsx"))

13.5 Table 5: Methylation scores

Table with CAF, Immune, Clock, MG and ML methylation scores per METABRIC sample.

tab <- get_all_features()
writexl::write_xlsx(tab, here("export/S5 - Methylation scores.xlsx"))

13.6 Table 6: Gene expression correlation to epigenomic instability

Tables of genes that have an absolute expression correlation higher than 0.3 to MG/ML/Clock scores, together with their respective correlations.

feat_gene_cors <- get_expression_features_cors()
cor_thresh <- 0.3
tab <- feat_gene_cors %>% filter(abs(MG) >= cor_thresh | abs(ML) >= cor_thresh | abs(clock) > cor_thresh) %>% mutate(ER = factor(ER, levels = c("ER+", "ER-", "normal"))) %>% select(name, ER,  clock.cor = clock, MG.cor = MG, ML.cor = ML, caf.cor = caf, immune.cor = immune) %>% arrange(ER, clock.cor, MG.cor, ML.cor)
writexl::write_xlsx(tab, here("export/S6 - Gene expression correlation to methylation scores.xlsx"))

13.7 Table 7: Methylation layers loci

Coordinates of loci that were used in order to calculate the CAF, Immune, Clock, MG and ML scores.

clust_df <- fread(here("data/ER_positive_loci_clust.tsv") ) %>% as_tibble()
tab <- clust_df %>% rename(layer = clust) %>% filter(layer %in% c("ML", "MG", "clock")) %>% arrange(layer, chrom, start, end) 
head(tab)
## # A tibble: 6 x 4
##   chrom   start     end layer
## 1  chr1  134998  135215 clock
## 2  chr1  837884  838076 clock
## 3  chr1  907866  908002 clock
## 4  chr1  996514  996647 clock
## 5  chr1 1086481 1086732 clock
## 6  chr1 1381236 1381347 clock
writexl::write_xlsx(tab, here("export/S7 - Methylation layers loci.xlsx"))

13.8 Table 8: Cis regulation candidates

Pairs of loci and genes that are candidates for cis regulation in different FDR thresholds. Promoters and genomic loci are shown in separate tables.

min_dist <- 5e5
min_tss_dist <- 200
genomic_cis_cands <- bind_rows(
    fread(here("data/genomic_cis_cands_ER_positive.tsv")),
    fread(here("data/genomic_cis_cands_ER_negative.tsv")),
    fread(here("data/genomic_cis_cands_normal.tsv")))
head(genomic_cis_cands)
##   chrom  start    end type rank   gene        cor chrom_expr start_expr
## 1  chr1  10496  10587  obs    1 CT45A3 -0.2446525       chrX  134883487
## 2  chr1  10588  10639  obs    1  DSCR8 -0.1658337      chr21   39493544
## 3  chr1 134998 135215  obs    1 MAGEC2 -0.2381125       chrX  141293076
## 4  chr1 546168 546310  obs    1 MAGEA8 -0.2542376       chrX  149009940
## 5  chr1 565396 565791  obs    1 RAD51C -0.2153869      chr17   56769933
## 6  chr1 567121 567237  obs    1 TIMM23 -0.2271249      chr10   51623386
##    end_expr strand_expr dist n_obs n_shuff        fdr  ER
## 1 134883488           1   NA  2680      92 0.03432836 ER+
## 2  39493545           1   NA  2680      92 0.03432836 ER+
## 3 141293077          -1   NA  2680      92 0.03432836 ER+
## 4 149009941           1   NA  2680      92 0.03432836 ER+
## 5  56769934           1   NA  2680      92 0.03432836 ER+
## 6  51623387          -1   NA  2680      92 0.03432836 ER+
dim(genomic_cis_cands)
## [1] 55616700       16
cands_genomic <- genomic_cis_cands %>%
    filter(type == "obs", !is.na(dist), abs(dist) <= min_dist, abs(dist) >= min_tss_dist) %>%
    arrange(cor) %>% 
    as_tibble()
source(here::here("scripts/init.R"))
genomic_list <- list(
    `Genomic (best correlation)` = cands_genomic %>% filter(rank == 1),
    `Genomic (FDR < 0.05)` = cands_genomic %>% filter(fdr < 0.05), 
    `Genomic (FDR < 0.1)` = cands_genomic %>% filter(fdr < 0.1)
)

genomic_list <- map(genomic_list, ~ annotate_cis_cands(.x, sigma_meth = 2, sigma_expr = 2, meth_diff = 0.2, expr_diff = 1))
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
genomic_list <- map(genomic_list, ~ .x %>% 
    select(gene, 
           ER, 
           chrom, 
           start, 
           end,            
           distance = dist, 
           fdr, 
           rank, 
           cor, 
           mean_meth, 
           sd_meth, 
           mean_expr, 
           sd_expr, 
           normal_meth, 
           normal_meth_sd, 
           normal_expr, 
           normal_expr_sd, 
           n_hypometh, 
           n_induced, 
           n_hypermeth, 
           n_repressed, 
           N_considered, 
           n_hypometh_vs_normal,
           n_stable_vs_normal, 
           n_hypermeth_vs_normal, 
           n_repressed_vs_normal, 
           n_stable_expr_vs_normal, 
           n_induced_vs_normal) %>% 
                    arrange(cor) 
)
cands_prom <- fread(here("data/promoter_cis_cands.tsv")) %>% as_tibble() 
prom_list <- list(
    `Promoters (best correlation)` = cands_prom %>% filter(r == 1),
    `Promoters (FDR < 0.005)` = cands_prom %>% filter(fdr < 0.005), 
    `Promoters (FDR < 0.01)` = cands_prom %>% filter(fdr < 0.01),
    `Promoters (FDR < 0.05)` = cands_prom %>% filter(fdr < 0.05)
)
prom_list <- map(prom_list, ~ annotate_cis_cands(.x %>% rename(gene = name), sigma_meth = 2, sigma_expr = 2, meth_diff = 0.2, expr_diff = 1))
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "coord"
## Joining, by = "coord"
## Joining, by = "gene"
## Joining, by = "gene"
prom_list <- map(prom_list, ~ 
                .x %>% 
        mutate(diff = abs(best - kth)) %>% 
        select(gene, ER, chrom, start, end, fdr, rank = r, cor, diff_from_second_best = diff, mean_meth, sd_meth, mean_expr, sd_expr, normal_meth, normal_meth_sd, normal_expr, normal_expr_sd, n_hypometh, n_induced, n_hypermeth, n_repressed, N_considered, n_hypometh_vs_normal, n_stable_vs_normal, n_hypermeth_vs_normal, n_repressed_vs_normal, n_stable_expr_vs_normal, n_induced_vs_normal) %>%
                arrange(cor) )
cis_cands <- c(prom_list, genomic_list)    
writexl::write_xlsx(x = cis_cands, path = here("export/S8 - Cis Regulation Candidates.xlsx"))

13.9 Table 9: Dosage compensation in autosomes

Table of autosome promoters that show increase of at least 10% in methylation when amplified to at least 3N vs 2N (‘Gain 3N’ table), at least 4N vs 2N (‘Amplification 4N’). The table ‘Loss’ shows loci that showed decreased methylation of at least 10% when losing a copy (1N vs 2N).

gen_dosage_comp_excel()
gc()
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells    4173577   222.9    8029078   428.8    8029078   428.8
## Vcells 2635840254 20109.9 5129628008 39136.0 5101547905 38921.8