C Baskets of other FSHD signatures
We use the RNA-seq expression data from the longitudinal cohort to identify representative markers of extra cellular matrix and inflammatory/immune response that are best in distinguishing mildly and moderately affected FSHD muscles relative to the controls.
# load libraries and data
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(DESeq2))
suppressPackageStartupMessages(library(writexl))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(library(knitr))
pkg_dir <- "/Users/cwon2/CompBio/Wellstone_BiLateral_Biopsy"
ext_dir <- file.path(pkg_dir, "extdata")
load(file.path(pkg_dir, "data", "longitudinal_dds.rda"))
annotation <- as.data.frame(rowData(longitudinal_dds)) %>%
dplyr::select(gene_id, gene_name)
Steps:
- Load the list of differentially expressed genes (DEGs) in severely affected FSHD biopsies in the longitudinal cohort there were previously reported in Chao-Jen Wong7 (supplementary table 5) and that are associated with ECM, DUX4, stress, inflammatory biological precesses
- Identify differentially expressed genes in mildly to moderately affected FSHD using DESeq2 in the longitudinal cohort
- Retain ECM, stress and inflammatory associated DEGs that are in both step 1 and 2
- Save to /Users/cwon2/CompBio/Wellstone_BiLateral_Biopsy/stats/FSHD_signatures_baskets.xlsx
# previous report on FSHD: High FSHD vs. controls and enriched GO terms
file_name <-
file.path(ext_dir,
"suppl_table_5_Candidate_Biomarkers_and_Enriched_Go.xlsx")
#' High vs Control differential analysis sheet
de <- readxl::read_xlsx(path=file_name, sheet=1, skip=3)
# DEGs that are associated with ECM signatures
ecm_de <- de %>%
filter(ecm == TRUE) %>%
pull(gencode_id) %>% as.character(.)
# DEGs associated with DUX4 signatures
dux4_de <- de %>%
filter(DUX4_induced == TRUE) %>%
pull(gencode_id) %>% as.character(.)
inflamm_de <- de %>%
filter(inflamm == TRUE | immune == TRUE) %>%
pull(gencode_id) %>%
as.character(.)
stress_de <- de %>%
filter(stress == TRUE) %>%
pull(gencode_id) %>% as.character(.)
ig_de <- de %>%
filter(grepl("IGH", gene_name) | grepl("IGK", gene_name)) %>%
pull(gencode_id) %>% as.character(.)
de_list <- list(Extracellur_Matrix=ecm_de,
DUX4=dux4_de,
Inflammatory=inflamm_de,
Immunoglobulin=ig_de)
C.1 Identify differentially expressed genes in Mild+Moderate FSHDs
Method:
-Hypothesis testing approach: use DESeq2 to estimate fold changes and adjusted p-values for comparison between the Mild+Moderate FHSD biopsies and the control group -Criteria for significance: adjusted p-value < 0.0.5 corresponding to \(H_0:|logFC(\frac{FSHD}{control})| \leq 1\).
sub_dds <- longitudinal_dds[, longitudinal_dds$cluster %in%
c("Control", "Mild", "Moderate")]
DESeq2::design(sub_dds) <- formula(~ pheno_type)
sub_dds <- DESeq(sub_dds)
tpm <- assays(longitudinal_dds)[["TPM"]]
avg_TPM <-
data.frame(
avg_TPM_controls = rowMeans(tpm[,
longitudinal_dds$cluster == "Control"]),
avg_TPM_M = rowMeans(tpm[, longitudinal_dds$cluster %in%
c("Mild", "Moderate")]),
avg_TPM_H = rowMeans(tpm[, longitudinal_dds$cluster %in%
c("IG-High", "High")])) %>%
rownames_to_column(var="gene_id")
res <- as.data.frame(results(sub_dds, alpha=0.05, lfcThreshold=1)) %>%
rownames_to_column(var="gene_id") %>%
dplyr::select(gene_id, log2FoldChange, padj) %>%
dplyr::mutate(up_sig = padj < 0.05 & log2FoldChange > 0)
C.2 Extracellular Matrix basket
ecm <- res %>% dplyr::filter(up_sig, gene_id %in% ecm_de) %>%
dplyr::left_join(annotation, by="gene_id") %>%
left_join(avg_TPM, by="gene_id") %>%
relocate(gene_name, .after=gene_id)
knitr::kable(ecm, format="html", escape = F,
caption="Extracellular matrix basket canidiates. log2FoldChange and padj rendered by the comparison of Mild+Moderate vs. Controls. Columns `Avg_TPM_M` and `avg_TPM_H` are the average TPM of Mild+Moderate and High+IG-High, respectively.") %>%
kableExtra::kable_styling(font_size = 12) %>%
kableExtra::kable_paper() %>%
kableExtra::scroll_box(width = "800px", height="100%")
gene_id | gene_name | log2FoldChange | padj | up_sig | avg_TPM_controls | avg_TPM_M | avg_TPM_H |
---|---|---|---|---|---|---|---|
ENSG00000041982.15 | TNC | 3.386434 | 0.0040765 | TRUE | 0.3138395 | 4.2978095 | 27.105923 |
ENSG00000082293.12 | COL19A1 | 3.425878 | 0.0490725 | TRUE | 0.0201126 | 0.2730210 | 2.410165 |
ENSG00000105664.10 | COMP | 7.218632 | 0.0000001 | TRUE | 0.0283236 | 5.3475486 | 144.507766 |
ENSG00000118785.13 | SPP1 | 5.146615 | 0.0038728 | TRUE | 0.0072142 | 0.3696096 | 7.582766 |
ENSG00000137801.10 | THBS1 | 2.631680 | 0.0140311 | TRUE | 0.3721547 | 4.9745294 | 16.271863 |
ENSG00000145423.4 | SFRP2 | 3.609936 | 0.0137626 | TRUE | 0.6066932 | 9.1443302 | 332.027319 |
ENSG00000148848.14 | ADAM12 | 3.360888 | 0.0377372 | TRUE | 0.0178207 | 0.2586541 | 6.186523 |
C.3 Inflamm/immune response basket
inflamm <- res %>% dplyr::filter(up_sig, gene_id %in% inflamm_de) %>%
dplyr::left_join(annotation, by="gene_id") %>%
left_join(avg_TPM, by="gene_id") %>%
relocate(gene_name, .after=gene_id)
knitr::kable(inflamm, format="html", escape = F,
caption="Inflamm/immunse response basket canidiates. log2FoldChange and padj rendered by the comparison Mild+Moderate vs. Controls. Avg_TPM_M and avg_TPM_H are the average TPM of Mild+Moderate and High+IG-High, respectively. ") %>%
kableExtra::kable_styling(font_size = 12) %>%
kableExtra::kable_paper() %>%
kableExtra::scroll_box(width = "800px", height="100%")
gene_id | gene_name | log2FoldChange | padj | up_sig | avg_TPM_controls | avg_TPM_M | avg_TPM_H |
---|---|---|---|---|---|---|---|
ENSG00000095970.16 | TREM2 | 5.110800 | 0.0236694 | TRUE | 0.0018253 | 0.2273485 | 3.302698 |
ENSG00000116690.12 | PRG4 | 3.541993 | 0.0110717 | TRUE | 0.2267296 | 3.6249377 | 34.209170 |
ENSG00000118785.13 | SPP1 | 5.146615 | 0.0038728 | TRUE | 0.0072142 | 0.3696096 | 7.582766 |
ENSG00000137801.10 | THBS1 | 2.631680 | 0.0140311 | TRUE | 0.3721547 | 4.9745294 | 16.271863 |
ENSG00000159216.18 | RUNX1 | 2.522424 | 0.0211987 | TRUE | 0.1647490 | 1.2523059 | 7.024218 |
ENSG00000172724.11 | CCL19 | 5.445712 | 0.0289911 | TRUE | 0.0000000 | 0.4903472 | 13.311797 |
ENSG00000187908.15 | DMBT1 | 7.084251 | 0.0001474 | TRUE | 0.0000000 | 0.1825078 | 2.733225 |
ENSG00000188257.10 | PLA2G2A | 4.589421 | 0.0079048 | TRUE | 0.0924933 | 3.1238318 | 87.816923 |
ENSG00000275385.1 | CCL18 | 5.354389 | 0.0079048 | TRUE | 0.0151312 | 0.8857681 | 6.660016 |
C.4 Stress response basket
Some stressed related differentially expressed genes are overlapping with inflamm/immune response and extracellular matrix basket genes.
stress <- res %>% dplyr::filter(up_sig, gene_id %in% stress_de) %>%
dplyr::left_join(annotation, by="gene_id") %>%
left_join(avg_TPM, by="gene_id") %>%
relocate(gene_name, .after=gene_id)
stress %>%
dplyr::mutate(gene_name = kableExtra::cell_spec(gene_name,
color = if_else(gene_name %in% c(inflamm$gene_name, ecm$gene_name), "blue", "green"))) %>%
knitr::kable(format="html", escape = F,
caption="Stree basket canidiates. log2FoldChange and padj rendered by the comparison Mild+Moderate vs. Controls. Blue reprepsents genes associated with ECM or inflamm/immune response and green are exclusively stress related. Avg_TPM_M and avg_TPM_H are the average TPM of Mild+Moderate and High+IG-High, respectively.") %>%
kableExtra::kable_styling(font_size = 12) %>%
kableExtra::kable_paper() %>%
kableExtra::scroll_box(width = "800px", height="100%")
gene_id | gene_name | log2FoldChange | padj | up_sig | avg_TPM_controls | avg_TPM_M | avg_TPM_H |
---|---|---|---|---|---|---|---|
ENSG00000041982.15 | TNC | 3.386434 | 0.0040765 | TRUE | 0.3138395 | 4.2978095 | 27.1059228 |
ENSG00000095970.16 | TREM2 | 5.110800 | 0.0236694 | TRUE | 0.0018253 | 0.2273485 | 3.3026980 |
ENSG00000118785.13 | SPP1 | 5.146615 | 0.0038728 | TRUE | 0.0072142 | 0.3696096 | 7.5827665 |
ENSG00000124762.13 | CDKN1A | 3.544649 | 0.0000380 | TRUE | 0.5294775 | 8.1761862 | 46.2143815 |
ENSG00000137801.10 | THBS1 | 2.631680 | 0.0140311 | TRUE | 0.3721547 | 4.9745294 | 16.2718634 |
ENSG00000145423.4 | SFRP2 | 3.609936 | 0.0137626 | TRUE | 0.6066932 | 9.1443302 | 332.0273188 |
ENSG00000147889.16 | CDKN2A | 4.430698 | 0.0110429 | TRUE | 0.0028771 | 0.0568843 | 0.4493352 |
ENSG00000172724.11 | CCL19 | 5.445712 | 0.0289911 | TRUE | 0.0000000 | 0.4903472 | 13.3117973 |
ENSG00000187616.4 | TMEM8C | 4.810346 | 0.0249134 | TRUE | 0.0299104 | 0.7739439 | 14.1112682 |
ENSG00000187908.15 | DMBT1 | 7.084251 | 0.0001474 | TRUE | 0.0000000 | 0.1825078 | 2.7332248 |
ENSG00000188257.10 | PLA2G2A | 4.589421 | 0.0079048 | TRUE | 0.0924933 | 3.1238318 | 87.8169232 |
ENSG00000275385.1 | CCL18 | 5.354389 | 0.0079048 | TRUE | 0.0151312 | 0.8857681 | 6.6600161 |
FSHD_signatures_baskets <- list(ecm=ecm, inflamm=inflamm, stress=stress)
save(FSHD_signatures_baskets,
file=file.path(pkg_dir, "data", "FSHD_signatures_baskets.rda"))
write_xlsx(FSHD_signatures_baskets,
path=file.path(pkg_dir, "stats", "FSHD_signatures_baskets.xlsx"))
Chao-Jen Wong, Seth D. Friedman, Leo H. Wang. “Longitudinal Measures of RNA Expression and Disease Activity in FSHD Muscle Biopsies.” Human Molecular Genetics, no. 6 (2020). doi: 10.1093/hmg/ddaa031.
Leo H. Wang, Dennis Shaw, Seth D. Friedman. “MRI-Informed Muscle Biopsies Correlate MRI with Pathology and DUX4 Target Gene Expression in FSHD.” Human Molecular Genetics, 2019. doi: 10.1093/hmg/ddy364.
R. Abbas, Dhays Seshasayee, Kristen Wolslegel. “IRIS: Deconvolution of Blood Microarray Data Identifies Cellular Activation Patterns in Systemic Lupus Erythematosus.” PLOS ONE, 2009. https://doi.org/10.1371/journal.pone.0006098.
Weiguang Mao, Boris M. Hartmann, Elena Zaslavsky. “Pathway-Level Information Extractor (PLIER) for Gene Expression Data.” Nature Methods, 2019.
Zizhen Yao, Judit Balog, Lauren Snider. “DUX4-Induced Gene Expression Is the Major Molecular Signature in FSHD Skeletal Muscle,” 2014. https://doi.org/doi: 10.1093/hmg/ddu251.