exp_genes <- read.csv('data/unique_consensus_expressed.csv')
DEG <- read.csv('data/DEGgenes.csv')
notDEG <- setdiff(exp_genes$gene, DEG$gene)
cbind(nrow(DEG), length(notDEG), nrow(exp_genes))
## [,1] [,2] [,3]
## [1,] 3431 17606 21025
message("3441 total mouse DEG mapped to ", length(unique(DEG$gene)), " unique genes")
## 3441 total mouse DEG mapped to 3419 unique genes
GWAS_cat <- read.csv('data/GWAS_catalog_gene_centered.csv')
gwas.all <- GWAS_cat[grep("autism", GWAS_cat$disease),]
gwas <- gwas.all[which(gwas.all$gene %in% exp_genes$gene), ]
message(nrow(gwas.all), " total gwas risk genes mapped to ", nrow(gwas), " expressed mouse genes")
## 820 total gwas risk genes mapped to 462 expressed mouse genes
DEG.gwas <- length(intersect(DEG$gene, gwas$gene))
DEG.notGwas <- length(setdiff(DEG$gene, gwas$gene))
notDEG.gwas <- length(intersect(notDEG, gwas$gene))
notDEG.notGwas <- length(setdiff(notDEG, gwas$gene))
pct.DEG <- length(intersect(DEG$gene, gwas$gene))/length(DEG$gene)*100
pct.notDEG <- length(intersect(notDEG, gwas$gene))/length(notDEG)*100
mat <- rbind(c(DEG.gwas, DEG.notGwas), c(notDEG.gwas, notDEG.notGwas))
rownames(mat) <- c("DEG & GWAS | DEG & notGWAS", "notDEG & GWAS | notDEG & notGWAS")
mat
## [,1] [,2]
## DEG & GWAS | DEG & notGWAS 95 3324
## notDEG & GWAS | notDEG & notGWAS 367 17239
fit <- chisq.test(mat)
fit
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: mat
## X-squared = 6.0988, df = 1, p-value = 0.01353
print(paste0(round(pct.DEG, 2), '% of DEG are associated with Autism'))
## [1] "2.77% of DEG are associated with Autism"
print(paste0(round(pct.notDEG, 2), '% of the expressed but not DEG are associated with Autism'))
## [1] "2.08% of the expressed but not DEG are associated with Autism"
if(pct.DEG > pct.notDEG & fit$p.value < 0.05){
print(paste0('Risk variations ARE significantly enriched in DEG P-Value = ', round(fit$p.value,3)))
} else {
print(paste0('Risk variations are NOT significantly enriched in DEG P-Value = ', round(fit$p.value,3)))
}
## [1] "Risk variations ARE significantly enriched in DEG P-Value = 0.014"