Analysis of ATAC Seq Data

MIA vs CON after LPS Treatment

Load Data

Convert to a DESEQ2 object to evaluate interactions

DDS <- dba.analyze(DBA, bRetrieveAnalysis=DBA_DESEQ2)
MvCinLPS <- results(DDS, list( c("Condition_MIA_vs_CON","ConditionMIA.TreatmentLPS")))

Volcano

t1 <- as.data.frame(MvCinLPS)
t1$sig <- t1$pvalue < 0.05
ggplot(t1, aes(x=log2FoldChange, y=-log10(pvalue), color = sig)) + geom_point() + scale_color_manual(values = c("black", "blue")) + theme_cowplot() + xlim(-1, 1.1) + labs(subtitle = "MIA vs CON at LPS", color="pval < 0.05")

Permutation Test for MIA vs CON at LPS

setwd("~/Data/ATACseq/ATAC-Nova-v3")
samples <- read.csv("ATAC_sampleSheet_34.csv", header = TRUE, stringsAsFactors = TRUE)
table(MvCinLPS$pvalue<0.05, dnn="p-value sig")
## p-value sig
## FALSE  TRUE 
## 67362  2212
table(MvCinLPS$padj<0.05, dnn="FDR sig")
## FDR sig
## FALSE 
## 69574

Prepare counts for permutation test

reference: http://jtleek.com/genstats/inst/doc/03_14_P-values-and-Multiple-Testing.html

counts <- dba.peakset(DBA, bRetrieve=TRUE, DataType=DBA_DATA_FRAME)
peaks <- counts[,1:3]
counts <- counts[,4:37]
counts <- as.matrix(counts)
out <- cbind(peaks, t1)
lps <- samples$Treatment == "LPS"
edata <- counts[,lps]
group <- samples$Condition[lps]
group <- factor(group, levels = c("MIA","CON"))
# sign of t-statistic is group1 - group2 so I want it to be MIA - CON so if + = up, - =down
set.seed(9)
B = 1000
tstats_obj = rowttests(edata,group)
tstat0 = matrix(NA,nrow=dim(edata)[1],ncol=B)
tstat = tstats_obj$statistic
for(i in 1:B){
  group0 = sample(group)
  tstat0[,i] = rowttests(edata,group0)$statistic
}

t-statistic distribution

  1. the red histogram is the observed t-statistic values for all peaks
  2. the black histogram is the means of the 1000 iterations for all peaks
t0mean <- apply(tstat0, 1, mean)
t1 <- hist(t0mean, breaks = 100, plot = F)
t3 <- hist(tstat, breaks = 100, plot = F)
plot(t3, col="red", xlim=c(-2,5), ylim=c(0,5000), lty="blank")
plot(t1, col="black",  add=T, lty="blank")

Calculate the percentage of null t-values that are more extreme than the test t-value and that becomes the new p-value.

th = rep(NA, length(tstat))
for(i in 1:length(tstat)){
  th[i] = mean(abs(tstat0[i,])>abs(tstat[i]))
}
table(th<0.05)
## 
## FALSE  TRUE 
## 61782  7792
# 7792 is more than the previous 2212 from DESeq which makes sense because its taking each comparison from its own null distribution. 

multiple comparison correction of the permuted p-values

bonf = p.adjust(th,method="bonferroni")
table(bonf)
## bonf
##     0     1 
##   113 69461
bh = p.adjust(th,method="BH")
table(bh<0.05)
## 
## FALSE  TRUE 
## 69461   113

113 DAR

compared the raw p-value with the multiple comparison values

pp <- cbind(tstats_obj$p.value, bonf, bh)
colnames(pp)=c("raw.pval", "bonf","bh")
pp <- as.data.frame(pp)
a<-ggplot(pp, aes(x=raw.pval, y=bonf)) + geom_point(size=1) + theme_cowplot()
b<-ggplot(pp, aes(x=raw.pval, y=bh)) + geom_point(size=1) + theme_cowplot()
plot_grid(a,b)

Evaluate fold changes

sig <- bh<0.05
df <- data.frame((MvCinLPS))
CON <- edata[,group=="CON"]
MIA <- edata[,group=="MIA"]
df$count.con.lps <- apply(CON,1,mean)
df$count.mia.lps <- apply(MIA,1,mean)
df$count.fc <- log2(df$count.mia.lps/df$count.con.lps)
plot(df$log2FoldChange, df$count.fc, xlim=c(-1,1.5), ylim=c(-1,1.5), xlab="DiffBind Fold Change",ylab="Fold Change by Count")
points(df[sig,]$log2FoldChange, df[sig,]$count.fc, col="red")
abline(a=0,b=1, col="blue")

Use homer to get peak annotations from bed file (annotatePeaks.pl)

sessionInfo()
## R version 4.0.3 (2020-10-10)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] qvalue_2.22.0               genefilter_1.72.1          
##  [3] cowplot_1.1.1               DESeq2_1.30.1              
##  [5] DiffBind_3.0.15             SummarizedExperiment_1.20.0
##  [7] Biobase_2.50.0              MatrixGenerics_1.2.1       
##  [9] matrixStats_0.59.0          GenomicRanges_1.42.0       
## [11] GenomeInfoDb_1.26.7         IRanges_2.24.1             
## [13] S4Vectors_0.28.1            BiocGenerics_0.36.1        
## [15] forcats_0.5.1               stringr_1.4.0              
## [17] dplyr_1.0.7                 purrr_0.3.4                
## [19] readr_1.4.0                 tidyr_1.1.3                
## [21] tibble_3.1.2                ggplot2_3.3.5              
## [23] tidyverse_1.3.1            
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.1               tidyselect_1.1.1         RSQLite_2.2.7           
##   [4] AnnotationDbi_1.52.0     grid_4.0.3               BiocParallel_1.24.1     
##   [7] munsell_0.5.0            base64url_1.4            systemPipeR_1.24.6      
##  [10] withr_2.4.2              colorspace_2.0-2         Category_2.56.0         
##  [13] highr_0.9                knitr_1.33               rstudioapi_0.13         
##  [16] labeling_0.4.2           bbmle_1.0.23.1           GenomeInfoDbData_1.2.4  
##  [19] mixsqp_0.3-43            hwriter_1.3.2            farver_2.1.0            
##  [22] bit64_4.0.5              pheatmap_1.0.12          coda_0.19-4             
##  [25] batchtools_0.9.15        vctrs_0.3.8              generics_0.1.0          
##  [28] xfun_0.24                BiocFileCache_1.14.0     R6_2.5.0                
##  [31] apeglm_1.12.0            invgamma_1.1             locfit_1.5-9.4          
##  [34] rsvg_2.1.2               bitops_1.0-7             cachem_1.0.5            
##  [37] DelayedArray_0.16.3      assertthat_0.2.1         scales_1.1.1            
##  [40] gtable_0.3.0             rlang_0.4.11             splines_4.0.3           
##  [43] rtracklayer_1.50.0       broom_0.7.8              brew_1.0-6              
##  [46] checkmate_2.0.0          yaml_2.2.1               reshape2_1.4.4          
##  [49] modelr_0.1.8             GenomicFeatures_1.42.3   backports_1.2.1         
##  [52] RBGL_1.66.0              tools_4.0.3              ellipsis_0.3.2          
##  [55] gplots_3.1.1             jquerylib_0.1.4          RColorBrewer_1.1-2      
##  [58] Rcpp_1.0.7               plyr_1.8.6               progress_1.2.2          
##  [61] zlibbioc_1.36.0          RCurl_1.98-1.3           prettyunits_1.1.1       
##  [64] openssl_1.4.4            ashr_2.2-47              haven_2.4.1             
##  [67] ggrepel_0.9.1            fs_1.5.0                 magrittr_2.0.1          
##  [70] data.table_1.14.0        reprex_2.0.0             truncnorm_1.0-8         
##  [73] mvtnorm_1.1-2            SQUAREM_2021.1           amap_0.8-18             
##  [76] hms_1.1.0                evaluate_0.14            xtable_1.8-4            
##  [79] XML_3.99-0.6             emdbook_1.3.12           jpeg_0.1-8.1            
##  [82] readxl_1.3.1             compiler_4.0.3           biomaRt_2.46.3          
##  [85] bdsmatrix_1.3-4          KernSmooth_2.23-20       V8_3.4.2                
##  [88] crayon_1.4.1             htmltools_0.5.1.1        GOstats_2.56.0          
##  [91] geneplotter_1.68.0       lubridate_1.7.10         DBI_1.1.1               
##  [94] dbplyr_2.1.1             MASS_7.3-54              rappdirs_0.3.3          
##  [97] ShortRead_1.48.0         Matrix_1.3-4             cli_3.0.0               
## [100] pkgconfig_2.0.3          GenomicAlignments_1.26.0 numDeriv_2016.8-1.1     
## [103] xml2_1.3.2               annotate_1.68.0          bslib_0.2.5.1           
## [106] XVector_0.30.0           AnnotationForge_1.32.0   rvest_1.0.0             
## [109] VariantAnnotation_1.36.0 digest_0.6.27            graph_1.68.0            
## [112] Biostrings_2.58.0        rmarkdown_2.9            cellranger_1.1.0        
## [115] edgeR_3.32.1             GSEABase_1.52.1          GreyListChIP_1.22.0     
## [118] curl_4.3.2               Rsamtools_2.6.0          gtools_3.9.2            
## [121] rjson_0.2.20             lifecycle_1.0.0          jsonlite_1.7.2          
## [124] askpass_1.1              limma_3.46.0             BSgenome_1.58.0         
## [127] fansi_0.5.0              pillar_1.6.1             lattice_0.20-44         
## [130] fastmap_1.1.0            httr_1.4.2               survival_3.2-11         
## [133] GO.db_3.12.1             glue_1.4.2               png_0.1-7               
## [136] bit_4.0.4                Rgraphviz_2.34.0         stringi_1.6.2           
## [139] sass_0.4.0               blob_1.2.1               latticeExtra_0.6-29     
## [142] caTools_1.18.2           memoise_2.0.0            DOT_0.1                 
## [145] irlba_2.3.3