# Run quality control template first to generate [bcbioRNADataSet]
source("setup.R")
library(DESeq2)
data(bcb) # only works if rda saved in .data

# Design formula. Uncomment with correct values.
# design <- formula(~genotype)

# Contrast. Uncomment with correct values.
# 1. Design matrix parameter.
# 2. Numerator for LFC (expt).
# 3. Denominator for LFC (control).
# @seealso [DESeq2::results()]
# contrast <- c("genotype", "mutant", "wildtype")

# Significance cutoffs
alpha <- 0.05
lfc <- 0

Overview

> metadata_table(bcb)
Sample metadata
group
ctrl
ctrl
ko
ko

> dds <- DESeqDataSetFromTximport(
+     txi = txi(bcb),
+     colData = colData(bcb),
+     design = design) %>%
+     DESeq
> rld <- rlog(dds)
> save_data(dds, rld, dir = data_dir)

Alpha level (FDR) cutoffs

Let’s take a look at the number of genes we get with different false discovery rate (FDR) cutoffs. These tests subset P values that have been multiple test corrected using the Benjamini Hochberg (BH) method (Benjamini and Hochberg 1995).

> alpha_summary(dds)
0.1 0.05 0.01 0.001 1e-06
LFC > 0 (up) 179, 0.61% 142, 0.48% 93, 0.32% 58, 0.2% 25, 0.085%
LFC < 0 (down) 140, 0.48% 104, 0.35% 58, 0.2% 38, 0.13% 15, 0.051%
outliers 0, 0% 0, 0% 0, 0% 0, 0% 0, 0%
low counts 11599, 39% 11046, 38% 11046, 38% 10494, 36% 16570, 56%
cutoff (mean count < 7) (mean count < 6) (mean count < 6) (mean count < 5) (mean count < 74)

Results

> res_unshrunken <- results(
+     dds,
+     contrast = contrast,
+     alpha = alpha)
> 
> # DESeqResults with shrunken log2 fold changes (LFC).
> # Still being prototyped, see `?lfcShrink` (DESeq 1.16).
> res_shrunken <- lfcShrink(
+     dds = dds,
+     contrast = contrast,
+     res = res_unshrunken)
> 
> # Compare these two results and pick which one to report
> res <- res_shrunken
> save_data(res, res_unshrunken, res_shrunken, dir = data_dir)

We performed the analysis using a BH adjusted P value cutoff of 0.05 and a log fold-change (LFC) ratio cutoff of 0.

Plots

Mean average (MA)

An MA plot compares transformed counts on M (log ratio) and A (mean average) scales (Y. H. Yang et al. 2002).

> plot_ma(res)

Volcano

A volcano plot compares significance (BH-adjusted P value) against fold change (log2) (Cui and Churchill 2003; Li et al. 2014). Genes in the green box with text labels have an adjusted P value are likely to be the top candidate genes of interest.

> plot_volcano(bcb, res = res, lfc = lfc)