# Reading the data ----
library(edgeR)

# Loading the .RData file restores the objects saved in it; the code below
# relies on one of them being called `tmp` and having a `counts` element.
load("../tutorial/Day3/Counts.RData")
head(tmp)

Counts <- tmp$counts
colnames(Counts)

# Relabel the six columns as <sample id><N|T>: 16N, 16T, 18N, 18T, 19N, 19T.
colnames(Counts) <- paste0(rep(c("16", "18", "19"), each = 2), c("N", "T"))
Counts
dim(Counts)
head(Counts)
rownames(Counts)

# Creating a DGEList object ----
# The row names of the count matrix are stored as the gene annotation.
dg <- DGEList(Counts, genes = rownames(Counts))
dg
head(dg$counts)
head(dg$genes)

# Filtering ----
# Keep genes whose counts-per-million exceed 1 in at least 2 samples.
countsPerMillion <- cpm(dg)
summary(countsPerMillion)

countCheck <- countsPerMillion > 1
head(countCheck)

keep <- which(rowSums(countCheck) >= 2)
dg <- dg[keep, ]
summary(cpm(dg))

# Normalisation ----
dg <- calcNormFactors(dg, method = "TMM")
summary(dg)

# Data exploration ----
plotMDS(dg)

# Setting up the model ----
# Every sample starts as "N"; columns whose name contains "T" are relabelled.
sampleType <- rep("N", ncol(dg))
sampleType
sampleType[grep("T", colnames(dg))] <- "T"
sampleType

# Consecutive pairs of columns are assigned to the same replicate (S1..S3),
# so the replicate enters the design as a blocking factor.
sampleReplicate <- paste0("S", rep(1:3, each = 2))

designMat <- model.matrix(~ sampleReplicate + sampleType)
designMat

# Estimating dispersions ----
dg <- estimateGLMCommonDisp(dg, design = designMat)
dg <- estimateGLMTrendedDisp(dg, design = designMat)
dg <- estimateGLMTagwiseDisp(dg, design = designMat)

plotBCV(dg)

# Differential expression ----
fit <- glmFit(dg, designMat)

# Test the sample-type coefficient. It is the 4th column of designMat;
# selecting it by name keeps the test correct if the design gains columns.
lrt <- glmLRT(fit, coef = "sampleTypeT")

# See ?topTags for the available options.
edgeR_result <- topTags(lrt)

# save() needs the *name* of an object, not an expression, so the results
# table is stored in a variable first.
edgeR_table <- topTags(lrt, n = 15000)$table

# Written next to the input file that was loaded from "../tutorial/Day3/".
save(edgeR_table, file = "../tutorial/Day3/edgeR_Result.RData")

# See ?decideTests for details on how genes are classified.
deStatus <- decideTestsDGE(lrt, p.value = 0.001)

# Non-zero entries (-1 or 1) mark genes called differentially expressed.
deGenes <- rownames(lrt)[as.logical(deStatus)]
plotSmear(lrt, de.tags = deGenes)
abline(h = c(-1, 1), col = 2)
