######################################################################
# Code for "A guide to QTL mapping with R/qtl"
# Karl W Broman and Saunak Sen
#
# Chapter 4: Single-QTL analysis
#######################################################################

######################################################################
# 4.1 Marker regression
######################################################################

# Load R/qtl and the hyper data
library(qtl)
data(hyper)

# Phenotype by genotype for two markers in the hyper data
par(mfrow=c(1,2))
plot.pxg(hyper, "D4Mit214")
plot.pxg(hyper, "D12Mit20")

# single-QTL scan by marker regression with the hyper data
out.mr <- scanone(hyper, method="mr")

# results for chromosome 12
out.mr[ out.mr$chr == 12, ]

# summary of out.mr
summary(out.mr, threshold=3)

# The biggest peak
max(out.mr)

# plot of marker regression results for chr 4 and 12
plot(out.mr, chr=c(4, 12), ylab="LOD score")

######################################################################
# 4.2 Interval mapping
######################################################################

###################################
# 4.2.1 Standard interval mapping
###################################

# Calculate genotype probabilities for the hyper data
hyper <- calc.genoprob(hyper, step=1, error.prob=0.001)

# illustrate the use of jittermap
hyper <- jittermap(hyper)

# Standard interval mapping for the hyper data
out.em <- scanone(hyper, method="em")

# Equivalent to the previous command
out.em <- scanone(hyper)

# Plot results of scanone with method="em"
plot(out.em, ylab="LOD score")

# Plot interval mapping and marker regression results together
plot(out.em, out.mr, chr=c(4, 12), col=c("blue", "red"),
     ylab="LOD score")

# Equivalent to the previous code
plot(out.em, chr=c(4, 12), col="blue", ylab="LOD score")
plot(out.mr, chr=c(4, 12), col="red", add=TRUE)

# A black/white version of the plot
plot(out.em, out.mr, chr=c(4, 12), col="black", lty=1:2,
     ylab="LOD score")

###################################
# 4.2.2 Haley--Knott regression
###################################

# Haley--Knott regression with the hyper data
out.hk <- scanone(hyper, method="hk")

# Plot results of EM and H-K together
plot(out.em, out.hk, chr=c(1,4,15), col=c("blue","red"),
     ylab="LOD score")

# Difference between H-K and EM results for hyper data
plot(out.hk - out.em, chr=c(1,4,15), ylim=c(-0.5, 1.0),
     ylab=expression(LOD[HK] - LOD[EM]))
abline(h=0, lty=3)

# Illustration of fancy math symbols in plots
plot(rnorm(100), rnorm(100), xlab=expression(hat(mu)[0]),
      ylab=expression(alpha^beta),
      main=expression(paste("Plot of ", alpha^beta, 
          " versus ", hat(mu)[0])))

###################################
# 4.2.3 Extended Haley--Knott regression
###################################

# extended Haley--Knott method with the hyper data
out.ehk <- scanone(hyper, method="ehk")

# Plot the three methods together
plot(out.em, out.hk, out.ehk, chr=c(1,4,15), ylab="LOD score",
     lty=c(1,1,2))

# Equivalent to the above
plot(out.em, chr=c(1,4,15), ylab="LOD score")
plot(out.hk, chr=c(1,4,15), col="blue", add=TRUE)
plot(out.ehk, chr=c(1,4,15), col="red", lty=2, add=TRUE)

# Plot differences between LOD scores
plot(out.hk - out.em, out.ehk - out.em, chr=c(1, 4, 15),
     col=c("blue", "red"), ylim=c(-0.5, 1),
     ylab=expression(LOD[HK] - LOD[EM]))
abline(h=0, lty=3)

###################################
# 4.2.4 Multiple imputation
###################################

# Perform multiple imputations with hyper data
hyper <- sim.geno(hyper, step=1, n.draws=64, error.prob=0.001)

# Interval mapping by multiple imputation with hyper data
out.imp <- scanone(hyper, method="imp")

# Plot imputation and em results for hyper data
plot(out.em, out.imp, chr=c(1,4,15), col=c("blue", "red"),
     ylab="LOD score")

# Difference between H-K and EM results for hyper data
plot(out.imp - out.em, chr=c(1,4,15), ylim=c(-0.5, 0.5),
     ylab=expression(LOD[IMP] - LOD[EM]))
abline(h=0, lty=3)

###################################
# 4.2.5 Comparison of methods
###################################

# Omit data from two chr 1 markers in listeria data
data(listeria)
mar2drop <- markernames(listeria, chr=1)[2:12]
listeria <- drop.markers(listeria, mar2drop)

# Run standard interval mapping 
listeria <- calc.genoprob(listeria, step=1, error.prob=0.001)
outl.em <- scanone(listeria, chr=1)
outl.hk <- scanone(listeria, chr=1, method="hk")
outl.ehk <- scanone(listeria, chr=1, method="ehk")
plot(outl.em, outl.hk, outl.ehk, ylab="LOD score", 
     lty=c(1,1,2))

# drop markers in hyper data having most missing data
data(hyper)
nt.mar <- ntyped(hyper, "mar")
mar2drop <- names(nt.mar[nt.mar < 92])
hyper.rev <- drop.markers(hyper, mar2drop)

# omit intermediate individuals' genotype data
nm.ind <- nmissing(hyper.rev)
ind2drop <- nm.ind > 0
for(i in 1:nchr(hyper))
  hyper.rev$geno[[i]]$data[ind2drop,] <- NA

# scanone with all individuals
hyper.rev <- calc.genoprob(hyper.rev, step=1, 
                           error.prob=0.001)
out1.em <- scanone(hyper.rev)
out1.hk <- scanone(hyper.rev, method="hk")
out1.ehk <- scanone(hyper.rev, method="ehk")

# plot LOD curves for EM, HK and eHK against one another
plot(out1.em, out1.hk, out1.ehk, ylab="LOD score", 
     lty=c(1,1,2))

# plot differences HK-EM and eHK-EM
plot(out1.hk-out1.em, out1.ehk-out1.em, col=c("blue", "red"),
     ylim=c(-0.1, 4), ylab=expression(LOD[HK] - LOD[EM]))
abline(h=0, lty=3)

# scanone with only the genotyped individuals
hyper.ex <- subset(hyper.rev, ind = !ind2drop)
out2.em <- scanone(hyper.ex)
out2.hk <- scanone(hyper.ex, method="hk")
out2.ehk <- scanone(hyper.ex, method="ehk")

# plot difference in LOD scores with fully genotyped individuals vs everyone
plot(out2.em-out1.em, out2.hk-out1.em, out2.ehk-out1.em,
     ylim=c(-0.1, 0.2), col=c("black", "green", "red"), 
     lty=c(1,3,2), ylab="Difference in LOD scores")
abline(h=0, lty=3)

# Time to run calc.genoprob
data(hyper)
system.time(hyper <- calc.genoprob(hyper, step=0.25, 
                                   error.prob=0.001))

# Time to run scanone
system.time(test.hk <- scanone(hyper, method="hk"))
system.time(test.ehk <- scanone(hyper, method="ehk"))
system.time(test.em <- scanone(hyper, method="em"))

# Time to do multiple imputation
system.time(hyper <- sim.geno(hyper, step=0.25, 
                              error.prob=0.001, n.draws=32))
system.time(test.imp <- scanone(hyper, method="imp"))

######################################################################
# 4.3 Significance thresholds
######################################################################

# Calculate genotype probabilities for the hyper data
data(hyper)
hyper <- calc.genoprob(hyper, step=1, error.prob=0.001)

# Code to illustrate a permutation test 
operm <- scanone(hyper, n.perm=1000, verbose=FALSE)

# First five permutation results 
operm[1:5]

# histogram of the permutation results
plot(operm)

# LOD thresholds
summary(operm, alpha=c(0.20, 0.05))

# define strata for hyper data
strat <- (ntyped(hyper) > 100)

# Code to illustrate a stratified permutation test 
operms <- scanone(hyper, n.perm=1000, perm.strata=strat,
                  verbose=FALSE)

summary(operms, alpha=c(0.20, 0.05))

# peaks meeting 10% significance level
summary(out.em, perms=operms, alpha=0.1)

# P-values for the peak LOD scores
summary(out.em, perms=operms, alpha=0.1, pvalues=TRUE)

# Upper confidence limit on true p-value.
binom.test(0, 1000)$conf.int

######################################################################
# 4.4 The X chromosome
######################################################################

###################################
# 4.4.1 Analysis
###################################

###################################
# 4.4.2 Significance thresholds
###################################

###################################
# 4.4.3 Example
###################################

# Access the iron data and make a plot
library(qtlbook)
data(iron)
plot(iron, pheno=1:2)

# scatterplot of log2(liver) and log2(spleen) in the iron data
plot(log2(liver) ~ log2(spleen), data=iron$pheno, 
     col=c("red", "blue")[iron$pheno$sex],
     pch=c(1,4)[iron$pheno$sex])

# Genome scan for the iron data
iron$pheno[,1] <- log2(iron$pheno[,1])
iron <- calc.genoprob(iron, step=1, error.prob=0.001)
out.liver <- scanone(iron)

# peaks with LOD>3
summary(out.liver, 3)

# autosome- and X-chr-specific permutation tests
operm.liver <- scanone(iron, n.perm=1000, perm.Xsp=TRUE,
                       verbose=FALSE)

# LOD thresholds
summary(operm.liver, alpha=0.05)

# Summary with p-values
summary(out.liver, perms=operm.liver, alpha=0.05, 
        pvalues=TRUE)

# Create strata
strat <- as.numeric(iron$pheno$sex) + iron$pheno$pgm*2
table(strat)

# Perform stratified permutation test
operm.liver.strat <- scanone(iron, n.perm=1000, perm.Xsp=TRUE,
                             perm.strata=strat, verbose=FALSE)

# LOD thresholds from stratified permutation test
summary(operm.liver.strat, alpha=0.05)

######################################################################
# 4.5 Interval estimates of QTL location
######################################################################

# LOD support and Bayes credible intervals for hyper, chr 4
lodint(out.em, 4, 1.5)
bayesint(out.em, 4, 0.95)

# Intervals for hyper, chr 4, expanded to the nearest flanking markers
lodint(out.em, 4, 1.5, expandtomarkers=TRUE)
bayesint(out.em, 4, 0.95, expandtomarkers=TRUE)

# bootstrap for QTL location
out.boot <- scanoneboot(hyper, chr=4, n.boot=1000)

# Get bootstrap confidence interval
summary(out.boot)

######################################################################
# 4.6 QTL effects
######################################################################

# find marker nearest 29.5 cM on chr 4
find.marker(hyper, 4, 29.5)

# effectplot with hyper data, marker D4Mit164
hyper <- sim.geno(hyper, n.draws=16, error.prob=0.001)
effectplot(hyper, mname1="D4Mit164")

# Get the estimates from effectplot
eff <- effectplot(hyper, mname1="D4Mit164", draw=FALSE)
eff

# Plot phenotypes against genotype at D4Mit164
plot.pxg(hyper, marker="D4Mit164")

######################################################################
# 4.7 Multiple phenotypes
######################################################################

# Add log(liver) and log(spleen) to the phenotypes
data(iron)
iron$pheno <- cbind(iron$pheno[,1:2], 
                    log2liver=log2(iron$pheno$liver),
                    log2spleen=log2(iron$pheno$spleen),
                    iron$pheno[,3:4])

# Genome scan with log(liver)
iron <- calc.genoprob(iron, step=1, error.prob=0.001)
out.logliver <- scanone(iron, pheno.col=3)

# Genome scan, referring to the phenotype by name.
out.logliver <- scanone(iron, pheno.col="log2liver")

# Genome scan, referring to the phenotype by name.
out.logliver <- scanone(iron, 
                        pheno.col=log2(iron$pheno$liver))

# Genome scan with all four phenotypes
out.all <- scanone(iron, pheno.col=1:4)

# Look at the beginning of out.all
out.all[1:5,]

# summary of out.all
summary(out.all, threshold=3, lodcolumn=4)

# summary of out.all with format="allpheno"
summary(out.all, threshold=3, format="allpheno")

# summary of out.all with format="allpeaks"
summary(out.all, threshold=3, format="allpeaks")

# plot of all LOD curves
plot(out.all, lodcolumn=1:2, col=c("blue", "red"), 
     chr=c(2, 7, 8, 9, 16), ylim=c(0,12.7), ylab="LOD score")
plot(out.all, lodcolumn=3:4, col=c("blue", "red"), lty=2,
     chr=c(2, 7, 8, 9, 16), add=TRUE)

# Permutations for all 4 iron phenotypes
operm.all <- scanone(iron, pheno.col=1:4, n.perm=1000, 
                     perm.Xsp=TRUE)

# LOD thresholds for all phenotypes
summary(operm.all, alpha=0.05)

# summary with 5% thresholds calculated from perms, and with pvalues
summary(out.all, format="allpeaks", perms=operm.all, 
        alpha=0.05, pvalues=TRUE)

# end of chap04.R