###################################################################### # Code for "A guide to QTL mapping with R/qtl" # Karl W Broman and Saunak Sen # # Chapter 6: Experimental design and power ####################################################################### ###################################################################### # 6.1 Phenotypes and covariates ###################################################################### ###################################################################### # 6.2 Strains and strain surveys ###################################################################### ###################################################################### # 6.3 Theory ###################################################################### ################################### # 6.3.1 Variance attributable to a locus ################################### ################################### # 6.3.2 Residual error variance ################################### ################################### # 6.3.3 Information content ################################### ###################################################################### # 6.4 Examples with R/qtlDesign ###################################################################### ################################### # 6.4.1 Functions ################################### ################################### # 6.4.2 Choosing a cross ################################### # Load R/qtlDesign library(qtlDesign) # LOD threshold for a mouse backcross thresh(G=1440, cross="bc", p=0.05) # LOD threshold for a mouse intercross thresh(G=1440, cross="f2", p=0.05) # Detectable effects for a sample size of 100 detectable(cross="bc", n=100, sigma2=1, thresh=3.2) detectable(cross="f2", n=100, sigma2=1, thresh=4.2) # Sample size in intercross samplesize(cross="f2", effect=c(5,0), env.var=64, gen.var=25, thresh=4.2) # Sample size in intercross for larger genetic variance samplesize(cross="f2", effect=c(5,0), env.var=64, gen.var=50, thresh=4.2) samplesize(cross="f2", effect=c(5,0), env.var=64, gen.var=100, thresh=4.2) # Sample sizes in a backcross samplesize(cross="bc", effect=5, env.var=64, gen.var=25, thresh=3.2) samplesize(cross="bc", effect=5, env.var=64, gen.var=50, thresh=3.2) samplesize(cross="bc", effect=5, env.var=64, gen.var=100, thresh=3.2) # LOD threshold in RIL by sibling mating thresh(G=1440*4, cross="bc", p=0.05) # Sample size in RIL samplesize(cross="ri", effect=5, env.var=64, gen.var=25, thresh=3.8) # RIL sample size with biological replicates samplesize(cross="ri", effect=5, env.var=64, gen.var=25, thresh=3.8, bio.rep=2) samplesize(cross="ri", effect=5, env.var=64, gen.var=25, thresh=3.8, bio.rep=4) samplesize(cross="ri", effect=5, env.var=64, gen.var=25, thresh=3.8, bio.rep=16) samplesize(cross="ri", effect=5, env.var=64, gen.var=25, thresh=3.8, bio.rep=100) ################################### # 6.4.3 Genotyping strategies ################################### # Optimal marker spacing for intercross when all individuals are typed optspacing(cost=1/300, G=1440, sel.frac=1, cross="f2") # Optimal marker spacing and selection fraction for an intercross optspacing(cost=10/3000, G=1440, sel.frac=NULL, cross="f2") ################################### # 6.4.4 Phenotyping strategies ################################### # Plot illustrating phenotyping strategies library(qtl) mp <- sim.map(len=rep(100,5), n.mar=11, include.x=FALSE, eq.spacing=TRUE) cr <- sim.cross(mp, model=c(1,50,1,0), n.ind=200, type="f2") idx40 <- mma(pull.geno(cr, chr=1), p=40) cr <- calc.genoprob(cr, step=2) out1 <- scanone(cr) out2 <- scanone(subset(cr, ind=idx40$cList)) out3 <- scanone(subset(cr, ind=sample(1:200, 40))) plot(out1, out2, out3, ylab="LOD score") ################################### # 6.4.5 Fine mapping ################################### # Median length of confidence intervals in backcross and intercross ci.length(cross="bc", n=250, effect=5, p=0.95, gen.var=25, env.var=64) ci.length(cross="f2", n=250, effect=c(5,0), p=0.95, gen.var=25, env.var=64) ###################################################################### # 6.5 Other experimental populations ###################################################################### ###################################################################### # 6.6 Estimating power and precision by simulation ###################################################################### # load R/qtl and the map10 object library(qtl) data(map10) # simulate under null n.sim <- 10000 res0 <- rep(NA, n.sim) for(i in 1:n.sim) { x <- sim.cross(map10[1:19], n.ind=250, type="f2") x <- calc.genoprob(x, step=1) out <- scanone(x, method="hk") res0[i] <- max(out[,3]) } # estimated LOD threshold print(thr <- quantile(res0, 0.95)) # genome length print(G <- sum(summary(map10)[1:19,"length"])) # Estimate significance threshold thresh(G, "f2", d=10, p=0.05) # histogram of Genome-wide maximum LOD scores hist(res0, breaks=100, xlab="Genome-wide maximum LOD score") rug(res0) # simulate data with one QTL alpha <- sqrt(2*0.08/(1-0.08)) n.sim <- 10000 loda <- est <- lo <- hi <- rep(NA, n.sim) for(i in 1:n.sim) { x <- sim.cross(map10[1], n.ind=250, type="f2", model=c(1, 54, alpha, 0)) x <- calc.genoprob(x, step=1) out <- scanone(x, method="hk") loda[i] <- max(out[,3]) temp <- out[out[,3]==loda[i],2] if(length(temp) > 1) temp <- sample(temp, 1) est[i] <- temp li <- lodint(out) lo[i] <- li[1,2] hi[i] <- li[nrow(li),2] } # estimated power mean(loda >= thr) # Estimate power with R/qtlDesign powercalc("f2", 250, sigma2=1, effect=c(alpha,0), thresh=thr, theta=0.09) # histogram of genome-wide maximum LOD scores hist(loda, breaks=100, xlab="Maximum LOD score") # histogram of genome-wide maximum LOD scores hist(est, breaks=100, xlab="Estimated QTL location (cM)") rug(map10[[1]]) # Average estimated location mean(est) # SD of estimated locations sd(est) # SD of estimated location when LOD exceeds threshold sig <- (loda >= thr) sd(est[sig]) # Estimated coverage of 1.5-LOD support interval mean(lo <= 54 & hi >= 54) # Estimated coverage when LOD exceeds threshold mean(lo[sig] <= 54 & hi[sig] >= 54) # histogram of width of 1.5-LOD support interval hist(hi[sig]-lo[sig], breaks=100, xlab="Width of 1.5-LOD support interval (cM)") # end of chap06.R