Simulation Example
November 12, 2014
The motivation for this example has to do with different ways one can calculate individual differences with regard to the effect of some manipulation that pushes around cortisol levels.
As a general resource for simulation in R, I highly recommend Hadley Wickham’s simulation presntation. Also, the psych
package has some great tools for generating data (with great support in this helpful guide by William Revelle).
library(plyr)
library(ggplot2)
library(reshape2)
Set parameters here
cortPopMeanT1 <- 10 # Mean of population
cortPopSDT1 <- 2# SD of population
cortIndivDiffMean <- 4 # Mean of true absolute difference
cortIndivDiffSD <- 1 # SD of true absolute difference
epsilonM <- 0 # Mean of error
epsilonSD <- 1 # SD of error
N <- 100 # sample size
ITERS <- 1000
expRawDiff <- cortIndivDiffMean
expPercDiff <- cortIndivDiffMean/cortPopMeanT1
1. Make a function to let us draw from some distributions without worrying about their parameterization.
We can also use this to get each person’s difference score.
someSample <- function(n,popmean,popsd,dist='gamma'){
if(dist == 'gamma'){
shape=popmean^2/popsd^2
scale=popsd^2/popmean
return(rgamma(n,shape=shape,scale=scale))
}
if(dist == 'norm'){
return(rnorm(n,mean=popmean,sd=popsd))
}
}
2. Now, let’s write a function to build our data frame
prePostDFgen <- function(n,meanInit,sdInit,meanDiff,sdDiff,eM,eSD,distInit='gamma',distDiff='norm'){
#Returns a DF with c('pre','post') %in% names(DF)
initSamp <- someSample(n,meanInit,sdInit,distInit)
diffSamp <- someSample(n,meanDiff,sdDiff,distDiff)
errSampI <- rnorm(n,eM,eSD)
errSampD <- rnorm(n,eM,eSD)
data.frame(
pre=initSamp+errSampI,
post=initSamp+diffSamp+errSampD)
}
3. Now, three functions to calculate the within-person changes
testDF<-prePostDFgen(
N,
cortPopMeanT1,
cortPopSDT1,
cortIndivDiffMean,
cortIndivDiffSD,
epsilonM,
epsilonSD,
distInit='gamma',
distDiff='norm')
rawDiff <- function(pre,post) post-pre # pre and post should be vectors
percDiff <- function(pre,post) (post-pre)/pre
residDiff <- function(pre,post) resid(lm(post~1+pre))
rawDiffMean <- function(pre,post) mean(rawDiff(pre,post))
percDiffMean <- function(pre,post) mean(percDiff(pre,post))
residDiffMean <- function(pre,post) mean(residDiff(pre,post))
rawDiffSD <- function(pre,post) sd(rawDiff(pre,post))
percDiffSD <- function(pre,post) sd(percDiff(pre,post))
residDiffSD <- function(pre,post) sd(residDiff(pre,post))
corrDiffTests <- function(pre,post) {
aDF<-data.frame(
rawDiff = rawDiff(pre,post),
percDiff = percDiff(pre,post),
residDiff = residDiff(pre,post)
)
aCor<-cor(aDF)
aList<-as.list(aCor[lower.tri(aCor)])
names(aList)<-combn(colnames(aCor),2,FUN=paste,collapse='_')
aList
}
4. For each iteration, we want to do some things to the DF and record our results.
getPrePostSummary <- function(pre,post,listOfStatsFuncs){
library(plyr)
llply(listOfStatsFuncs,
function(aFunc){
rez<-aFunc(pre,post)
rez
})
}
I want to put the list of functions together so we can pass it to our summary function.
listOfFunc <- list(rawDiffMean, rawDiffSD, percDiffMean, percDiffSD, residDiffMean, residDiffSD, corrDiffTests)
names(listOfFunc) <- c('rawDiffMean', 'rawDiffSD', 'percDiffMean', 'percDiffSD', 'residDiffMean', 'residDiffSD','corrDiffTests')
5. Let’s put it all together.
simRezzies <- ldply(
1:ITERS,
function(x){
aDF <- prePostDFgen(
N,
cortPopMeanT1,
cortPopSDT1,
cortIndivDiffMean,
cortIndivDiffSD,
epsilonM,
epsilonSD,
distInit='gamma',
distDiff='norm')
sumStats <- getPrePostSummary(aDF$pre, aDF$post, listOfFunc)
as.data.frame(sumStats)
})
Here’s a sample of a one of our simulated data sets
testDF<-prePostDFgen(
N,
cortPopMeanT1,
cortPopSDT1,
cortIndivDiffMean,
cortIndivDiffSD,
epsilonM,
epsilonSD,
distInit='gamma',
distDiff='norm')
ggplot(melt(testDF),aes(x=value))+
geom_histogram(aes(fill=variable,y=..density..),position=position_dodge())+
geom_density(alpha=.2, aes(fill=variable))+theme(panel.background=element_rect(fill='#FFFFFF'))
## No id variables; using all as measure variables
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
Here’s the result!
simRezzies$RawBias <- expRawDiff-simRezzies$rawDiffMean
simRezzies$PercBias <- expPercDiff-simRezzies$percDiffMean
library(knitr)
kable(simRezzies,digits=2)
rawDiffMean | rawDiffSD | percDiffMean | percDiffSD | residDiffMean | residDiffSD | corrDiffTests.rawDiff_percDiff | corrDiffTests.rawDiff_residDiff | corrDiffTests.percDiff_residDiff | RawBias | PercBias |
---|---|---|---|---|---|---|---|---|---|---|
4.01 | 1.73 | 0.44 | 0.26 | 0 | 1.62 | 0.87 | 0.94 | 0.67 | -0.01 | -0.04 |
4.09 | 1.49 | 0.43 | 0.22 | 0 | 1.42 | 0.87 | 0.95 | 0.69 | -0.09 | -0.03 |
3.95 | 1.74 | 0.43 | 0.23 | 0 | 1.55 | 0.92 | 0.89 | 0.68 | 0.05 | -0.03 |
4.23 | 1.81 | 0.48 | 0.27 | 0 | 1.70 | 0.85 | 0.94 | 0.65 | -0.23 | -0.08 |
4.02 | 1.82 | 0.45 | 0.35 | 0 | 1.67 | 0.80 | 0.92 | 0.59 | -0.02 | -0.05 |
4.29 | 1.72 | 0.46 | 0.23 | 0 | 1.64 | 0.88 | 0.95 | 0.71 | -0.29 | -0.06 |
4.04 | 1.89 | 0.44 | 0.25 | 0 | 1.81 | 0.88 | 0.95 | 0.71 | -0.04 | -0.04 |
4.28 | 1.98 | 0.47 | 0.29 | 0 | 1.86 | 0.87 | 0.94 | 0.68 | -0.28 | -0.07 |
3.94 | 1.67 | 0.43 | 0.22 | 0 | 1.66 | 0.86 | 0.99 | 0.79 | 0.06 | -0.03 |
3.92 | 1.73 | 0.44 | 0.25 | 0 | 1.69 | 0.89 | 0.98 | 0.78 | 0.08 | -0.04 |
4.33 | 1.61 | 0.47 | 0.21 | 0 | 1.61 | 0.79 | 1.00 | 0.79 | -0.33 | -0.07 |
4.22 | 1.65 | 0.46 | 0.25 | 0 | 1.58 | 0.85 | 0.96 | 0.69 | -0.22 | -0.06 |
3.61 | 1.39 | 0.41 | 0.23 | 0 | 1.33 | 0.80 | 0.96 | 0.62 | 0.39 | -0.01 |
3.99 | 1.50 | 0.46 | 0.28 | 0 | 1.39 | 0.84 | 0.93 | 0.63 | 0.01 | -0.06 |
3.75 | 1.62 | 0.42 | 0.22 | 0 | 1.51 | 0.89 | 0.94 | 0.71 | 0.25 | -0.02 |
3.90 | 1.92 | 0.43 | 0.28 | 0 | 1.83 | 0.83 | 0.95 | 0.66 | 0.10 | -0.03 |
4.22 | 1.75 | 0.44 | 0.22 | 0 | 1.74 | 0.87 | 1.00 | 0.82 | -0.22 | -0.04 |
4.10 | 1.66 | 0.45 | 0.22 | 0 | 1.64 | 0.90 | 0.99 | 0.82 | -0.10 | -0.05 |
4.25 | 1.49 | 0.46 | 0.21 | 0 | 1.45 | 0.87 | 0.98 | 0.75 | -0.25 | -0.06 |
3.77 | 1.75 | 0.40 | 0.22 | 0 | 1.74 | 0.87 | 1.00 | 0.84 | 0.23 | 0.00 |
4.07 | 1.80 | 0.44 | 0.22 | 0 | 1.76 | 0.89 | 0.98 | 0.80 | -0.07 | -0.04 |
3.90 | 1.86 | 0.41 | 0.27 | 0 | 1.80 | 0.88 | 0.97 | 0.76 | 0.10 | -0.01 |
4.23 | 1.67 | 0.46 | 0.25 | 0 | 1.60 | 0.88 | 0.96 | 0.72 | -0.23 | -0.06 |
… |
sumSimRezzies<-as.data.frame(sapply(simRezzies,mean))
kable(sumSimRezzies,col.names='Mean')
Mean | |
---|---|
rawDiffMean | 4.0030541 |
rawDiffSD | 1.7294373 |
percDiffMean | 0.4341326 |
percDiffSD | 0.2447261 |
residDiffMean | 0.0000000 |
residDiffSD | 1.6614440 |
corrDiffTests.rawDiff_percDiff | 0.8648646 |
corrDiffTests.rawDiff_residDiff | 0.9610976 |
corrDiffTests.percDiff_residDiff | 0.7275404 |
RawBias | -0.0030541 |
PercBias | -0.0341326 |