####################
#Demonstrating randomization tests
####################
library(dplyr)
library(tidyr)
#read in the data; results.no.outliers.csv must be in the working directory.
#Columns used later in this script: subject, dependencyLength, embeddedStructure, zscores
d= read.csv("results.no.outliers.csv")
#############################
##randomization test for small numbers####
##This calculates the complete set of randomizations; this can only be done for small numbers
#############################
#I am going to use the "combinat" package to create the permutations easily
#(note: a combn() function also ships with base R in the utils package)
#install the package only if it is not already installed: an unconditional
#install.packages() re-downloads the package on every run and needs network access
if(!requireNamespace("combinat", quietly=TRUE)){
install.packages("combinat")
}
#load the package so you can use it. You have to load packages that you want to use every time you start R
library(combinat)
#define our dataset. I am just making these up.
dataset <- c(-.71, .86, .07, -.71, -1.21, -.09)
#calculate the sum of all values; this will be useful later
total.sum <- sum(dataset)
#Use the combn() function to get every way of choosing 3 of the 6 values
#(6 choose 3 = 20); each column of the resulting 3 x 20 matrix is one group of 3
combinations <- combn(dataset, 3)
#Let's call these 3 values the control group (leaving the other 3 as the target group). Here we sum them.
#apply() runs a function over every column (MARGIN=2) or every row (MARGIN=1)
control.sums <- apply(combinations, MARGIN=2, FUN=sum)
#and here the mean of each candidate control group
control.means <- apply(combinations, MARGIN=2, FUN=mean)
#The sums are useful here: target sum = total sum - control sum, so we get each
#target mean without ever enumerating the target groups.
#Notice that R subtracts vectors elementwise; this is really useful!
target.means <- (total.sum - control.sums)/3
#control mean minus target mean for every possible group assignment
mean.diffs <- control.means - target.means
#We can plot the mean differences in a histogram if we want:
hist(mean.diffs, breaks=12)
#Here we can calculate the difference that we observed in our experiment
observed.diff <- (sum(-.71,.86,.07)/3)-(sum(-.71,-1.21,-.09)/3)
#And finally the p-value: the proportion of all possible assignments whose
#difference is at least as large as the observed one
#(summing a logical vector counts the TRUEs)
p <- sum(mean.diffs >= observed.diff)/length(mean.diffs)
#############################
##randomization test for large numbers
##This won't calculate the full set of randomizations, but it will calculate a large number of them
##Note that this is independent samples, which is probably not what we want
#############################
#first, let's combine our two groups into one group, as per the claim of the null hypothesis
#Let's do this for differences between means of the two "long" sentences
wh=subset(d, dependencyLength == "lg")
#pool the z-scores across both conditions (the null hypothesis says the labels are exchangeable)
combined=wh$zscores
#create 9,999 randomizations
#we create one less than the number we want so that we can add the correction factor of +1 at the end
#each column of "permuted" is one random reordering of the pooled scores
#NOTE(review): sample() is not seeded, so results differ run to run; call set.seed() first for reproducibility
permuted = replicate(n=9999, expr=sample(combined))
#create a function to calculate mean differences. By default this treats the first 56
#values as the control group and the rest as the target group; n.control lets the same
#function work for other group sizes (the original hard-coded 56 and 112).
#This is a really useful feature of R. If you find yourself performing the same calculation over and over, you can simply define a function to do it.
#Also, some built-in functions like apply() require you to specify a function by name to apply to the rows/columns; if the function you want to run doesn't exist, you need to create it!
mean.diff <- function(dataset, n.control = 56){
#control-group mean minus target-group mean; NAs are dropped from each group
mean(dataset[1:n.control], na.rm=TRUE) - mean(dataset[(n.control+1):length(dataset)], na.rm=TRUE)
}
#now apply this new function to all of the permutations we created above
#(mean.diff runs once per column, i.e. once per random reordering)
diffs = apply(X=permuted, MARGIN=2, FUN=mean.diff)
#now let's calculate the p-value
#first we calculate our observed difference: the gap between the two condition
#means in the real (unshuffled) data
means = wh %>%
group_by(embeddedStructure) %>%
summarize(m = mean(zscores, na.rm=TRUE)) %>%
ungroup()
#NOTE(review): the sign of this difference depends on the order of the
#embeddedStructure levels in "means" — confirm row 2 minus row 1 matches the
#direction assumed by the one-tailed count below
observed.diff=as.numeric(means[2,2] - means[1,2])
#Then we count the number of results in our simulation that are equal to or greater than our observed difference, and add 1 as the correction factor
#(the +1 counts the observed arrangement itself as one possible outcome)
numerator=length(diffs[diffs>=observed.diff])+1
#Then we count the number of simulations we ran, and add 1 as a correction factor
denominator=length(diffs)+1
#Finally we put them together to calculate the (one-tailed) p-value
p=numerator/denominator
#############################
##A repeated measures randomization test##
#############################
wh=subset(d, dependencyLength == "lg")
#drop factor levels that no longer occur after subsetting
wh = droplevels(wh)
#To run this, we need to do two things to pre-process the data
#First, we need only one observation per participant per condition. This means averaging the two observations per condition for each participant.
#Second, we need to reshape the data into a type of wide format, such that the two conditions for each participant are on the same row
#within-subject averaging: one mean z-score per participant per condition
wh.averaged = wh %>%
group_by(subject, embeddedStructure) %>%
summarize(m = mean(zscores, na.rm=T))
#reshaping to wide format
#we can use the spread() function from tidyr to do this
#NOTE(review): spread() is superseded by tidyr::pivot_wider(); it still works, but
#pivot_wider(names_from=embeddedStructure, values_from=m) is the modern equivalent
wh.wide=spread(data=wh.averaged, key=embeddedStructure, value=m)
#we only need the scores, not the subject IDs, so we can use select() to only select the scores
#(this assumes the two embeddedStructure levels are named "non" and "isl")
wh.wide.scores = select(wh.wide, non, isl)
#Step 2: create a vector to house our reference distribution
#(preallocated so the loop fills slots instead of growing the vector each pass)
reference=numeric(9999)
#Step 3: run 9999 reshuffles, calculating the mean difference each time
for(i in seq_len(9999)){
reshuffle=apply(X=wh.wide.scores, MARGIN=1, FUN=sample) #reshuffle each participant's pair once; apply() transposes, so the result is 2 x N with one column per participant
means=apply(X=reshuffle, MARGIN=1, FUN=mean, na.rm=TRUE) #row means of reshuffle = the two condition means
mean.difference=diff(rev(means)) #rev() then diff() yields means[1] - means[2]
reference[i]=mean.difference #store the result in our preallocated reference vector
}
#Alternate Steps 2 and 3: Combine step 2 and step 3 into one command using replicate()
#basically, the argument expr= contains the three commands from the for-loop. See if you can unpack this!
#Reading inside-out: the inner apply reshuffles each participant's pair, the outer apply
#takes the two condition means, and rev() + diff() gives means[1] - means[2], exactly as
#in the for-loop above. (The original omitted rev() here, which flipped the sign of every
#value in the reference distribution relative to the loop version.)
reference=replicate(n=9999, expr=diff(rev(apply(X=apply(X=wh.wide.scores, MARGIN=1, FUN=sample), MARGIN=1, FUN=mean, na.rm=T))))
#Step 4: Calculate the p-value
#first we calculate our observed difference
#(same calculation as in the independent-samples section above)
means = wh %>%
group_by(embeddedStructure) %>%
summarize(m = mean(zscores, na.rm=TRUE)) %>%
ungroup()
#NOTE(review): confirm row 2 minus row 1 matches the direction of the reference
#distribution (means[1] - means[2] in the loop above)
observed.diff=as.numeric(means[2,2] - means[1,2])
#Then we count the number of results in our simulation that are equal to or greater than our observed difference, and add 1 as the correction factor
numerator=length(reference[reference>=observed.diff])+1
#Then we count the number of simulations we ran, and add 1 as a correction factor
denominator=length(reference)+1
#Finally we put them together to calculate the (one-tailed) p-value
p=numerator/denominator
################################################
#Demonstrating that the alpha level is the maximum Type I Error Rate
###############################################
#take 2 samples from a normal distribution 10,000 times; each sample is 50 (x 2 = 100)
#this represents running 10,000 experiments
#notice that each experiment really should be a NULL result because they came from the same population
#each column of boot.replicates is one simulated "experiment" of 100 scores
boot.replicates=replicate(10000, expr=rnorm(100))
#calculate a t-test for each replicate
#define a function to calculate the t-test
#Return the p-value of an independent-samples, pooled-variance t-test comparing
#the first 50 values of dataset (sample 1) to the last 50 (sample 2).
t.function <- function(dataset){
t1=t.test(dataset[1:50], dataset[51:100], var.equal=TRUE)
return(t1$p.value)
}
#We could also define a function to calculate an ANOVA if we want to use F instead of t
#anova.function <- function(dataset){
#  d = data.frame(y = dataset, x = rep(0:1, each=50))
#  d.aov = aov(y~x, data=d)
#  return(summary(d.aov)[[1]][5][[1]][1]) #this returns only the p-value from the summary function
#}
#Next we apply that function to each column of "boot.replicates", because each column represents the two samples from one experiment; "experiments" is then one p-value per experiment
experiments = apply(X=boot.replicates, MARGIN=2, FUN=t.function)
#Next set an alpha level
alpha=.05
#Calculate the proportion of experiments with a Type 1 error (p <= alpha); this should be very near the alpha level (but not quite because we are randomly sampling a finite number of samples)
sum(experiments<=alpha)/10000
#######################
##Multiple Comparisons and the Dunn correction##
#######################
################################################
#Demonstrating that the alpha level is the maximum per-comparison Type I Error Rate
###############################################
#take 2 samples from a normal distribution 10,000 times; each sample is 50 (x 2 = 100)
#this represents running 10,000 experiments
#notice that each experiment really should be a NULL result because they came from the same population
boot.replicates=replicate(10000, expr=rnorm(100))
#calculate an anova for each replicate
#first we define a function to calculate the anova
#Return the p-value for the group effect in a one-way ANOVA: the first 50 values
#of dataset are group 0 and the last 50 are group 1.
#NOTE: the original built data.frame(x = dataset, y = rep(0:1, each=50)) and fit
#aov(y~x), i.e. it regressed the group indicator on the scores. With a single
#predictor the F statistic and p-value are identical in either direction, but this
#version states the intended model (scores as the response), matching the
#commented-out reference implementation earlier in the file.
anova.function <- function(dataset){
dat = data.frame(y = dataset, x = rep(0:1, each=50)) #named dat to avoid shadowing the global data frame d
fit = aov(y~x, data=dat)
return(summary(fit)[[1]][5][[1]][1]) #this returns only the p-value from the summary table
}
#Next we apply that function to each column of "boot.replicates", because each column represents the two samples from one experiment; "experiments" is then one p-value per experiment
experiments = apply(X=boot.replicates, MARGIN=2, FUN=anova.function)
#Next set an alpha level
alpha=.05
#Calculate the proportion of experiments with a Type 1 error; this should be very near the alpha level (but not quite because we are randomly sampling a finite number of samples)
sum(experiments<=alpha)/10000
################################################
#Demonstrating that experimentwise/familywise error is larger than the alpha level when there are multiple comparisons in an experiment (or family)
###############################################
#take 3 samples from a normal distribution 10,000 times; each sample is 50 (x 3 = 150)
#this represents running 10,000 experiments, and doing 3 contrasts for each experiment
#notice that each experiment really should be a NULL result because they came from the same population
boot.replicates=replicate(10000, expr=rnorm(150))
#Define a function to calculate t-tests for all three contrasts for every experiment
#first we define a function to calculate the three t-tests
#Return the p-values of the three pairwise pooled-variance t-tests among the three
#groups of 50 (values 1-50, 51-100, and 101-150 of dataset), in that pair order.
t.function <- function(dataset){
t1=t.test(dataset[1:50], dataset[51:100], var.equal=TRUE)
t2=t.test(dataset[1:50], dataset[101:150], var.equal=TRUE)
t3=t.test(dataset[51:100], dataset[101:150], var.equal=TRUE)
return(c(t1$p.value,t2$p.value,t3$p.value))
}
#Next we apply that function to each column of "boot.replicates"; the result is a
#3 x 10000 matrix with one column of three p-values per simulated experiment
experiments = apply(X=boot.replicates, MARGIN=2, FUN=t.function)
#Calculate the per comparison error rate:
#10,000 experiments x 3 contrasts each = 30,000 comparisons; this should again be near alpha
alpha = .05
sum(experiments<=alpha)/30000
#Define a function to look for at least one error among the three contrasts, each
#tested at the uncorrected .05 level
#NOTE(review): the original source was garbled at this point ("sum(experiments0)");
#this reconstruction mirrors dunn.function below and provides the error.function
#that the apply() call on the following lines requires
error.function <- function(dataset){
errors=sum(dataset<=.05)
return(errors>0)
}
#Next we apply that function to all 10000 experimental results (one TRUE/FALSE per experiment)
experimentwise.errors=apply(X=experiments, MARGIN=2, FUN=error.function)
#And finally count the number of experiments that had at least one error. This is the experimentwise error, and it is much larger than .05
sum(experimentwise.errors)/10000
#The equation for experimentwise error: the probability of at least one error
#across Ncontrasts independent tests, each run at level alpha
alpha=.05
Ncontrasts=3
ewer=1-(1-alpha)^Ncontrasts
##################
#Demonstrating the Dunn correction
##################
#First we define a function to look for at least one error in the contrasts using the
#Dunn/Bonferroni-corrected level: the familywise alpha (default .05) divided by the
#number of p-values supplied. On a column of 3 p-values the default reproduces the
#original hard-coded threshold of .05/3.
dunn.function <- function(dataset, alpha=.05){
errors=sum(dataset<=(alpha/length(dataset)))
return(errors>0)
}
#Next we apply that function to all 10000 experimental results (one TRUE/FALSE per experiment)
dunn.errors=apply(X=experiments, MARGIN=2, FUN=dunn.function)
#And finally count the number of experiments that had at least one error. With the
#Dunn/Bonferroni correction this experimentwise error rate comes out at (or slightly
#below) .05, unlike the uncorrected version above
#(the original comment here said "much larger than .05" — a copy-paste error)
sum(dunn.errors)/10000