# Randomize a set of data, compute mean difference, record % exceeding observed mean # # filename is the name of the file containing the data - one column of data w/ header # n1 and n2 are the sample sizes for the two independent sets of data # iterations is the number of times to loop through the process -- I usually use 10000 resample.u.between <- function(filename,n1,n2,iterations) { #Read in data and initialize variables score <- read.table(filename,header=TRUE) names(score)[1] <- 'var1' nsample <- length(score$var1) startn2 <- n1+1 target <- abs(mean(score$var1[1:n1]) - mean(score$var1[startn2:nsample])) y <- 1 ucomp <- vector() uarray <- vector() #Loop starts here while (y <= iterations) { #Randomize the sample perm <- sample(score$var1,replace=F) #cat(perm,"\n") #Get the two samples and compute the absolute difference in means mean.first <- mean(perm[1:n1]) mean.second <- mean(perm[startn2:nsample]) mean.diff <- abs(mean.first - mean.second) #Compare mean.diff to target and store in ucomp array if (mean.diff >= target) { ucomp [y] <- 1 } else { ucomp [y] <- 0 } y <- y+1 } #Loop ends above here #Compute mean of ucomp which is proportion of u >= target uexceed <- mean(ucomp) #cat(uarray,"\n") #cat(ucomp,"\n") cat("Observed mean difference: ",target,"\n") cat("Proportion exceeding observed mean difference: ",uexceed) } resample.u.between('sampletimes.txt',11,9,10000) For testing: filename <- 'NPS_GTM_WebEx.txt' n1 <- 36 n2 <- 31 iterations <- 10000