#CHAPTER 13: PROVE THERE IS DIFFERENCE ######################################## #LIBRARY #################################################### library(ggplot2); library(pastecs); library(xlsx) library(compute.es); library(car); library(multcomp) library(effects); library(reshape); library(nlme) #################################################### #################################################### ##13.3. CONDUCT OF EFFECTS ANALYSES #################################################### #13.3.1. T-TEST TO COMPARE TWO MEANS #################################################### #PACKAGES: ggplot2, pastecs, xlsx #DOWNLOAD DATA # provided by the user to inform of location of data #LONG TABLE tmlong<- read.xlsx( "C:\\\\SkillsForCareerSecurity_Datasets.xlsx", sheetName="tblTwoMeanLong", header=TRUE) head(tmlong) str(tmlong) #WIDE TABLE # provided by the user to inform of location of data tmWide<- read.xlsx( "C:\\\\SkillsForCareerSecurity_Datasets.xlsx", sheetName="tblTwoMeanWide", header=TRUE) head(tmWide) str(tmWide) #EXPLORE THE DATA #BOXPLOT, MEANS AND POINTS boxPlt<- ggplot(tmlong, aes(Method, Score)) + geom_point() + geom_boxplot(size = 1) + geom_jitter() + geom_point(stat="summary", fun="mean", color = "black", size = 4, shape = 17) + theme( plot.title=element_text(size=14,face="bold"), axis.text=element_text(size=14,face="bold"), axis.title=element_text(size=14,face="bold")) boxPlt #DESCRIPTIVE by(tmlong$Score, tmlong$Method, stat.desc, basic=FALSE, norm=TRUE) #CONDUCT INDEPENDENT T TEST #WITH LONG DATA ind.t.test.l<- t.test(Score ~ Method, data = tmlong) ind.t.test.l #WITH WIDE DATA ind.t.test.w<- t.test(tmWide$Standard, tmWide$New) ind.t.test.w #CONDUCT DEPENDENT T TEST #WITH LONG DATA dep.t.test.l<- t.test(Score ~ Method, data = tmlong, paired=TRUE) dep.t.test.l #WITH WIDE DATA dep.t.test.w<- t.test(tmWide$Standard, tmWide$New, paired=TRUE) dep.t.test.w #################################################### #13.3.2. ONE-WAY ANOVA #################################################### #PACKAGES: ggplot2, pastecs, car, multicomp, xlsx #DOWNLOAD DATA # provided by the user to inform of location of data OrigAsTo<- read.xlsx( "C:\\\\SkillsForCareerSecurity_Datasets.xlsx", sheetName="tblBest", header=TRUE) AsTo<- OrigAsTo head(AsTo) str(AsTo) #ORDER VARIABLE TO SET LEVELS (CONDITIONS) AsTo$Strat<- factor(AsTo$Strat, levels=c("NoChng","DesignA","DesignB")) #EXPLORE DATA #GRAPH OF MEAN SCORE TO EACH STRATEGY Effect <- ggplot(AsTo, aes(Strat, Score)) Effect + stat_summary(fun = mean, geom = "line", size = 1, aes(group=1), colour = "black") + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 0.75) + stat_summary(fun = mean, geom = "point", size = 4) + stat_summary(fun = mean, geom = "point", size = 3) + labs(x = "Strategy", y = "Mean Score") + theme( plot.title=element_text(size=14,face="bold"), axis.text=element_text(size=14,face="bold"), axis.title=element_text(size=14,face="bold")) #DISCRIPTIVE STATISTICS by(AsTo$Score, AsTo$Strat, stat.desc, basic=FALSE, norm=TRUE) #LEVENE TEST FOR VARIANCE ACROSS STRATEGY leveneTest(AsTo$Score, AsTo$Strat, center= median) #ANOVA #ANOVA MODEL TO TEST FOR DIFFERENCE SuccessTest.aov<- aov(Score ~ Strat, data=AsTo) summary(SuccessTest.aov) #PLOT OF OF VARIANCE & NORMALITY OF RESIDUALS par(mfrow=c(2,2)) plot(SuccessTest.aov, lwd=3) par(mfrow=c(1,1)) #TEST WITH F TEST ADJUSTED FOR UNEQUAL VARIANCE oneway.test(Score ~ Strat, data=AsTo) ##BUILD CONTRAST #"NoChng","DesignA","DesignB" #FIRST CONTRAST contrast1<- c(-2,1,1) #SECOND CONTRAST contrast2<- c(0, 1,-1) #INSTALL CONTRASTS TO VARIABLE & CHECK contrasts(AsTo$Strat)<- cbind(contrast1,contrast2) #INSPECT AsTo$Strat #CALL MODEL UPON CONTRASTS modelContrasts<- aov(Score~Strat, data=AsTo) summary.lm(modelContrasts) #Note: Divide the p-value by 2 to reflect is a one-sided tail is the case. #POST HOC ANALYSIS #BONFERRONI & BENJAMINI-HOCHBERG (BH) #BONFERRONI pairwise.t.test(AsTo$Score, AsTo$Strat, p.adjust.method = "bonferroni") #BENJAMINI-HOCHBERG (BH) pairwise.t.test(AsTo$Score, AsTo$Strat, p.adjust.method = "BH") #TUKEY AND DUNNETT #TUKEY postHocs.T<- glht(SuccessTest.aov, linfct = mcp(Strat = "Tukey")) summary(postHocs.T) confint(postHocs.T) #DUNNETT postHocs.D<- glht(SuccessTest.aov, linfct = mcp(Strat = "Dunnett"), base = 1) summary(postHocs.D) confint(postHocs.D) #################################################### #13.3.3. ANALYSIS OF COVARIANCE, ANCOVA #################################################### #PACKAGES: ggplot2, car, pastecs, multcomp, xlsx #DOWNLOAD DATA # provided by the user to inform of location of data OrigAsToCov<- read.xlsx( "C:\\\\SkillsForCareerSecurity_Datasets.xlsx", sheetName="tblBestCov", header=TRUE) AsToCov<- OrigAsToCov head(AsToCov); str(AsToCov) #ORDER LEVELS TO THE VARIABLE AsToCov$Strat<- factor(AsToCov$Strat, levels=c("NoChng","DesignA","DesignB")) #EXPLORE DATA #GRAPHIC par(mfrow=c(1,2)) boxplot(Score ~ Strat, data = AsToCov, ylab="Score", xlab="Strategy", main="Score") boxplot(CovKPI ~ Strat, data = AsToCov, ylab="CovKPI", xlab="Strategy", main="CovKPI") par(mfrow=c(1,1)) #DESRIPTIVE by(AsToCov$Score, AsToCov$Strat, stat.desc, basic=FALSE, norm=TRUE) by(AsToCov$CovKPI, AsToCov$Strat, stat.desc, basic=FALSE, norm=TRUE) #LEVENE TEST FOR VARIANCE TO MEANS ACROSS GROUPS leveneTest(AsToCov$Score, AsToCov$Strat, center=median) #CHECK COVKPI IS INDEPENDENT OF DESIGN STRAT chkIndep<-aov(CovKPI ~ Strat, data = AsToCov) summary(chkIndep) summary.lm(chkIndep) #CREATE CONTRASTS contrasts(AsToCov$Strat)<- cbind(c(-2,1,1), c(0,-1,1)) #VIEW AND CONFIRM CONTRASTS AsToCov$Strat #ANCOVA MODEL AsToCovModel<- aov(Score ~ CovKPI + Strat, data = AsToCov) Anova(AsToCovModel, type = "III") #ADJUSTED MEANS FOR COVARIANT adjMeans<- effect("Strat", AsToCovModel, se=TRUE) summary(adjMeans) adjMeans$se #CONTRASTS IN THE MODEL summary.lm(AsToCovModel) #PLOT SCORE AND COVKPI AS SCATTER scatter.all<- ggplot(AsToCov, aes(CovKPI, Score)) scatter.all + geom_point(aes(shape=Strat),size = 3) + geom_smooth(method = "lm", alpha = 0.1) + labs(x = "CovKPI", y = "Score") + theme( plot.title=element_text(size=14,face="bold"), axis.text=element_text(size=14,face="bold"), axis.title=element_text(size=14,face="bold"), legend.title = element_text(size = 12), legend.text = element_text(size = 12)) #POST HOC TESTS IN ANCOVA postHocs<- glht(AsToCovModel, linfct = mcp(Strat = "Tukey")) summary(postHocs); confint(postHocs) #PLOT OF OF VARIANCE & NORMALITY OF RESIDUALS par(mfrow=c(2,2)) plot(AsToCovModel) par(mfrow=c(1,1)) #TEST FOR HOMOGENEITY OF REGRESSION SLOPE hoRS<- aov(Score ~ CovKPI*Strat, data = AsToCov) Anova(hoRS, type = "III") #GRAPHIC TO INSPECT HOMOGENIETY OF REGRESSIION SLOPE scatter.strat <- ggplot(AsToCov, aes(CovKPI, Score, shape=Strat, color=Strat)) scatter.strat + geom_point(size = 3) + geom_smooth(method = "lm", alpha = 0.1) + labs(x = "CovKPI", y = "Score") + theme( plot.title=element_text(size=14,face="bold"), axis.text=element_text(size=14,face="bold"), axis.title=element_text(size=14,face="bold"), legend.title = element_text(size = 12), legend.text = element_text(size = 12)) ################################################ #13.3.4. FACTORIAL ANOVA ################################################ #PACKAGES: ggplot2, car, pastecs, multcomp, nlme, xlsx #DOWNLOAD DATA # provided by the user to inform of location of data OrigAsToFactorial<- read.xlsx( "C:\\\\SkillsForCareerSecurity_Datasets.xlsx", sheetName="tblBestFactorial", header=TRUE) AsToFactorial<- OrigAsToFactorial head(AsToFactorial); str(AsToFactorial) #SET LEVELS TO STRAT VARIABLE AsToFactorial$Strat<- factor(AsToFactorial$Strat, levels=c("NoChng","DesignA","DesignB")) str(AsToFactorial$Strat) #EXPLORE DATA #GRAPHIC FactBox<- ggplot(AsToFactorial, aes(Strat, Score)) FactBox + geom_boxplot() + facet_wrap(~Grade) + geom_point(size = 2) + labs(x = "Design Category", y = "Mean Score") + theme( plot.title=element_text(size=14,face="bold"), axis.text=element_text(size=14,face="bold"), axis.title=element_text(size=14,face="bold"), strip.text = element_text(size=14)) #DISCRIPTIVE #TABLE INDIVIDUAL FACTORIALS by(AsToFactorial$Score, AsToFactorial$Strat, stat.desc, basic=FALSE, norm=TRUE) by(AsToFactorial$Score, AsToFactorial$Grade, stat.desc, basic=FALSE, norm=TRUE) #TABLE OF FACTORIALS AS INTERACTIVE by(AsToFactorial$Score, list(AsToFactorial$Strat, AsToFactorial$Grade), stat.desc, basic=FALSE, norm=TRUE) #LEVENE TEST OF HOMOGENIOUS VARIANCE #TEST IF VARIANCE BETWEEN SCORE AND EACH FACTORICAL VARIABLE leveneTest(AsToFactorial$Score, AsToFactorial$Strat, center=median) leveneTest(AsToFactorial$Score, AsToFactorial$Grade, center=median) #TEST OF VARIANCE ACROSS ALL SIX GROUPS leveneTest(AsToFactorial$Score, interaction(AsToFactorial$Strat,AsToFactorial$Grade), center=median) #CHOOSING CONTRASTS #CONSTRASTS FOR DESIGN contrasts(AsToFactorial$Strat)<- cbind(c(-2,1,1), c(0,-1,1)) AsToFactorial$Strat #CONTRASTS FOR GRADE contrasts(AsToFactorial$Grade)<- c(-1,1) AsToFactorial$Grade ##FITTING THE FACTORIAL ANOVA MODEL FactorialModel<- aov(Score~Grade+Strat+Grade:Strat, data=AsToFactorial) #Alternative code #FactorialModel.Alt<- aov(Score~Grade*Strat, data=AsToFactorial) #Run the model omnibus ANOVA Anova(FactorialModel, type="III") #GRAPHIC OF FINDINGS BY MODEL InterLine<- ggplot(AsToFactorial, aes(Strat, Score, color = Grade)) InterLine+ stat_summary(fun = mean, geom = "point") + stat_summary(fun = mean, geom = "line", aes(group= Grade), lwd=1) + stat_summary(fun.Grade =mean_cl_boot, geom = "errorbar", width = 0.2, lwd=1) + labs(x = "Strategy", y = "Score", colour = "Grade") + theme( plot.title=element_text(size=14,face="bold"), axis.text=element_text(size=14,face="bold"), axis.title=element_text(size=14,face="bold"), legend.title = element_text(size = 12), legend.text = element_text(size = 12)) ##INTERPRETING CONTRASTS FOR DIFFERENCE summary.lm(FactorialModel) #PLOTS IN FACTORIAL ANOVA par(mfrow=c(2,2)) plot(FactorialModel) par(mfrow=c(1,1)) ################################################ #13.3.5. ONE-WAY REPEATED MEASURES ################################################ #PACKAGES: ggplot2,pastecs, multcomp, nlme, reshape, xlsx #DOWNLOAD DATA # provided by the user to inform of location of data OrigBest1WayRepeat<- read.xlsx( "C:\\\\SkillsForCareerSecurity_Datasets.xlsx", sheetName="tblBest1WayRepeat", header=TRUE) Best1WayRepeat<- OrigBest1WayRepeat head(Best1WayRepeat); str(Best1WayRepeat) #MAKE IT LONG longBest1WayRepeat<- melt(Best1WayRepeat, id="Craft", measured=c("MethB1","MethA1","MethA2","MethB2")) #NAME THE COLUMNS names(longBest1WayRepeat)<- c("Craft","Design","Score") str(longBest1WayRepeat) #CONVERT DESIGN VARIABLE TO FACTOR WITH SUITABLE LEVELS longBest1WayRepeat$Design<- factor(longBest1WayRepeat$Design, labels=c("MethB1","MethA1","MethA2","MethB2")) str(longBest1WayRepeat) #SORT BY CRAFT PERSON (OPTIONAL) longBest1WayRepeat<-longBest1WayRepeat[order(longBest1WayRepeat$Craft),] longBest1WayRepeat head(longBest1WayRepeat); str(longBest1WayRepeat) #EXPLORE THE DATA #GRAPHIC Best1WayRepeatBoxplot<- ggplot(longBest1WayRepeat, aes(Design, Score)) Best1WayRepeatBoxplot + geom_boxplot() + labs(x = "Design", y = "Score") + theme( plot.title=element_text(size=14,face="bold"), axis.text=element_text(size=14,face="bold"), axis.title=element_text(size=14,face="bold")) #DESCRIPTIVE by(longBest1WayRepeat$Score, longBest1WayRepeat$Design, stat.desc, basic=FALSE, norm=TRUE) ##FORM CONTRASTS MethodAvsMethodB<- c(1,-1,-1,1) MethodA1VsMethodA2<- c(0,-1,1,0) MethodB1VsMethodB2<- c(-1,0,0,1) contrasts(longBest1WayRepeat$Design)<- cbind(MethodAvsMethodB, MethodA1VsMethodA2, MethodB1VsMethodB2) longBest1WayRepeat$Design ##WITH THE LME() MODEL Best1WayRepeatModel<-lme(Score~Design, random=~1|Craft/Design, data=longBest1WayRepeat, method='ML') summary(Best1WayRepeatModel) baseline1WayRepeat<-lme(Score~1, random=~1|Craft/Design, data=longBest1WayRepeat, method='ML') summary(baseline1WayRepeat) #COMPARE MODELS anova(baseline1WayRepeat, Best1WayRepeatModel) #POST HOC OF THE MODEL postHocs1WayRepeat<- glht(Best1WayRepeatModel, linfct=mcp(Design='Tukey')) summary(postHocs1WayRepeat) confint(postHocs1WayRepeat) #LEVENE TEST leveneTest(longBest1WayRepeat$Score, longBest1WayRepeat$Design, center=median) ################################################ #13.3.6. FACORIAL REPEATED-MEASURES ANOVA ################################################ #PACKAGES: ggplot2,pastecs, multcomp, nlme, reshape, xlsx #DOWNLOAD DATA # provided by the user to inform of location of data OrigBestFactRepeat<- read.xlsx( "C:\\\\SkillsForCareerSecurity_Datasets.xlsx", sheetName="tblBestFactRepeat", header=TRUE) BestFactRepeat<- OrigBestFactRepeat head(BestFactRepeat); str(BestFactRepeat) #MAKE IT LONG LongBestFactRepeat<- melt(BestFactRepeat, id = "Craft", measured = c("DesignBHigh", "DesignBLow", "DesingBNorm", "DesignAHigh", "DesignALow", "DesignANorm", "NoChngHigh", "NoChngLow", "NoChngNorm", "Craft")) names(LongBestFactRepeat)<- c("Craft", "Groups", "Score") head(LongBestFactRepeat) #CREATE THE FACTORIAL VARIABLES TO THE GROUPS VARIABLE LongBestFactRepeat$Design<-gl(3, 60, labels = c("DesignB", "DesignA", "NoChng")) LongBestFactRepeat$Difficult<-gl(3,20, 180, labels = c("High", "Low", "Norm")) #SORT THE EVENTS (OPTIONAL) LongBestFactRepeat<- LongBestFactRepeat[order(LongBestFactRepeat$Craft),] #EXPORING DATA #GRAPHIC BestFactRepeatBoxplot<- ggplot(LongBestFactRepeat, aes(Design, Score)) BestFactRepeatBoxplot + geom_boxplot() + facet_wrap(~Difficult, nrow = 1) + labs(x = "Type of Design", y = "Mean Score") + theme( plot.title=element_text(size=12,face="bold"), axis.text=element_text(size=12,face="bold"), axis.title=element_text(size=14,face="bold")) ##DESCRIPTIVE by(LongBestFactRepeat$Score, list(LongBestFactRepeat$Design, LongBestFactRepeat$Difficult), stat.desc, basic=F, norm = T) ##SETTING CONTRASTS ##DESIGN VARIABLE DesignVsNoChng<- c(1,1,-2) DesignBVsDesignA<- c(-1,1,0) contrasts(LongBestFactRepeat$Design) <-cbind(DesignVsNoChng,DesignBVsDesignA) LongBestFactRepeat$Design #DIFFICULT VARIABLE LowVsOther<- c(1,-2,1) HighVsNorm<- c(-1,0,1) contrasts(LongBestFactRepeat$Difficult)<- cbind(LowVsOther,HighVsNorm) LongBestFactRepeat$Difficult ##FACTORIAL REPEATED MEASURES DESIGN AS A GLM Baseline<- lme(Score ~1, random=~1|Craft/Design/Difficult, data=LongBestFactRepeat, method='ML') DesignModel<- update(Baseline, .~. + Design) DifficultModel<- update(DesignModel, .~. + Difficult) InteractModel<- update(DifficultModel, .~. + Design:Difficult) anova(Baseline, DesignModel, DifficultModel, InteractModel) #MODEL summary(InteractModel) #INSPECT FOR INTERACTION DesignDiffInt<- ggplot(LongBestFactRepeat, aes(Design, Score, shape= Difficult)) DesignDiffInt + stat_summary(fun = mean, geom = "point", size=5) + stat_summary(fun = mean, geom = "line", aes(group= Difficult), lwd=1) + stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, lwd=1) + labs(x = "Design", y = "Mean Score", colour = "Difficulty") + theme( plot.title=element_text(size=12,face="bold"), axis.text=element_text(size=12,face="bold"), axis.title=element_text(size=14,face="bold")) #POST HOC OF THE MODEL #CREATE MODEL WITH GROUPS AS FACTORIAL VARIABLE GrpSBestFactRepeatModel<-lme(Score~Groups, random=~1|Craft/Groups, data=LongBestFactRepeat, method='ML') #GENERATE POST HOC ON MODEL postHocsFactRepeat<- glht(GrpSBestFactRepeatModel, linfct=mcp(Groups='Tukey')) summary(postHocsFactRepeat) confint(postHocsFactRepeat)