############# TESTING OF STATISTICAL HYPOTHESIS ############## data<-read.table("H:/vyuka/ForestBiometry/data/plots.txt",sep='\t',header=TRUE) tmp<-data[!is.na(data$h_live), ] library(lattice) histogram(~h_live|species,data=data) shapiro.test(tmp[tmp$species=='SPRUCE',]$h_live) shapiro.test(tmp[tmp$species=='BEECH',]$h_live) shapiro.test(tmp[tmp$species=='beech',]$h_live) shapiro.test(tmp[tmp$species=='PINE',]$h_live) par(mfrow=c(2,2)) qqnorm(data[data$species=='SPRUCE',]$h_live, main="QQ plot h_live - spruce") qqline(data[data$species=='SPRUCE',]$h_live, col="red") qqnorm(data[data$species=='BEECH',]$h_live, main="QQ plot h_live - beech") qqline(data[data$species=='BEECH',]$h_live, col="red") qqnorm(data[data$species=='beech',]$h_live, main="QQ plot h_live - beech") qqline(data[data$species=='beech',]$h_live, col="red") qqnorm(data[data$species=='PINE',]$h_live, main="QQ plot h_live - pine") qqline(data[data$species=='PINE',]$h_live, col="red") #histograms for "cleaned" data mis1<-min(tmp[tmp$species=='SPRUCE',]$h_live)-5 mas1<-max(tmp[tmp$species=='SPRUCE',]$h_live)+5 mib1<-min(tmp[tmp$species=='BEECH',]$h_live)-5 mab1<-max(tmp[tmp$species=='BEECH',]$h_live)+5 mio1<-min(tmp[tmp$species=='beech',]$h_live)-5 mao1<-max(tmp[tmp$species=='beech',]$h_live)+5 mip1<-min(tmp[tmp$species=='PINE',]$h_live)-5 map1<-max(tmp[tmp$species=='PINE',]$h_live)+5 par(mfrow=c(2,2)) hist(tmp[tmp$species=='SPRUCE',]$h_live, col="lightblue", main="Histogram h-live - spruce", probability=TRUE,ylab="density", xlab="h-live [m]") lines(density(tmp[tmp$species=='SPRUCE',]$h_live), col="red", lty="dashed") lines(seq(mis1, mas1,0.1), dnorm(seq(mis1, mas1,0.1), mean(tmp[tmp$species=='SPRUCE',]$h_live), sd(tmp[tmp$species=='SPRUCE',]$h_live)) , col="green") hist(tmp[tmp$species=='BEECH',]$h_live, col="lightblue", main="Histogram h-live - beech", probability=TRUE,ylab="density", xlab="h-live [m]") lines(density(tmp[tmp$species=='BEECH',]$h_live), col="red", lty="dashed") lines(seq(mib1, mab1,0.1), dnorm(seq(mib1, mab1,0.1), mean(tmp[tmp$species=='BEECH',]$h_live), sd(tmp[tmp$species=='SPRUCE',]$h_live)) , col="green") hist(tmp[tmp$species=='beech',]$h_live, col="lightblue", main="Histogram h-live - beech", probability=TRUE,ylab="density", xlab="h-live [m]") lines(density(tmp[tmp$species=='beech',]$h_live), col="red", lty="dashed") lines(seq(mio1, mao1,0.1), dnorm(seq(mio1, mao1,0.1), mean(tmp[tmp$species=='beech',]$h_live), sd(tmp[tmp$species=='SPRUCE',]$h_live)) , col="green") hist(tmp[tmp$species=='PINE',]$h_live, col="lightblue", main="Histogram h-live - pine", probability=TRUE,ylab="density", xlab="h-live [m]") lines(density(tmp[tmp$species=='PINE',]$h_live), col="red", lty="dashed") lines(seq(mip1, map1,0.1), dnorm(seq(mip1, map1,0.1), mean(tmp[tmp$species=='PINE',]$h_live), sd(tmp[tmp$species=='SPRUCE',]$h_live)) , col="green") bhlive<-tmp[tmp$species=='BEECH',]$h_live length(bhlive) summary(bhlive) bhlive10<-sample(bhlive,10,replace=T) bhlive30<-sample(bhlive,30,replace=T) ############# PARAMETRIC TESTS ################ #################################################################### ## 1-S t-TEST OF MEAN (we test whether average value of h_live of beech is equal 11.5 m or not) #2-sided hypothesis t.test(bhlive,mu = 11.5, alternative="two.sided") t.test(bhlive10,mu = 11.5, alternative="two.sided") t.test(bhlive30,mu = 11.5, alternative="two.sided") #test for 1-sided alternative hypothesis - average value of h_live of beech is higher than 11.5 m t.test(bhlive,mu = 11.5, alternative="greater") t.test(bhlive10,mu = 11.5, alternative="greater") t.test(bhlive30,mu = 11.5, alternative="greater") #test pro 1-sided alternative hypothesis - average value of h_live of beech is lesser than 11.5 m t.test(bhlive,mu = 11.5, alternative="less") t.test(bhlive10,mu = 11.5, alternative="less") t.test(bhlive30,mu = 11.5, alternative="less") ## power of the test #power of 1-S t-test, if really important difference of mean of beech h-live from H0 is 1 m, 2 sided hypothesis, #alpha 0.05 power.t.test(n=length(bhlive), delta=1, sd=sd(bhlive),sig.level=0.05, power=NULL, type="one.sample", alternative="two.sided") power.t.test(n=length(bhlive10), delta=1, sd=sd(bhlive10),sig.level=0.05, power=NULL, type="one.sample", alternative="two.sided") power.t.test(n=length(bhlive30), delta=1, sd=sd(bhlive30),sig.level=0.05, power=NULL, type="one.sample", alternative="two.sided") #power of 1-S t-test, if really important difference of mean of beech h-live from H0 is 1 m, 2 sided hypothesis, #alpha 0.01 power.t.test(n=length(bhlive), delta=1, sd=sd(bhlive),sig.level=0.01, power=NULL, type="one.sample", alternative="two.sided") power.t.test(n=length(bhlive10), delta=1, sd=sd(bhlive10),sig.level=0.01, power=NULL, type="one.sample", alternative="two.sided") power.t.test(n=length(bhlive30), delta=1, sd=sd(bhlive30),sig.level=0.01, power=NULL, type="one.sample", alternative="two.sided") ## minimal detected effect by test #What difference between H0 and H1 (effect) is detected with probability 0.9 when we used sample size n #2 sided hypothesis, alpha 0.05 power.t.test(n=length(bhlive), delta=NULL, sd=sd(bhlive), sig.level=0.05, power=0.9, type="one.sample", alternative="two.sided") power.t.test(n=length(bhlive10), delta=NULL, sd=sd(bhlive10), sig.level=0.05, power=0.9, type="one.sample", alternative="two.sided") power.t.test(n=length(bhlive30), delta=NULL, sd=sd(bhlive30), sig.level=0.05, power=0.9, type="one.sample", alternative="two.sided") #What difference between H0 and H1 (effect) is detected with probability 0.9 when we used sample size n #2 sided hypothesis, alpha 0.01 power.t.test(n=length(bhlive), delta=NULL, sd=sd(bhlive), sig.level=0.01, power=0.9, type="one.sample", alternative="two.sided") power.t.test(n=length(bhlive10), delta=NULL, sd=sd(bhlive10), sig.level=0.01, power=0.9, type="one.sample", alternative="two.sided") power.t.test(n=length(bhlive30), delta=NULL, sd=sd(bhlive30), sig.level=0.01, power=0.9, type="one.sample", alternative="two.sided") ## minimal sample size for desired power and effect #minimal sample size for desired power 0.9 and practically important effect 1 m, #2 sided hypothesis, alpha 0.05 power.t.test(n=NULL, delta=1, sd=sd(bhlive), sig.level=0.05, power=0.9, type="one.sample", alternative="two.sided") power.t.test(n=NULL, delta=1, sd=sd(bhlive10), sig.level=0.05, power=0.9, type="one.sample", alternative="two.sided") power.t.test(n=NULL, delta=1, sd=sd(bhlive30), sig.level=0.05, power=0.9, type="one.sample", alternative="two.sided") #minimal sample size for desired power 0.9 and practically important effect 1 m, #2 sided hypothesis, alpha 0.01 power.t.test(n=NULL, delta=1, sd=sd(bhlive), sig.level=0.01, power=0.9, type="one.sample", alternative="two.sided") ## 2-S t-TEST OF MEAN #generating of distribution with different means and SDs #same means, different SDs x_25_3<-rnorm(100, 25, 3) x_30_3<-rnorm(100, 30, 3) #different both means and SDs x_25_6<-rnorm(100, 25, 6) x_30_2<-rnorm(100, 30, 2) #plot of generated distributions par(mfrow=c(1,1)) tmp<-seq(7,45, by=0.1) plot(tmp, dnorm(tmp, 25,3), xlim=c(7, 48), col="black", xlab="values", ylab="density",ylim=c(0,0.2)) points(tmp, dnorm(tmp, 30,3), col="red") points(tmp, dnorm(tmp, 25,6), col="blue") points(tmp, dnorm(tmp, 30,2), col="green") abline(v=25, col="blue", lty="dashed") abline(v=30, col="green", lty="dashed") legend("topright",pch=c(1, 1, 1, 1), legend=c("NR(25,3)", "NR(30,3)", "NR(25,6)", "NR(30,2)"),col=c("black", "red", "blue", "green")) ## 2-S test of equality of means for samples x_25_3 and x_30_3 #F test of variances equality - 2 sided hyp. var.test(x_25_3, x_30_3, alternative="two.sided") #Variances are equal in population, we use t-test for equal variances - 2- sided t.test(x_25_3, x_30_3, alternative="two.sided", var.equal=TRUE) #1 - sided alternative hyp.(mean of x_25_3 is greater than mean x_30_3 in population) t.test(x_25_3, x_30_3, alternative="greater", var.equal=TRUE) #1 - sided alternative hyp.(mean of x_25_3 is lesser than mean x_30_3 in population) t.test(x_25_3, x_30_3, alternative="less", var.equal=TRUE) ## 2-S test of equality of means for samples x_25_3 and x_30_2 #F test of variances equality - 2 sided hyp. var.test(x_25_3, x_30_2, alternative="two.sided") #Variances are not equal in population, we use t-test for nonequal variances - 2- sided alternative hyp. t.test(x_25_3, x_30_2, alternative="two.sided", var.equal=FALSE) #1 - sided alternative hyp.(mean of x_25_3 is greater than mean x_30_2 in population) t.test(x_25_3, x_30_2, alternative="greater", var.equal=FALSE) #1 - sided alternative hyp.(mean of x_25_3 is lesser than mean x_30_3 in population) t.test(x_25_3, x_30_2, alternative="less", var.equal=FALSE) ##power of the test of means (samples x_25_3 and x_30_3) #power of t-test when really important difference between population means of x_25_3 and x_30_3 is 1, #2-sided, alpha 0,05 power.t.test(n=100, delta=1, sd=3, sig.level=0.05, power=NULL, type="two.sample", alternative="two.sided") #minimal sample size for desired power of the test 0.9, when really important difference between population means #of x_25_3 and x_30_3 is 5 and 1, 2-sided, alpha 0,05 power.t.test(n=NULL, delta=5, sd=3, sig.level=0.05, power=0.9, type="two.sample", alternative="two.sided") power.t.test(n=NULL, delta=1, sd=3, sig.level=0.05, power=0.9, type="two.sample", alternative="two.sided") ## PAIRED t-TEST datacomp<-read.table("H:/ForestBiometry/data/data_comp.txt",sep='\t',quote='',header=TRUE) head(datacomp) # add id of place of measurement datacomp$id_pl<-seq(1, length(datacomp$id_tree), 1) #add new variables to dataframe datacomp - delta_foto (difference between diameters measured by caliper and #photo) and delta_crit (difference between diameters measured by caliper and Criterion) datacomp$delta_foto<-datacomp$da_calip-datacomp$da_foto datacomp$delta_crit<-datacomp$da_calip-datacomp$da_crit attach(datacomp) #boxplot with labels of biggest differences (> 100 mm) - possible gross errors and QQ plot par(mfrow=c(1,2)) boxplot(delta_foto, notch=TRUE, names=c("delta_foto"), main="Differences foto - caliper - boxplot") abline(0,0,col="red") text(1, delta_foto[abs(delta_foto)>100], labels=id_pl[abs(delta_foto)>100], pos=4) qqnorm(delta_foto, main="Differences foto - caliper - QQ plot") qqline(delta_foto,col="red") #boxplot, histogram and QQ plot for data without differences above 100 mm par(mfrow=c(1,3)) tmp<-delta_foto[abs(delta_foto) <= 100] boxplot(tmp, notch=TRUE, names=c("delta_foto"), main="Differences foto - caliper - boxplot") abline(0,0,col="red") hist(tmp, col="lightblue", main="Differences foto - caliper - histogram", probability=TRUE,ylab="density", xlab="difference [mm]") lines(density(tmp), col="red", lty="dashed") lines(seq(min(tmp, na.rm=TRUE)-50, max(tmp, na.rm=TRUE)+50,1), dnorm(seq(min(tmp, na.rm=TRUE)-50, max(tmp, na.rm=TRUE)+50,1), mean(tmp, na.rm=TRUE), sd(tmp, na.rm=TRUE) ), col="black") qqnorm(tmp, main="Differences foto - caliper - QQ plot") qqline(tmp,col="red") #shapiro wilk test of normality shapiro.test(tmp) #paired t-test, 2-sided hyp. t.test(da_calip[abs(delta_foto)<=100], da_foto[abs(delta_foto) <= 100], paired=TRUE, alternative="two.sided") #paired t-test, 1-sided hyp. t.test(da_calip[abs(delta_foto)<=100],da_foto[abs(delta_foto) <= 100], paired=TRUE, alternative="less") #paired t-test, 1-sided hyp. t.test(da_calip[abs(delta_foto)<=100], da_foto[abs(delta_foto) <= 100], paired=TRUE, alternative="greater") #power of the test #power of the paired t-test when really important difference in diameters between caliper and photo is 10 mm, #2 sided, alpha = 0,05 power.t.test(n=length(tmp), delta=10, sd=sd(delta_foto[abs(delta_foto) <= 100], na.rm=TRUE), sig.level=0.05, power=NULL, type="paired", alternative="two.sided") #minimal detected difference between photo and caliper - requested probability is 90% power.t.test(n=50, delta=NULL, sd=sd(delta_foto[abs(delta_foto) <= 100], na.rm=TRUE),sig.level=0.05, power=0.9, type="paired",alternative="two.sided") #minimal sample size with 90% probabilty that we detect at least 5 mm difference between photo and #caliper, 2- sided, alpha = 0,05 power.t.test(n=NULL, delta=5, sd=sd(tmp, na.rm=TRUE), sig.level=0.05, power=0.9, type="paired", alternative="two.sided")