# PREPARATION OF THE DATA # BREAST HEIGHT DIAMETER AND HEIGHT OF PINE ################################################################################ #import data file into R data<-read.table("http://user.mendelu.cz/drapela/Forest_Biometry/Data/plots.txt",sep='\t',header=TRUE) data$id<-paste(data$id_plot, data$id_tree, sep="-") head(data) tmp1<-data[,c(9,3,4,5)] head(tmp1) tmp1$d13<-tmp1$d13/10 tmp2<-tmp1[tmp1$species=='PINE',] head(tmp2) pine<-tmp2[!is.na(tmp2$h), ] head(pine) pine<-pine[,-2] head(pine) # SIMPLE LINEAR MODEL BETWEEN ONE RESPONSE AND ONE EXPLANATORY VARIABLE # RELATIONSHIP BETWEEN DBH AND HEIGHT ########################################################################## #scatterplot par(mfrow=c(1,1)) plot(pine$h~pine$d13, xlab="d13 [cm]", ylab="h [m]") #better scatterplot from car library with boxplots library(car) scatterplot(pine$d13,pine$h, id.n=5, lwd =2,lty.smooth = 5, lty.spread =3,lwd.smooth=1,lwd.spread=1) # BASIC MEASURES OF CORRELATION # covariance #by formula sum((pine$d13-mean(pine$d13)) * (pine$h-mean(pine$h)))/ (length(pine$d13)-1) #by function cov(pine$h,pine$d13) #Pearson correlation coeficient #by formula sum((pine$d13-mean(pine$d13)) * (pine$h-mean(pine$h)))/ (length(pine$d13)-1) / sd(pine$d13)/sd (pine$h) #by function cor(pine$h, pine$d13) #Spearman correlation coeficient cor(pine$h,pine$d13, method="spearman") #test of significance of corr.coef. cor.test(pine$h,pine$d13 ) # PARAMETER ESTIMATION OF LINEAR MODEL # parameters of linear model lm_01<-lm(pine$h~pine$d13) #possible outputs of model "lm_01" lm_01 summary(lm_01) anova(lm_01) names(lm_01) lm_01$coefficients lm_01$coefficients[1] lm_01$coefficients[2] out<-summary(lm_01) names(out) out$sigma out$r.squared #only coefficients of the model coef(lm_01) #residuals of the model residuals(lm_01)[1:20] #fitted values of the model fitted(lm_01)[1:20] #confidence intervals of model parameters confint(lm_01) #plot of the model with measured and model values plot(pine$h~pine$d13, xlab="diameters [cm]", ylab="heights[m]") abline(lm_01, col="blue", lty="dashed") plot(pine$h~pine$d13, xlab="diameters [cm]", ylab="heights[m]") abline(a=13.287,b=0.30478, col="red", lty="dashed") #computing of confidence a prediction intervals of the model tmp<-data.frame(d13=pine[order(pine$d13),2]) head(tmp) tmp_ci<-predict.lm(lm_01, new=tmp,interval="confidence") tmp_pi<-predict.lm(lm_01, new= tmp, interval="prediction") head(tmp_ci) head(tmp_pi) ci<-data.frame(cbind(tmp_ci, tmp_pi[,-1])) cii<-ci[order(ci$fit),] head(cii) #plot of confidence a prediction intervals of the model plot(pine$h~pine$d13, xlab="diameters [cm]", ylab="heights [m]" ) matlines(tmp$d13, cii, lty=c("solid", "dashed", "dashed", "dashed", "dashed"), col=c("blue", "red", "red", "green", "green")) # confidence and prediction intervals - function "ci.plot" - package "HH" pine1=data.frame(x=pine$d13,y=pine$h) lm1=lm(y ~ x, data=pine1) ci.plot(lm1) #diagnostics of residuals res<-residuals(lm_01) #box plot boxplot(res, main="Box plot for residuals of the model lm_01", notch=TRUE, col="lightgreen",boxwex=0.3) abline(0,0,col="red") #histogram hist(res, col="lightblue", main="Histogram of residuals of the model lm_01",ylim=c(0,0.3),probability=TRUE,ylab="density", xlab="residuals") lines(density(res), col="red", lty="dashed") res1<-seq(min(res)-2,max(res)+2,0.1) lines(res1,dnorm(res1, mean(res), sd(res)),col="green") #QQ plot qqnorm(res, main="QQ plot of residuals for model lm_01") qqline(res, col="red") #statistics of residuals library(pastecs) data1<-res options(scipen=100) tab_res<-stat.desc(data1,norm=T) tab_res<-round(tab_res,2) tab_res options(scipen=0) #test of normality of residuals shapiro.test(res) # Breusch-Pagan test against heteroskedasticity library(lmtest) bptest(lm_01) #autocorrelation test - Durbin Watson test dwtest(lm_01,alternative="two.sided") # saving of influential points according to several measures infl<-influence.measures(lm_01) # list of "suspected" values (possible influential points) summary(infl) #bubble plot of influential plot influencePlot(lm_01, main="Influence Plot", sub="Circle size is proportial to Cook's Distance", id.n=5 ) # 4 plots of various measures of influnetial points with indentification of 5 most "suspected" points influenceIndexPlot(lm_01,id.method="y",id.n=5) #selected diagnostic plots for linear model plot(lm_01)