# BREAST HEIGHT DIAMETER AND HEIGHT OF PINE ################################################################################ #import data file into R pine=read.table("http://user.mendelu.cz/drapela/Forest_Biometry/Data/pine1a.txt",sep='\t',header=TRUE) head(pine) # SIMPLE LINEAR MODEL BETWEEN ONE RESPONSE AND ONE EXPLANATORY VARIABLE # RELATIONSHIP BETWEEN DBH AND HEIGHT ########################################################################## #scatterplot par(mfrow=c(1,1)) plot(pine$h~pine$d13, xlab="d13 [cm]", ylab="h [m]") # BASIC MEASURES OF CORRELATION # covariance #by formula sum((pine$d13-mean(pine$d13)) * (pine$h-mean(pine$h)))/ (length(pine$d13)-1) #by function pinecov=cov(pine$h,pine$d13) #Pearson correlation coeficient #by formula pinecor=pinecov/(sd(pine$d13)*sd(pine$h)) pinecor #by function cor(pine$h, pine$d13) #Spearman correlation coeficient cor(pine$h,pine$d13, method="spearman") #test of significance of corr.coef. cor.test(pine$h,pine$d13 ) # PARAMETER ESTIMATION OF LINEAR MODEL # parameters of linear model lm_01<-lm(pine$h~pine$d13) #possible outputs of model "lm_01" lm_01 summary(lm_01) anova(lm_01) names(lm_01) lm_01$coefficients lm_01$coefficients[1] lm_01$coefficients[2] out<-summary(lm_01) names(out) out$sigma out$r.squared #only coefficients of the model coef(lm_01) #residuals of the model residuals(lm_01)[1:20] #fitted values of the model fitted(lm_01)[1:20] #confidence intervals of model parameters confint(lm_01) #plot of the model with measured and model values plot(pine$h~pine$d13, xlab="diameters [cm]", ylab="heights[m]") abline(lm_01, col="blue", lty="dashed") abline(a=13.287,b=0.30478, col="red", lty="dashed") #computing of confidence a prediction intervals of the model library(dplyr) library (HH) tmp=data.frame(matrix(NA, nrow = 61, ncol = 1)) tmp tmp_ci<-predict.lm(lm_01, new=tmp,interval="confidence") tmp_pi<-predict.lm(lm_01, new= tmp, interval="prediction") tmp_ci head(tmp_ci) head(tmp_pi) ci<-data.frame(cbind(tmp_ci, tmp_pi[,-1])) head(ci) ci cii1=arrange(ci,fit) cii1 p1=pine$d13 p1=as.data.frame(p1) p2=arrange(p1,p1) p2 #plot of confidence a prediction intervals of the model plot(pine$h~pine$d13, xlab="diameters [cm]", ylab="heights [m]" ) matlines(p2, cii1, lty=c("solid", "dashed", "dashed", "dashed", "dashed"), col=c("blue", "red", "red", "green", "green")) # confidence and prediction intervals - function "ci.plot" - package "HH" pine1=data.frame(x=pine$d13,y=pine$h) lm1=lm(y ~ x, data=pine1) ci.plot(lm1) #diagnostics of residuals res<-residuals(lm_01) #box plot boxplot(res, main="Box plot for residuals of the model lm_01", notch=TRUE, col="lightgreen",boxwex=0.3) abline(0,0,col="red") #histogram hist(res, col="lightblue", main="Histogram of residuals of the model lm_01",ylim=c(0,0.3),probability=TRUE,ylab="density", xlab="residuals") lines(density(res), col="red", lty="dashed") res1<-seq(min(res)-2,max(res)+2,0.1) lines(res1,dnorm(res1, mean(res), sd(res)),col="green") #QQ plot qqnorm(res, main="QQ plot of residuals for model lm_01") qqline(res, col="red") #statistics of residuals library(pastecs) data1<-res options(scipen=100) tab_res<-stat.desc(data1,norm=T) tab_res<-round(tab_res,2) tab_res options(scipen=0) #test of normality of residuals shapiro.test(res) # Breusch-Pagan test against heteroskedasticity library(lmtest) bptest(lm_01) #autocorrelation test - Durbin Watson test dwtest(lm_01,alternative="two.sided") # saving of influential points according to several measures infl<-influence.measures(lm_01) # list of "suspected" values (possible influential points) summary(infl) #bubble plot of influential plot influencePlot(lm_01, main="Influence Plot", sub="Circle size is proportial to Cook's Distance", id.n=5 ) # 4 plots of various measures of influnetial points with indentification of 5 most "suspected" points influenceIndexPlot(lm_01,id.method="y",id.n=5) #selected diagnostic plots for linear model plot(lm_01)