# LOAD THE DATA

A0 = data.frame(error=read.table("A0_curv1.m")[,2], curv1=read.table("A0_curv1.m")[,1], curv2=read.table("A0_curv2.m")[,1], medax=read.table("A0_medax.m")[,1], thick=read.table("A0_thick.m")[,1], centdist=read.table("A0_centdist.m")[,1], viewnorm=read.table("A0_viewnorm.m")[,1])
A1 = data.frame(error=read.table("A1_curv1.m")[,2], curv1=read.table("A1_curv1.m")[,1], curv2=read.table("A1_curv2.m")[,1], medax=read.table("A1_medax.m")[,1], thick=read.table("A1_thick.m")[,1], absdist=read.table("A1_absdist.m")[,1], absdiff=read.table("A1_absdiff.m")[,1], centdist=read.table("A1_centdist.m")[,1], viewnorm=read.table("A1_viewnorm.m")[,1])
A2 = data.frame(error=read.table("A2_curv1.m")[,2], curv1=read.table("A2_curv1.m")[,1],  curv2=read.table("A2_curv2.m")[,1],medax=read.table("A2_medax.m")[,1], thick=read.table("A2_thick.m")[,1], absdist=read.table("A2_absdist.m")[,1], absdiff=read.table("A2_absdiff.m")[,1], centdist=read.table("A2_centdist.m")[,1], viewnorm=read.table("A2_viewnorm.m")[,1])
A3 = data.frame(error=read.table("A3_curv1.m")[,2], curv1=read.table("A3_curv1.m")[,1],  curv2=read.table("A3_curv2.m")[,1],medax=read.table("A3_medax.m")[,1], thick=read.table("A3_thick.m")[,1], absdist=read.table("A3_absdist.m")[,1], absdiff=read.table("A3_absdiff.m")[,1], centdist=read.table("A3_centdist.m")[,1], viewnorm=read.table("A3_viewnorm.m")[,1])
A4 = data.frame(error=read.table("A4_curv1.m")[,2], curv1=read.table("A4_curv1.m")[,1], curv2=read.table("A4_curv2.m")[,1], medax=read.table("A4_medax.m")[,1], thick=read.table("A4_thick.m")[,1], absdist=read.table("A4_absdist.m")[,1], absdiff=read.table("A4_absdiff.m")[,1], centdist=read.table("A4_centdist.m")[,1], viewnorm=read.table("A4_viewnorm.m")[,1])
A5 = data.frame(error=read.table("A5_curv1.m")[,2], curv1=read.table("A5_curv1.m")[,1], curv2=read.table("A5_curv2.m")[,1], medax=read.table("A5_medax.m")[,1], thick=read.table("A5_thick.m")[,1], centdist=read.table("A5_centdist.m")[,1], viewnorm=read.table("A5_viewnorm.m")[,1])
A6 = data.frame(error=read.table("A6_curv1.m")[,2], curv1=read.table("A6_curv1.m")[,1], curv2=read.table("A6_curv2.m")[,1], medax=read.table("A6_medax.m")[,1], thick=read.table("A6_thick.m")[,1], absdist=read.table("A6_absdist.m")[,1], absdiff=read.table("A6_absdiff.m")[,1], centdist=read.table("A6_centdist.m")[,1], viewnorm=read.table("A6_viewnorm.m")[,1])
A7 = data.frame(error=read.table("A7_curv1.m")[,2], curv1=read.table("A7_curv1.m")[,1], curv2=read.table("A7_curv2.m")[,1], medax=read.table("A7_medax.m")[,1], thick=read.table("A7_thick.m")[,1], absdist=read.table("A7_absdist.m")[,1], absdiff=read.table("A7_absdiff.m")[,1], centdist=read.table("A7_centdist.m")[,1], viewnorm=read.table("A7_viewnorm.m")[,1])
A8 = data.frame(error=read.table("A8_curv1.m")[,2], curv1=read.table("A8_curv1.m")[,1], curv2=read.table("A8_curv2.m")[,1], medax=read.table("A8_medax.m")[,1], thick=read.table("A8_thick.m")[,1], absdist=read.table("A8_absdist.m")[,1], absdiff=read.table("A8_absdiff.m")[,1], centdist=read.table("A8_centdist.m")[,1], viewnorm=read.table("A8_viewnorm.m")[,1])
A9 = data.frame(error=read.table("A9_curv1.m")[,2], curv1=read.table("A9_curv1.m")[,1], curv2=read.table("A9_curv2.m")[,1], medax=read.table("A9_medax.m")[,1], thick=read.table("A9_thick.m")[,1], absdist=read.table("A9_absdist.m")[,1], absdiff=read.table("A9_absdiff.m")[,1], centdist=read.table("A9_centdist.m")[,1], viewnorm=read.table("A9_viewnorm.m")[,1])

# Transformation tests

cor(abs(A0$thick),      A0$error) # A0 TEST
cor(abs(A0$thick)^2, A0$error)
cor(abs(A1$thick),      A1$error) # A1 TEST
cor(abs(A1$thick)^2, A1$error)
cor(abs(A2$thick),      A2$error) # A2 TEST
cor(abs(A2$thick)^2, A2$error)
cor(abs(A3$thick),      A3$error) # A3 TEST
cor(abs(A3$thick)^2, A3$error)
cor(abs(A4$thick),      A4$error) # A4 TEST
cor(abs(A4$thick)^2, A4$error)
cor(abs(A5$thick),      A5$error) # A5 TEST
cor(abs(A5$thick)^2, A5$error)
cor(abs(A6$thick),      A6$error) # A6 TEST
cor(abs(A6$thick)^2, A6$error)
cor(abs(A7$thick),      A7$error) # A7 TEST
cor(abs(A7$thick)^2, A7$error)
cor(abs(A8$thick),      A8$error) # A8 TEST
cor(abs(A8$thick)^2, A8$error)
cor(abs(A9$thick),      A9$error) # A9 TEST
cor(abs(A9$thick)^2, A9$error)

# MORE SETUP

A0$X = cbind(sqrt(abs(A0$curv1)))
A1$X = cbind(A1$thick^2)

A0$X = cbind(abs(A0$curv1)^0.25, abs(A0$curv2)^0.25, abs(A0$curv1*A0$curv2)^0.25, abs((A0$curv1+A0$curv2)*0.5)^0.25, A0$medax, A0$thick^2, A0$centdist, A0$viewnorm)
A1$X = cbind(abs(A1$curv1)^0.25, abs(A1$curv2)^0.25, abs(A1$curv1*A1$curv2)^0.25, abs((A1$curv1+A1$curv2)*0.5)^0.25, A1$medax, A1$thick^2, A1$centdist, A1$viewnorm, A1$absdist, A1$absdiff)
A2$X = cbind(abs(A2$curv1)^0.25, abs(A2$curv2)^0.25, abs(A2$curv1*A2$curv2)^0.25, abs((A2$curv1+A2$curv2)*0.5)^0.25, A2$medax, A2$thick^2, A2$centdist, A2$viewnorm, A2$absdist, A2$absdiff)
A3$X = cbind(abs(A3$curv1)^0.25, abs(A3$curv2)^0.25, abs(A3$curv1*A3$curv2)^0.25, abs((A3$curv1+A3$curv2)*0.5)^0.25, A3$medax, A3$thick^2, A3$centdist, A3$viewnorm, A3$absdist, A3$absdiff)
A4$X = cbind(abs(A4$curv1)^0.25, abs(A4$curv2)^0.25, abs(A4$curv1*A4$curv2)^0.25, abs((A4$curv1+A4$curv2)*0.5)^0.25, A4$medax, A4$thick^2, A4$centdist, A4$viewnorm, A4$absdist, A4$absdiff)
A5$X = cbind(abs(A5$curv1)^0.25, abs(A5$curv2)^0.25, abs(A5$curv1*A5$curv2)^0.25, abs((A5$curv1+A5$curv2)*0.5)^0.25, A5$medax, A5$thick^2, A5$centdist, A5$viewnorm)
A6$X = cbind(abs(A6$curv1)^0.25, abs(A6$curv2)^0.25, abs(A6$curv1*A6$curv2)^0.25, abs((A6$curv1+A6$curv2)*0.5)^0.25, A6$medax, A6$thick^2, A6$centdist, A6$viewnorm, A6$absdist, A6$absdiff)
A7$X = cbind(abs(A7$curv1)^0.25, abs(A7$curv2)^0.25, abs(A7$curv1*A7$curv2)^0.25, abs((A7$curv1+A7$curv2)*0.5)^0.25, A7$medax, A7$thick^2, A7$centdist, A7$viewnorm, A7$absdist, A7$absdiff)
A8$X = cbind(abs(A8$curv1)^0.25, abs(A8$curv2)^0.25, abs(A8$curv1*A8$curv2)^0.25, abs((A8$curv1+A8$curv2)*0.5)^0.25, A8$medax, A8$thick^2, A8$centdist, A8$viewnorm, A8$absdist, A8$absdiff)
A9$X = cbind(abs(A9$curv1)^0.25, abs(A9$curv2)^0.25, abs(A9$curv1*A9$curv2)^0.25, abs((A9$curv1+A9$curv2)*0.5)^0.25, A9$medax, A9$thick^2, A9$centdist, A9$viewnorm, A9$absdist, A9$absdiff)

cor(abs(A0$curv1)^0.25, A0$error)

GLMNET STUFF

library("glmnet")

# Test model for any condition - just change X and y appropriately
X = A2$X
y = A2$error
#fit_cv = cv.glmnet(X, y, alpha=1, nfolds=length(y), type.measure="mae") #leave-one-out CV
fit_cv = cv.glmnet(X, y, alpha=1, nfolds=10, type.measure="mae") #10-fold CV
fit = predict(fit_cv, X)
rmspe = sqrt(mean((fit - y)^2)) # root mean squared prediction error
rmsbe = sqrt(mean((mean(y) - y)^2)) # root mean squared BASE error
mape = mean(abs(fit - y)) # mean absolute prediction error
mabe = mean(abs(mean(y) - y)) # mean absolute BASE error
coef(fit_cv)
rmspe
rmsbe
mape
mabe
1 - (mape / mabe) #percent improvement

#COMPARE PREDICTED TO OBSERVED FIGURE
par(mfrow=c(2,5))

X = scale(A0$X)
y = A0$error
fit_cv = cv.glmnet(X, y, alpha=1, nfolds=10, type.measure="mae")
fit = predict(fit_cv, X)
plot(fit, y, xlab="Predicted Error", ylab="Observed Error", main="Fixed view surface", col="red1", xlim=c(0,90), ylim=c(0,90))
abline(0, 1, lty=2, col="black")
X = scale(A1$X)
y = A1$error
fit_cv = cv.glmnet(X, y, alpha=1, nfolds=10, type.measure="mae")
fit = predict(fit_cv, X)
plot(fit, y, xlab="Predicted Error", ylab="Observed Error", main="Fixed view MPS", col="orange1", xlim=c(0,90), ylim=c(0,90))
abline(0, 1, lty=2, col="black")
X = scale(A2$X)
y = A2$error
fit_cv = cv.glmnet(X, y, alpha=1, nfolds=10, type.measure="mae")
fit = predict(fit_cv, X)
plot(fit, y, xlab="Predicted Error", ylab="Observed Error", main="Fixed view XYZ", col="green1", xlim=c(0,90), ylim=c(0,90))
abline(0, 1, lty=2, col="black")
X = scale(A3$X)
y = A3$error
fit_cv = cv.glmnet(X, y, alpha=1, nfolds=10, type.measure="mae")
fit = predict(fit_cv, X)
plot(fit, y, xlab="Predicted Error", ylab="Observed Error", main="Fixed view radial", col="blue1", xlim=c(0,90), ylim=c(0,90))
abline(0, 1, lty=2, col="black")
X = scale(A4$X)
y = A4$error
fit_cv = cv.glmnet(X, y, alpha=1, nfolds=10, type.measure="mae")
fit = predict(fit_cv, X)
plot(fit, y, xlab="Predicted Error", ylab="Observed Error", main="Fixed view crdbrd", col="purple1", xlim=c(0,90), ylim=c(0,90))
abline(0, 1, lty=2, col="black")
X = scale(A5$X)
y = A5$error
fit_cv = cv.glmnet(X, y, alpha=1, nfolds=10, type.measure="mae")
fit = predict(fit_cv, X)
plot(fit, y, xlab="Predicted Error", ylab="Observed Error", main="Rotated view surface", col="red4", xlim=c(0,90), ylim=c(0,90))
abline(0, 1, lty=2, col="black")
X = scale(A6$X)
y = A6$error
fit_cv = cv.glmnet(X, y, alpha=1, nfolds=10, type.measure="mae")
fit = predict(fit_cv, X)
plot(fit, y, xlab="Predicted Error", ylab="Observed Error", main="Rotated view MPS", col="orange4", xlim=c(0,90), ylim=c(0,90))
abline(0, 1, lty=2, col="black")
X = scale(A7$X)
y = A7$error
fit_cv = cv.glmnet(X, y, alpha=1, nfolds=10, type.measure="mae")
fit = predict(fit_cv, X)
plot(fit, y, xlab="Predicted Error", ylab="Observed Error", main="Rotated view XYZ", col="green2", xlim=c(0,90), ylim=c(0,90))
abline(0, 1, lty=2, col="black")
X = scale(A8$X)
y = A8$error
fit_cv = cv.glmnet(X, y, alpha=1, nfolds=10, type.measure="mae")
fit = predict(fit_cv, X)
plot(fit, y, xlab="Predicted Error", ylab="Observed Error", main="Rotated view radial", col="blue4", xlim=c(0,90), ylim=c(0,90))
abline(0, 1, lty=2, col="black")
X = scale(A9$X)
y = A9$error
fit_cv = cv.glmnet(X, y, alpha=1, nfolds=10, type.measure="mae")
fit = predict(fit_cv, X)
plot(fit, y, xlab="Predicted Error", ylab="Observed Error", main="Rotated view crdbrd", col="purple4", xlim=c(0,90), ylim=c(0,90))
abline(0, 1, lty=2, col="black")


fit_0_cv = cv.glmnet(A0$X, A0$error^(lambda), alpha=1, nfolds=10)
fit_0 = predict(fit_0_cv, A0$X)
plot(fit_0^(1.0/lambda), A0$error, xlab="predicted error", ylab="observed error")
rmsqe = sqrt(mean(((fit_0)^(1/lambda) - A0$error)^2)) # root mean squared prediction error
mean(abs((fit_0)^(1/lambda) - A0$error)) # mean absolute prediction error
mean(abs(mean(A0$error) - A0$error)) # mean absolute BASE error

lambda=1.0
fit_1_cv = cv.glmnet(A1$X, A1$error^lambda, alpha=1, nfolds=10)
fit_1 = predict(fit_1_cv, A1$X)
plot(fit_1, A1$error^lambda, xlab="predicted", ylab="observed")


BOXCOX FIGURE

library("lars")
library("DAAG")
library("e1071")

png(filename="boxcox.png", width=1500, height=1500)
par(mfrow=c(5, 5))

boxcox(error ~ curv, data=A0)
boxcox(error ~ medax, data=A0)
boxcox(error ~ thick, data=A0)
plot.new();
plot.new();

boxcox(error ~ curv, data=A1)
boxcox(error ~ medax, data=A1)
boxcox(error ~ thick, data=A1)
boxcox(error ~ absdist, data=A1)
boxcox(error ~ absdiff, data=A1)

boxcox(error ~ curv, data=A2)
boxcox(error ~ medax, data=A2)
boxcox(error ~ thick, data=A2)
boxcox(error ~ absdist, data=A2)
boxcox(error ~ absdiff, data=A2)

boxcox(error ~ curv, data=A3)
boxcox(error ~ medax, data=A3)
boxcox(error ~ thick, data=A3)
boxcox(error ~ absdist, data=A3)
boxcox(error ~ absdiff, data=A3)

boxcox(error ~ curv, data=A4)
boxcox(error ~ medax, data=A4)
boxcox(error ~ thick, data=A4)
boxcox(error ~ absdist, data=A4)
boxcox(error ~ absdiff, data=A4)

dev.off();

# RESIDUAL HISTOGRAM AND QQPLOT FIGURE

png(filename="hist_qq.png", width=1000, height=1000)
par(mfrow=c(2, 2))

fit <- lm(error ~ curv + medax + thick, data=A0)
fit_2 <- lm(log(error) ~ curv + medax + thick, data=A0)

hist(fit$resid, main="", col="red", xlab="Residual")
hist(fit_2$resid, main="", col="green", xlab="Residual")
qqnorm(fit$resid, main="", col="red")
qqline(fit$resid)
qqnorm(fit_2$resid, main="", col="green")
qqline(fit_2$resid)

dev.off()

fit_curv <- lm(error ~ curv, data=A0)
A0$curv_fourth = A0$curv^0.25
fit_curv_fourth <- lm(error ~ curv_fourth, data=A0)
dev.new(width=8, height=8)
par(mfrow=c(2,2))
hist(fit_curv$resid, main="", col="red", xlab="Residual")
hist(fit_curv_fourth$resid, main="", col="green", xlab="Residual")
qqnorm(fit_curv$resid, main="", col="red")
qqline(fit_curv$resid)
qqnorm(fit_curv_fourth$resid, main="", col="green")
qqline(fit_curv_fourth$resid)

# BOXCOX FIGURE JUST FOR SURFACE

png(filename="boxcox_0.png", width=500, height=500)
par(mfrow=c(1, 1))
boxcox(error ~ curv + medax + thick, lambda=seq(-0.5, 0.5, 0.05), data=A0)
dev.off();

# ALTERNATES
png(filename="boxcox_1.png", width=500, height=500)
par(mfrow=c(1, 1))
boxcox(error ~ curv + medax + thick + absdist + absdiff, lambda=seq(-0.5, 0.5, 0.05), data=A1)
dev.off();

# ALTERNATES
png(filename="boxcox_2.png", width=500, height=500)
par(mfrow=c(1, 1))
boxcox(error ~ curv + medax + thick + absdist + absdiff, lambda=seq(-0.5, 0.5, 0.05), data=A2)
dev.off();

# ALTERNATES
png(filename="boxcox_3.png", width=500, height=500)
par(mfrow=c(1, 1))
boxcox(error ~ curv + medax + thick + absdist + absdiff, lambda=seq(-2, 2, 0.05), data=A3)
dev.off();

# ALTERNATES
png(filename="boxcox_4.png", width=500, height=500)
par(mfrow=c(1, 1))
boxcox(error ~ curv + medax + thick + absdist + absdiff, lambda=seq(-1, 1, 0.05), data=A4)
dev.off();

# ALTERNATES
png(filename="boxcox_5.png", width=500, height=500)
par(mfrow=c(1, 1))
boxcox(error ~ curv + medax + thick, lambda=seq(-1, 1, 0.05), data=A5)
dev.off();

# ALTERNATES
png(filename="boxcox_6.png", width=500, height=500)
par(mfrow=c(1, 1))
boxcox(error ~ curv + medax + thick + absdist + absdiff, lambda=seq(-1, 1, 0.05), data=A6)
dev.off();

# ALTERNATES
png(filename="boxcox_7.png", width=500, height=500)
par(mfrow=c(1, 1))
boxcox(error ~ curv + medax + thick + absdist + absdiff, lambda=seq(-1, 1, 0.05), data=A7)
dev.off();

# ALTERNATES
png(filename="boxcox_8.png", width=500, height=500)
par(mfrow=c(1, 1))
boxcox(error ~ curv + medax + thick + absdist + absdiff, lambda=seq(-2, 2, 0.05), data=A8)
dev.off();

# ALTERNATES
png(filename="boxcox_9.png", width=500, height=500)
par(mfrow=c(1, 1))
boxcox(error ~ curv + medax + thick + absdist + absdiff, lambda=seq(-1, 1, 0.05), data=A9)
dev.off();

# DO IT ALL FOR A MODEL (once you know the variables you want, and the lambda you are using):

#fixed view surface 
lambda = 0.25
fit_0 <- lm(error^lambda ~ curv + medax + thick, data=A0)
fit_0_cv <- CVlm(m=10, form.lm=formula(error^lambda ~ curv + thick), df=A0)
fit_0_cv_base <- CVlm(m=10, form.lm=formula(error^lambda ~ zeroes), df=A0 )
fit_0 <- lm(error^lambda ~ curv + thick, data=A0)
rmsqe = sqrt(mean(((fit_0_cv$cvpred)^(1/lambda) - A0$error)^2)) # root mean squared prediction error
mape = mean(abs((fit_0_cv$cvpred)^(1/lambda) - A0$error)) # mean absolute prediction error
mabe = mean(abs((fit_0_cv_base$cvpred)^(1/lambda) - A0$error)) # mean absolute BASE error
rmsqe
mape
mabe
1 - (mape / mabe) #percent improvement

#fixed view MPS
lambda = 0.25
fit_1 <- lm(error^lambda ~ curv + medax + thick + absdist + absdiff, data=A1)
fit_1_cv <- CVlm(m=10, form.lm=formula(error^lambda ~ curv + thick + absdist + absdiff), df=A1)
fit_1_cv_base <- CVlm(m=10, form.lm=formula(error^lambda ~ zeroes), df=A1)
fit_1 <- lm(error^lambda ~ curv + thick + absdist + absdiff, data=A1)
rmsqe = sqrt(mean(((fit_1_cv$cvpred)^(1/lambda) - A1$error)^2)) # root mean squared prediction error
mape = mean(abs((fit_1_cv$cvpred)^(1/lambda) - A1$error)) # mean absolute prediction error
mabe = mean(abs((fit_1_cv_base$cvpred)^(1/lambda) - A1$error)) # mean absolute BASE error
rmsqe
mape
mabe
1 - (mape / mabe) #percent improvement

#fixed view XYZ
lambda = 0.25
fit_2 <- lm(error^lambda ~ curv + medax + thick + absdist + absdiff, data=A2)
fit_2_cv <- CVlm(m=10, form.lm=formula(error^lambda ~ curv + medax + thick + absdist + absdiff), df=A2)
fit_2_cv_base <- CVlm(m=10, form.lm=formula(error^lambda ~ zeroes), df=A2)
fit_2 <- lm(error^lambda ~ curv + medax + thick + absdist + absdiff, data=A2)
rmsqe = sqrt(mean(((fit_2_cv$cvpred)^(1/lambda) - A2$error)^2)) # root mean squared prediction error
mape = mean(abs((fit_2_cv$cvpred)^(1/lambda) - A2$error)) # mean absolute prediction error
mabe = mean(abs((fit_2_cv_base$cvpred)^(1/lambda) - A2$error)) # mean absolute BASE error
rmsqe
mape
mabe
1 - (mape / mabe) #percent improvement

#fixed view radial
lambda = 1.0
fit_3 <- lm(error ~ curv + medax + thick + absdist + absdiff, data=A3)
fit_3_cv <- CVlm(m=10, form.lm=formula(error ~ medax + thick + absdist + absdiff), df=A3)
fit_3_cv_base <- CVlm(m=10, form.lm=formula(error ~ zeroes), df=A3)
fit_3 <- lm(error ~ medax + thick + absdist + absdiff, data=A3)
rmsqe = sqrt(mean(((fit_3_cv$cvpred)^(1/lambda) - A3$error)^2)) # root mean squared prediction error
mape = mean(abs((fit_3_cv$cvpred)^(1/lambda) - A3$error)) # mean absolute prediction error
mabe = mean(abs((fit_3_cv_base$cvpred)^(1/lambda) - A3$error)) # mean absolute BASE error
rmsqe
mape
mabe
1 - (mape / mabe) #percent improvement

#fixed view crdbrd
lambda = 0.25
fit_4 <- lm(error^lambda ~ curv + medax + thick + absdist + absdiff, data=A4)
fit_4_cv <- CVlm(m=10, form.lm=formula(error^lambda ~ curv + medax + thick + absdist + absdiff), df=A4)
fit_4_cv_base <- CVlm(m=10, form.lm=formula(error^lambda ~ zeroes), df=A4)
fit_4 <- lm(error^lambda ~ curv + medax + thick + absdist + absdiff, data=A4)
rmsqe = sqrt(mean(((fit_4_cv$cvpred)^(1/lambda) - A4$error)^2)) # root mean squared prediction error
mape = mean(abs((fit_4_cv$cvpred)^(1/lambda) - A4$error)) # mean absolute prediction error
mabe = mean(abs((fit_4_cv_base$cvpred)^(1/lambda) - A4$error)) # mean absolute BASE error
rmsqe
mape
mabe
1 - (mape / mabe) #percent improvement

#rotated view surface
lambda = 0.25
fit_5 <- lm(error^lambda ~ curv + medax + thick, data=A5)
fit_5 <- lm(error^lambda ~ curv + thick, data=A5)
fit_5_cv <- CVlm(m=10, form.lm=formula(error^lambda ~ curv + thick), df=A5)
fit_5_cv_base <- CVlm(m=10, form.lm=formula(error^lambda ~ zeroes), df=A5)
rmsqe = sqrt(mean(((fit_5_cv$cvpred)^(1/lambda) - A5$error)^2)) # root mean squared prediction error
mape = mean(abs((fit_5_cv$cvpred)^(1/lambda) - A5$error)) # mean absolute prediction error
mabe = mean(abs((fit_5_cv_base$cvpred)^(1/lambda) - A5$error)) # mean absolute BASE error
rmsqe
mape
mabe
1 - (mape / mabe) #percent improvement

#rotated view MPS
lambda = 0.25
fit_6 <- lm(error^lambda ~ curv + medax + thick + absdist + absdiff, data=A6)
fit_6 <- lm(error^lambda ~ curv + thick + absdist + absdiff, data=A6)
fit_6_cv <- CVlm(m=10, form.lm=formula(error^lambda ~ curv + thick + absdist + absdiff), df=A6)
fit_6_cv_base <- CVlm(m=10, form.lm=formula(error^lambda ~ zeroes), df=A6)
rmsqe = sqrt(mean(((fit_6_cv$cvpred)^(1/lambda) - A6$error)^2)) # root mean squared prediction error
mape = mean(abs((fit_6_cv$cvpred)^(1/lambda) - A6$error)) # mean absolute prediction error
mabe = mean(abs((fit_6_cv_base$cvpred)^(1/lambda) - A6$error)) # mean absolute BASE error
rmsqe
mape
mabe
1 - (mape / mabe) #percent improvement

# rotated view XYZ
lambda = 0.25
fit_7 <- lm(error^lambda ~ curv + medax + thick + absdist + absdiff, data=A7)
fit_7 <- lm(error^lambda ~ curv + medax + thick + absdist + absdiff, data=A7)
fit_7_cv <- CVlm(m=10, form.lm=formula(error^lambda ~ curv + medax + thick + absdist + absdiff), df=A7)
fit_7_cv_base <- CVlm(m=10, form.lm=formula(error^lambda ~ zeroes), df=A7)
rmsqe = sqrt(mean(((fit_7_cv$cvpred)^(1/lambda) - A7$error)^2)) # root mean squared prediction error
mape = mean(abs((fit_7_cv$cvpred)^(1/lambda) - A7$error)) # mean absolute prediction error
mabe = mean(abs((fit_7_cv_base$cvpred)^(1/lambda) - A7$error)) # mean absolute BASE error
rmsqe
mape
mabe
1 - (mape / mabe) #percent improvement

# rotated view radial
lambda = 1.0
fit_8 <- lm(error ~ curv + medax + thick + absdist + absdiff, data=A8)
fit_8 <- lm(error ~ curv + medax + thick + absdist + absdiff, data=A8)
fit_8_cv <- CVlm(m=10, form.lm=formula(error ~ medax + thick + absdist + absdiff), df=A8)
fit_8_cv_base <- CVlm(m=10, form.lm=formula(error ~ zeroes), df=A8)
rmsqe = sqrt(mean(((fit_8_cv$cvpred)^(1/lambda) - A8$error)^2)) # root mean squared prediction error
mape = mean(abs((fit_8_cv$cvpred)^(1/lambda) - A8$error)) # mean absolute prediction error
mabe = mean(abs((fit_8_cv_base$cvpred)^(1/lambda) - A8$error)) # mean absolute BASE error
rmsqe
mape
mabe
1 - (mape / mabe) #percent improvement

# rotated view crdbrd
lambda = 0.5
fit_9 <- lm(error^lambda ~ curv + medax + thick + absdist + absdiff, data=A9)
fit_9_cv <- CVlm(m=10, form.lm=formula(error^lambda ~ curv + medax + absdist + absdiff), df=A9)
fit_9_cv_base <- CVlm(m=10, form.lm=formula(error^lambda ~ zeroes), df=A9)
fit_9 <- lm(error^lambda ~ curv + medax + absdist + absdiff, data=A9)
rmsqe = sqrt(mean(((fit_9_cv$cvpred)^(1/lambda) - A9$error)^2)) # root mean squared prediction error
mape = mean(abs((fit_9_cv$cvpred)^(1/lambda) - A9$error)) # mean absolute prediction error
mabe = mean(abs((fit_9_cv_base$cvpred)^(1/lambda) - A9$error)) # mean absolute BASE error
rmsqe
mape
mabe
1 - (mape / mabe) #percent improvement

# SETUP THE ZERO COLUMNS

A0$zeroes = numeric(length(A0$error))
A1$zeroes = numeric(length(A1$error))
A2$zeroes = numeric(length(A2$error))
A3$zeroes = numeric(length(A3$error))
A4$zeroes = numeric(length(A4$error))
A5$zeroes = numeric(length(A5$error))
A6$zeroes = numeric(length(A6$error))
A7$zeroes = numeric(length(A7$error))
A8$zeroes = numeric(length(A8$error))
A9$zeroes = numeric(length(A9$error))


# NOW COMPUTE THE BASE ERROR

base_error = mean(abs(A0$error - mean(A0$error))) # the base error rate
abs_error = mean(abs((fit_0_cv$Predicted)^4 - A0$error)) # mean absolute error
1 - (abs_error / base_error) #percent improvement

base_error = mean(abs(A1$error - mean(A1$error))) # the base error rate
abs_error = mean(abs((fit_1_cv$Predicted)^4 - A1$error)) # mean absolute error
1 - (abs_error / base_error) #percent improvement

base_error = mean(abs(A2$error - mean(A2$error))) # the base error rate
abs_error = mean(abs((fit_2_cv$Predicted)^4 - A2$error)) # mean absolute error
1 - (abs_error / base_error) #percent improvement

base_error = mean(abs(A3$error - mean(A3$error))) # the base error rate
abs_error = mean(abs((fit_3_cv$Predicted) - A3$error)) # mean absolute error
1 - (abs_error / base_error) #percent improvement

base_error = mean(abs(A4$error - mean(A4$error))) # the base error rate
abs_error = mean(abs((fit_4_cv$Predicted)^4 - A4$error)) # mean absolute error
1 - (abs_error / base_error) #percent improvement

base_error = mean(abs(A5$error - mean(A5$error))) # the base error rate
abs_error = mean(abs((fit_5_cv$Predicted)^4 - A5$error)) # mean absolute error
1 - (abs_error / base_error) #percent improvement

base_error = mean(abs(A6$error - mean(A6$error))) # the base error rate
abs_error = mean(abs((fit_6_cv$Predicted)^4 - A6$error)) # mean absolute error
1 - (abs_error / base_error) #percent improvement

base_error = mean(abs(A7$error - mean(A7$error))) # the base error rate
abs_error = mean(abs((fit_7_cv$Predicted)^4 - A7$error)) # mean absolute error
1 - (abs_error / base_error) #percent improvement

base_error = mean(abs(A8$error - mean(A8$error))) # the base error rate
abs_error = mean(abs(fit_8_cv$Predicted - A8$error)) # mean absolute error
1 - (abs_error / base_error) #percent improvement

base_error = mean(abs(A9$error - mean(A9$error))) # the base error rate
abs_error = mean(abs((fit_9_cv$Predicted)^2 - A9$error)) # mean absolute error
1 - (abs_error / base_error) #percent improvement

# PLOTTING

plot(lars_test, xvar="norm", lwd=2, lty="solid", breaks=FALSE)

A0_curv <- read.table("A0_curv.m") - LOAD a plain matrix

save(A0_curv, file="A0_curv.Rda") - Save a dataframe
write.table(A0_curv, file=A0_curv.m") - Save a dataframe plain

cbind - appends matrices together (column-wise)
rbind - appends matrices together (row-wise)

ls() - show all the objects in the workspace

dim(A0_curv) - dimensions of matrix

A0 = data.frame(curv=A0_curv, medax=A0_medax)

A0_curv[629, 1] - access matrix elements using [,]

A0_curv[629, ] - leave one blank for all rows/columns

A0_curv[625:629, ] - colon : specifies ranges

solve(A0_curv) - computes matrix inverse
solve(A, b) - solves x for system Ax = b

str(A0_curv) - see the variables
class(A0_curv) - what is A0_curv?  a matrix?
names(A0_curv) - the names inside (accessed with $ )

A0_curv <- as.matrix(A0_curv) - makes A0_curv a matrix

#ctrl - L - clear the screen

# SET Inf/-Inf's to ZERO:

A0$curv_log[is.infinite(A0$curv_log)] <- 0

# BOX-COX:

show_boxcox <- boxcox(error ~ curv, data=A0)

# INSTALL LIBRARIES/PACKAGES:

install.packages("LIBRARYNAME")

# LOAD LIBRARY:

library("LIBRARYNAME")


# TO DO A SCATTERPLOT:

plot(A0)

# PLOT HISTOGRAM OF RESIDUALS:

hist(fit$resid)

# PLOT Q-Q PLOT:

qqnorm(fit$resid)
qqline(fit$resid)

# SAVE PLOT TO FILE:
png(filename="out.png")
plot(fit)
dev.off()

# MAKE A NEW WINDOW
dev.new()

# TO DO THE PLOTS TO ASSESS THE MODEL:

oldpar <- par(mfrow = c(2,2))
plot(fit)
par(oldpar)

# COMPARE TWO MODELS WITH ANOVA (say we removed a var with largest p-val)

compFit1Fit2 <- anova(lmFit2, lmFit1)
compFit1Fit2

# DO THE 10-fold CROSS VALIDATION:

fit <- CVlm(df=A0, m=10, form.lm=formula(error ~ curv + medax + thick))
fit <- lm(error ~ curv + thick, data=A0)

# The p-value in the ANOVA tells you whether to accept the null hypothesis (p > 0.05), which is that the model with the extra variable is not significantly better than the one without

#%*% - matrix multiply
#* - elementwise multiply

#A * B	Element-wise multiplication
#A %*% B	Matrix multiplication
#A %o% B	Outer product. AB'
#crossprod(A,B)
#crossprod(A)	A'B and A'A respectively.
#t(A)	Transpose
#diag(x)	Creates diagonal matrix with elements of x in the principal diagonal
#diag(A)	Returns a vector containing the elements of the principal diagonal
#diag(k)	If k is a scalar, this creates a k x k identity matrix. Go figure.
#solve(A, b)	Returns vector x in the equation b = Ax (i.e., A-1b)
#solve(A)	Inverse of A where A is a square matrix.
#ginv(A)	Moore-Penrose Generalized Inverse of A. 
#ginv(A) requires loading the MASS package.
#y<-eigen(A)	y$val are the eigenvalues of A
#y$vec are the eigenvectors of A
#y<-svd(A)	Single value decomposition of A.
#y$d = vector containing the singular values of A
#y$u = matrix with columns contain the left singular vectors of A 
#y$v = matrix with columns contain the right singular vectors of A
#R <- chol(A)	Choleski factorization of A. Returns the upper triangular factor, such that R'R = A.
#y <- qr(A)	QR decomposition of A. 
#y$qr has an upper triangle that contains the decomposition and a lower triangle that contains information on the Q decomposition.
#y$rank is the rank of A. 
#y$qraux a vector which contains additional information on Q. 
#y$pivot contains information on the pivoting strategy used.
#cbind(A,B,...)	Combine matrices(vectors) horizontally. Returns a matrix.
#rbind(A,B,...)	Combine matrices(vectors) vertically. Returns a matrix.
#rowMeans(A)	Returns vector of row means.
#rowSums(A)	Returns vector of row sums.
#colMeans(A)	Returns vector of column means.
#colSums(A)	Returns vector of coumn means.
