# Examples of Section 3
library("adabag")
library("rpart")    # attached by adabag, loaded explicitly for rpart() and rpart.control()
library("mlbench")  # provides the Vehicle data set used in Section 4.2

data("iris")
train <- c(sample(1:50, 25), sample(51:100, 25), sample(101:150, 25))

iris.adaboost <- boosting(Species ~ ., data = iris[train, ], mfinal = 10,
  control = rpart.control(maxdepth = 1))
iris.adaboost

barplot(iris.adaboost$imp[order(iris.adaboost$imp, decreasing = TRUE)],
  ylim = c(0, 100), main = "Variables relative importance", col = "lightblue")

# Training error
table(iris.adaboost$class, iris$Species[train],
  dnn = c("Predicted Class", "Observed Class"))
1 - sum(iris.adaboost$class == iris$Species[train]) / length(iris$Species[train])

# Test error
iris.predboosting <- predict.boosting(iris.adaboost, newdata = iris[-train, ])
iris.predboosting

# 10-fold cross-validation
iris.boostcv <- boosting.cv(Species ~ ., v = 10, data = iris, mfinal = 10,
  control = rpart.control(maxdepth = 1))
iris.boostcv

iris.bagging <- bagging(Species ~ ., data = iris[train, ], mfinal = 10,
  control = rpart.control(maxdepth = 1))
iris.bagging

barplot(iris.bagging$imp[order(iris.bagging$imp, decreasing = TRUE)],
  ylim = c(0, 100), main = "Variables relative importance", col = "lightblue")

# Training error
table(iris.bagging$class, iris$Species[train],
  dnn = c("Predicted Class", "Observed Class"))
1 - sum(iris.bagging$class == iris$Species[train]) / length(iris$Species[train])

# Test error
iris.predbagging <- predict.bagging(iris.bagging, newdata = iris[-train, ])
iris.predbagging

# 10-fold cross-validation
iris.baggingcv <- bagging.cv(Species ~ ., v = 10, data = iris, mfinal = 10,
  control = rpart.control(maxdepth = 1))
iris.baggingcv

# 3.3. The margins and errorevol functions
iris.bagging.margins <- margins(iris.bagging, iris[train, ])           # training set
iris.bagging.predmargins <- margins(iris.predbagging, iris[-train, ])  # test set
iris.bagging.margins      # training set
iris.bagging.predmargins  # test set

margins.test <- iris.bagging.predmargins[[1]]
margins.train <- iris.bagging.margins[[1]]
plot(sort(margins.train), (1:length(margins.train)) / length(margins.train),
  type = "l", xlim = c(-1, 1), main = "Margin cumulative distribution graph",
  xlab = "m", ylab = "% observations", col = "blue3", lwd = 2)
abline(v = 0, col = "red", lty = 2, lwd = 2)
lines(sort(margins.test), (1:length(margins.test)) / length(margins.test),
  type = "l", cex = 0.5, col = "green", lwd = 2)
legend("topleft", c("test", "train"), col = c("green", "blue3"), lty = 1, lwd = 2)

evol.test <- errorevol(iris.adaboost, iris[-train, ])
evol.train <- errorevol(iris.adaboost, iris[train, ])
plot(evol.test$error, type = "l", ylim = c(0, 1),
  main = "Boosting error versus number of trees", xlab = "Iterations",
  ylab = "Error", col = "red", lwd = 2)
lines(evol.train$error, cex = 0.5, col = "blue", lty = 2, lwd = 2)
legend("topleft", c("test", "train"), col = c("red", "blue"), lty = 1:2, lwd = 2)

# 4. Examples
# 4.1. A dichotomous example
n <- 12000
p <- 10
set.seed(100)
x <- matrix(rnorm(n * p), ncol = p)
y <- as.factor(c(-1, 1)[as.numeric(apply(x^2, 1, sum) > 9.34) + 1])
data <- data.frame(y, x)
train <- sample(1:n, 2000, FALSE)
formula <- y ~ .
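# The threshold 9.34 is approximately the median of a chi-squared distribution
# with p = 10 degrees of freedom (the distribution of the sum of squares of
# ten standard normals), so the two classes are roughly balanced. A quick
# sanity check (not part of the original script):
qchisq(0.5, df = p)  # approximately 9.34
table(y) / n         # each class proportion should be close to 0.5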
vardep <- data[, as.character(formula[[2]])]
cntrl <- rpart.control(maxdepth = 1, minsplit = 0, cp = -1)
mfinal <- 400

data.boosting <- boosting(formula = formula, data = data[train, ],
  mfinal = mfinal, coeflearn = "Breiman", boos = FALSE, control = cntrl)
data.boostingBreimanFalse <- data.boosting
# Training error
table(data.boosting$class, vardep[train],
  dnn = c("Predicted Class", "Observed Class"))
1 - sum(data.boosting$class == vardep[train]) / length(vardep[train])
# Test error
data.predboost <- predict.boosting(data.boosting, newdata = data[-train, ])
data.predboost$confusion
data.predboost$error
data.boosting$imp

data.boosting <- boosting(formula = formula, data = data[train, ],
  mfinal = mfinal, coeflearn = "Breiman", boos = TRUE, control = cntrl)
data.boostingBreimanTrue <- data.boosting
# Training error
table(data.boosting$class, vardep[train],
  dnn = c("Predicted Class", "Observed Class"))
1 - sum(data.boosting$class == vardep[train]) / length(vardep[train])
# Test error
data.predboost <- predict.boosting(data.boosting, newdata = data[-train, ])
data.predboost$confusion
data.predboost$error
data.boosting$imp

data.boosting <- boosting(formula = formula, data = data[train, ],
  mfinal = mfinal, coeflearn = "Freund", boos = FALSE, control = cntrl)
data.boostingFreundFalse <- data.boosting
# Training error
table(data.boosting$class, vardep[train],
  dnn = c("Predicted Class", "Observed Class"))
1 - sum(data.boosting$class == vardep[train]) / length(vardep[train])
# Test error
data.predboost <- predict.boosting(data.boosting, newdata = data[-train, ])
data.predboost$confusion
data.predboost$error
data.boosting$imp

data.boosting <- boosting(formula = formula, data = data[train, ],
  mfinal = mfinal, coeflearn = "Freund", boos = TRUE, control = cntrl)
data.boostingFreundTrue <- data.boosting
# Training error
table(data.boosting$class, vardep[train],
  dnn = c("Predicted Class", "Observed Class"))
1 - sum(data.boosting$class == vardep[train]) / length(vardep[train])
# Test error
data.predboost <- predict.boosting(data.boosting, newdata = data[-train, ])
data.predboost$confusion
data.predboost$error
data.boosting$imp

# The error evolution plots
data.boosting <- data.boostingBreimanFalse
errorevol.train <- errorevol(data.boosting, data[train, ])
errorevol.test <- errorevol(data.boosting, data[-train, ])
plot(errorevol.test[[1]], type = "l", ylim = c(0, 0.5),
  main = "Adaboost error versus number of trees", xlab = "Iterations",
  ylab = "Error", col = "red", lwd = 2)
lines(errorevol.train[[1]], cex = 0.5, col = "blue", lty = 1, lwd = 2)
legend("topright", c("test", "train"), col = c("red", "blue"), lty = 1, lwd = 2)
abline(h = min(errorevol.test[[1]]), col = "red", lty = 2, lwd = 2)
abline(h = min(errorevol.train[[1]]), col = "blue", lty = 2, lwd = 2)

data.boosting <- data.boostingBreimanTrue
errorevol.train <- errorevol(data.boosting, data[train, ])
errorevol.test <- errorevol(data.boosting, data[-train, ])
plot(errorevol.test[[1]], type = "l", ylim = c(0, 0.5),
  main = "Adaboost error versus number of trees", xlab = "Iterations",
  ylab = "Error", col = "red", lwd = 2)
lines(errorevol.train[[1]], cex = 0.5, col = "blue", lty = 1, lwd = 2)
legend("topright", c("test", "train"), col = c("red", "blue"), lty = 1, lwd = 2)
abline(h = min(errorevol.test[[1]]), col = "red", lty = 2, lwd = 2)
abline(h = min(errorevol.train[[1]]), col = "blue", lty = 2, lwd = 2)

data.boosting <- data.boostingFreundFalse
errorevol.train <- errorevol(data.boosting, data[train, ])
errorevol.test <- errorevol(data.boosting, data[-train, ])
plot(errorevol.test[[1]], type = "l", ylim = c(0, 0.5),
  main = "Adaboost error versus number of trees", xlab = "Iterations",
  ylab = "Error", col = "red", lwd = 2)
lines(errorevol.train[[1]], cex = 0.5, col = "blue", lty = 1, lwd = 2)
legend("topright", c("test", "train"), col = c("red", "blue"), lty = 1, lwd = 2)
abline(h = min(errorevol.test[[1]]), col = "red", lty = 2, lwd = 2)
abline(h = min(errorevol.train[[1]]), col = "blue", lty = 2, lwd = 2)

data.boosting <- data.boostingFreundTrue
errorevol.train <- errorevol(data.boosting, data[train, ])
errorevol.test <- errorevol(data.boosting, data[-train, ])
plot(errorevol.test[[1]], type = "l", ylim = c(0, 0.5),
  main = "Adaboost error versus number of trees", xlab = "Iterations",
  ylab = "Error", col = "red", lwd = 2)
lines(errorevol.train[[1]], cex = 0.5, col = "blue", lty = 1, lwd = 2)
legend("topright", c("test", "train"), col = c("red", "blue"), lty = 1, lwd = 2)
abline(h = min(errorevol.test[[1]]), col = "red", lty = 2, lwd = 2)
abline(h = min(errorevol.train[[1]]), col = "blue", lty = 2, lwd = 2)
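# The fixed value newmfinal = 253 in the pruning call below presumably
# corresponds to the iteration with the lowest test error. A minimal sketch
# (not part of the original script) to locate it, assuming errorevol.test
# comes from the last model fitted above:
which.min(errorevol.test[[1]])  # iteration with the minimum test error
min(errorevol.test[[1]])        # the corresponding error rate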
# Pruning the ensemble: predict again using only the first 253 trees
data.prune <- predict.boosting(data.boosting, newdata = data[-train, ],
  newmfinal = 253)
data.prune$confusion
data.prune$error

# 4.2. A multiclass example
data("Vehicle")

# Repeat 50 iterations to calculate the average error
rows <- 50
col <- 4
errortrain <- array(0, c(rows, col))
errortest <- array(0, c(rows, col))
l <- length(Vehicle[, 1])
maxdepth <- 5
mfinal <- 50
matrix.sub <- array(0, c(2 * l / 3, rows))
cntrl <- rpart.control(maxdepth = 5, cp = -1, minsplit = 0)

for (m in 1:rows) {
  sub <- sample(1:l, 2 * l / 3)
  matrix.sub[, m] <- sub

  # rpart
  Vehicle.rpart <- rpart(Class ~ ., data = Vehicle[sub, ], maxdepth = maxdepth)
  # Training error
  Vehicle.rpart.pred <- predict(Vehicle.rpart, newdata = Vehicle[sub, ],
    type = "class")
  tb <- table(Vehicle.rpart.pred, Vehicle$Class[sub])
  errortrain[m, 1] <- 1 - (sum(diag(tb)) / sum(tb))
  # Test error
  Vehicle.rpart.pred <- predict(Vehicle.rpart, newdata = Vehicle[-sub, ],
    type = "class")
  tb <- table(Vehicle.rpart.pred, Vehicle$Class[-sub])
  errortest[m, 1] <- 1 - (sum(diag(tb)) / sum(tb))

  # Bagging
  Vehicle.bagging <- bagging(Class ~ ., data = Vehicle[sub, ],
    mfinal = mfinal, control = cntrl)
  # Training error
  errortrain[m, 2] <- 1 - sum(Vehicle.bagging$class == Vehicle$Class[sub]) /
    length(Vehicle$Class[sub])
  # Test error
  Vehicle.predbagging <- predict.bagging(Vehicle.bagging,
    newdata = Vehicle[-sub, ])
  errortest[m, 2] <- Vehicle.predbagging$error

  # AdaBoost.M1
  Vehicle.adaboost <- boosting(Class ~ ., data = Vehicle[sub, ],
    mfinal = mfinal, coeflearn = "Freund", boos = TRUE, control = cntrl)
  # Training error
  errortrain[m, 3] <- 1 - sum(Vehicle.adaboost$class == Vehicle$Class[sub]) /
    length(Vehicle$Class[sub])
  # Test error
  Vehicle.adaboost.pred <- predict.boosting(Vehicle.adaboost,
    newdata = Vehicle[-sub, ])
  errortest[m, 3] <- Vehicle.adaboost.pred$error

  # SAMME
  Vehicle.SAMME <- boosting(Class ~ ., data = Vehicle[sub, ],
    mfinal = mfinal, coeflearn = "Zhu", boos = TRUE, control = cntrl)
  # Training error
  # table(Vehicle.SAMME$class, Vehicle$Class[sub],
  #   dnn = c("Predicted Class", "Observed Class"))
  errortrain[m, 4] <- 1 - sum(Vehicle.SAMME$class == Vehicle$Class[sub]) /
    length(Vehicle$Class[sub])
  # Test error
  Vehicle.SAMME.pred <- predict.boosting(Vehicle.SAMME,
    newdata = Vehicle[-sub, ])
  # Vehicle.SAMME.pred$confusion
  errortest[m, 4] <- Vehicle.SAMME.pred$error

  # Keep the best model of each method across the 50 iterations
  if (m == 1) {
    Vehicle.rpart.best <- Vehicle.rpart
    Vehicle.bagging.best <- Vehicle.bagging
    Vehicle.adaboost.best <- Vehicle.adaboost
    Vehicle.SAMME.best <- Vehicle.SAMME
  } else {
    if (errortest[m, 1]