# This file contains the code used in the Examples of manuscript:
# Calcagno & de Mazancourt J. Stat. Soft.
# It uses the R package glmulti (version 0.6-1 or later) 
# Vincent Calcagno, 2009

library("glmulti")

# Using the "d" method
# Section 3.1
# Figure 2
#################################


# How the number of models increases with the number of predictors
dd = matrix(nc= 6, nr = 6)
ddsimple= matrix(nc= 6, nr = 6)
ddmarg= matrix(nc= 6, nr = 6)
# make a dummy data frame
dummyfactor=as.factor(c(rep("y",5),rep("yn",5),rep("n",10)))
dummycovariate = 1:20
dod = data.frame(2:21,dummyfactor,dummyfactor,dummyfactor,dummyfactor,dummyfactor,dummyfactor,dummycovariate,dummycovariate,dummycovariate,dummycovariate,dummycovariate,dummycovariate)
# compute
for (i in 1:6) for (j in 1:6) dd[i,j]= glmulti(names(dod)[1],names(dod)[c(2:(2+i-1),8:(8+j-1))], data=dod, method="d")
for (i in 1:6) for (j in 1:6) ddsimple[i,j]= glmulti(names(dod)[1],names(dod)[c(2:(2+i-1),8:(8+j-1))], data=dod, method="d",level=1)
for (i in 1:6) for (j in 1) ddmarg[i,j]= glmulti(names(dod)[1],names(dod)[c(2:(2+i-1),8:(8+j-1))],  data=dod, method="d", marginality=T)
for (i in 1) for (j in 1:6) ddmarg[i,j]= glmulti(names(dod)[1],names(dod)[c(2:(2+i-1),8:(8+j-1))],  data=dod, method="d", marginality=T)
# the increase is not the same depending on whether factors or covariates are added
dd
# this is not true when interactions are not considered
ddsimple

# plots Figure 2 A
par(mfrow=c(2,1))
dd = .readRDS(file="dd")
plot(log(dd[,1],10),type="l",xlab="Number of predictors added",ylab="Log10 number of candidate models",cex.axis=1.2, cex.lab=1.2, cex.main=1.2, lwd=2)
lines(log(dd[1,],10),col="red",lwd=2)
points(log(ddsimple[,1],10)+0.05,lwd=2)
points(log(ddsimple[1,],10),col="red",lwd=2)
lines(log(ddmarg[1,],10),col="red", lty=21, lwd=2)

# What is the complexity of the models?
# With 1 factor and 5 covariates
# plots Figure 2 B
dd = integer(40)
for (i in 1:40) dd[i]= glmulti("X2.21", names(dod)[c(2:(2+1-1),8:(8+5-1))], data=dod, method="d", minK=(i), maxK=(i))
plot(dd/sum(dd),cex.axis=1.2, cex.lab=1.2, cex.main=1.2, lwd=2, type="b",xlab="Model complexity (K)",ylab="Density of candidate models")
# With 3 factors and 2 covariates
dd = integer(40)
for (i in 1:40) dd[i]= glmulti("X2.21", names(dod)[c(2:(2+3-1),8:(8+2-1))], data=dod, method="d", minK=(i), maxK=(i))
lines(dd/sum(dd),col="red",cex.axis=1.2, cex.lab=1.2, cex.main=1.2, lwd=2)
# This shows that most models have intermediate complexity
# This implies that setting constraints on model complexity is unlikely to reduce the number of candidate models importantly


# We now use simulated datasets
# Section 3.2
################################

# this generates datasets
gaga = function(ss, nono=1) 
{
	a = runif(ss)
	b = runif(ss)
	c = rexp(ss)
	f1 = as.factor(sample(c(rep("y",round(ss/3)),rep("n",ss-round(ss/3)))))
	f2 = as.factor(sample(c(rep("y",round(ss/3)),rep("n",ss-round(ss/3)))))
	y =  function (i) {
		if (f1[i]=="y") return(-2*b[i]*a[i] + 3*a[i] -2 + rnorm(1,sd=nono))
			else 
			return(-2*b[i]*a[i] -3*a[i] + 2 + rnorm(1,sd=nono))
	  } 
	resp = numeric(ss)
	for (j in 1:ss) resp[j] = y(j)
	data.frame(r=resp,a=a,b=b,c=c,f1=f1,f2=f2)
}

# estimate R2 of full model
riri = 0
for (i in 1:100)
	riri = riri+ summary(lm(r~(.)^2, data=gaga(100, nono = 0.5)))$r.squared
print(riri/100)


# number of models to be compared
glmulti(r~a*b*c*f1*f2, data= gaga(20), plotty=F, report=F, fitfunc=lm, crit=bic, method="d")

# example (Figure 3)
obj <- glmulti(r~a*b*c*f1*f2, data= gaga(20), plotty=F, report=F, fitfunc=lm, crit=bic, method="h")
plot(obj, highlight=c("f2:f1"))
plot(obj, type = "w", highlight=c("f2:f1"))
plot(obj, type = "s")


# Replicate exhaustive screening with n=20, 50, 100 and 150
# uses bic and no marginality
aloha= list()
aloharaw = list()
for (ss in c(20, 50, 100, 150)) {
billy = matrix(0,nr=100, nc=15)
for (r in 1:100) {
	datamock = gaga(ss, nono=0.5)
	rez <- glmulti(r~a*b*c*f1*f2, data=datamock, plotty=F, report=F, fitfunc=lm, marginality=F, crit=bic, confsetsize=100)
	aloharaw = c(aloharow, rez)
	billy[r,]=summary(rez)$termweights
}
aloha=c(aloha,billy)
}

# is the true model found ?


# box plot of results (Figure 4)
nous = as.numeric(unlist(aloha))
par (mfrow=c(1,4))
for (ss in c(0:3)) {
	bibi = matrix(nous[(1+ss*1500):(1500+ss*1500)], nc=15)
	dimnames(bibi) = list(c(),names(rez@codes[,!is.na(rez@codes[1,])]))
	boxplot(as.data.frame(bibi), col=c("gray", "white", "gray", gray(0.9), rep("white",2), "gray", rep("white", 2), "gray", rep("white",5)), xlab="Terms", ylab=c("","Importance")[ss==0], 
				 main=paste("n=",c(20,50,100,10000,150)[ss+1],sep=""),las=2,cex.axis=1.2, cex.lab=1.2, cex.main=2)
		abline(h=0.8, col="red")
		points(y= rep(1.015,4), x=c(1,3,7,10), pch=19, col="red")
				 }
			

# Type I and Type II errors
nbs = 50
alpha = matrix(nc=nbs,nr=5)
beta = matrix(nc=nbs,nr=5)
alphaP = matrix(nc=nbs,nr=5)
betaP = matrix(nc=nbs,nr=5)
for (ss in 0:3) {
	bibi = matrix(nous[(1+ss*1500):(1500+ss*1500)], nc=15)
	dimnames(bibi) = list(c(),names(rez@codes[,!is.na(rez@codes[1,])]))
	for (t in 1:nbs) {
		baba = bibi>=(0.99*t/nbs)
		alf = (rep(1,100)%*%baba)/100
		alpha[ss+1, t] = sum(alf[-c(1,3,7,10)])
		baba = !baba
		alf = (rep(1,100)%*%baba)/100
		beta[ss+1, t] = sum(alf[c(1,3,7,10)])
		baba = bibi>=(0.99*t/nbs)
		alf = (baba[,-c(1,3,7,10)])%*%rep(1,11)>= 1
		alphaP[ss+1, t] = sum(alf)/100
		baba = !baba
		alf = (baba[,c(1,3,7,10)])%*%rep(1,4)>= 1
		betaP[ss+1, t] = sum(alf)/100
		
	}
}

# plots Figure 5
plot(alphaP[1,],type="l", ylim=c(0,1),  col=grey(0.5), xlab="Importance threshold",lwd=2,cex.axis=1.2, cex.lab=1.2, cex.main=1.2, ylab="Probabilities (types I and II)", main="Prob(at least one mistake)",axes=F)
axis(1,at=c(0,10,20,30,40,50), labels=round(0.99*c(0,10,20,30,40,50)/50,1))
axis(2,at=c(0,0.2,0.4,0.6,0.8,1), labels=c(0,0.2,0.4,0.6,0.8,1))
lines(alphaP[2,], col=grey(0.3), lwd=2)
lines(alphaP[3,], col=grey(0.25), lwd=2)
lines(alphaP[4,], col=grey(0), lwd=2)
lines(betaP[1,], ylim=c(0,1), col=grey(0.5),lwd=4, lty=21)
lines(betaP[2,], col=grey(0.3),lwd=4, lty=21)
lines(betaP[3,], col=grey(0.25),lwd=4, lty=21)
lines(betaP[4,], col=grey(0),lwd=4, lty=21)
abline(v= which(abs(0.99*(1:50)/50-0.8)==min(abs(0.99*(1:50)/50-0.8))),col="red")
abline(h=0.05, col="blue")


# We now use the genetic algorithm approach on a sample data set of size 100

dolly = gaga(100, nono=0.5)

# time it takes to perform exhaustive screening
ini = Sys.time()
reference = glmulti(r~a*b*c*f1*f2, data=dolly, plotty=F, report=F, fitfunc=lm, marginality=F, crit=bic, confsetsize=100)
fini = Sys.time()
fini-ini

# run 500 simulations with random parameters
rob = list()
for (r in 1:500)
	rob=c(rob,glmulti(r~a*b*c*f1*f2, data=dolly, plotty=F, report=F, fitfunc=lm, marginality=F, crit=bic, confsetsize=100, method="g",
			popsize=sample(c(10,20,30,40,100,150),1),
			mutrate=sample(c(10^-3,10^-2,10^-4),1),
			sexrate=sample(c(0,0.1,0.2),1),imm=sample(c(0,0.2,0.5),1), deltaM=sample(c(0,0.1,0.2),1), deltaB=sample(c(0.1,0.2,0.5),1),
			conseq=sample(c(2,3,4,5),1)))


# Distribution of computation time
titou = as.numeric(lapply(rob,function(x) x@params$elapsed))
hist(titou)

# I C profiles against the truth (Figure 6)
okays = 1:500
par(mfrow=c(1,1))
plot(x=reference@crits,y=rob[[okays[1]]]@crits,type="p",pch=19, col="grey",xlab="IC profile from exhaustive screening",
		main="", ylab="IC profile from genetic algorithm", cex.axis=1.2, cex.lab=1.2, cex.main=1.2, lwd=2, ylim=c(min(reference@crits),340))
for(i in 2:length(okays))
	if (length(rob[[okays[i]]]@crits)==100) points(x=reference@crits,y=rob[[okays[i]]]@crits,pch=19, col="grey")
 abline(a=0,b=1, col="red")

# how often did we find the best model ?
ref = as.character(reference@formulas[1])
mamaga = as.numeric(unlist(lapply(aloharawn, function(x) as.character(x@formulas[1])==ref)))
sum(mamas)/500

# Convergence dynamics (Figure 7)
par(mfrow=c(2,3))
tt1=2
hist(as.numeric(unlist(lapply(rob, function (t) t@params$dynab[min(tt1,length(t@params$dynab))]))),
		ann=F,  xlab="",ylim=c(0,0.3),freq=F,breaks=150+3*(0:49), main="100 gen.", col="grey", cex.axis=1.2, cex.lab=1.2, cex.main=2)
	abline(v=reference@crits[1], col="red")
tt1=50
hist(as.numeric(unlist(lapply(rob, function (t) t@params$dynab[min(tt1,length(t@params$dynab))]))),
		xlab="Best IC in confidence set", ylab="", main="2500 gen.",ylim=c(0,0.3),freq=F,,breaks=150+3*(0:49),col="grey",cex.axis=1.2, cex.lab=1.2, cex.main=2)
	abline(v=reference@crits[1], col="red")
tt1=100
hist(as.numeric(unlist(lapply(rob, function (t) t@params$dynab[min(tt1,length(t@params$dynab))]))),
		xlab="", ylab="", main="5000 gen.", ylim=c(0,0.3),freq=F,,breaks=150+3*(0:49),col="grey",cex.axis=1.2, cex.lab=1.2, cex.main=2)
	abline(v=reference@crits[1], col="red")
#tt1=300
#hist(as.numeric(unlist(lapply(rob, function (t) t@params$dynab[min(tt1,length(t@params$dynab))]))),
		#xlab="", ylab="", main="10000 gen.", ylim=c(0,0.3),freq=F,,breaks=150+3*(0:49),col="grey")
tt1=2
hist(as.numeric(unlist(lapply(rob, function (t) t@params$dynam[min(tt1,length(t@params$dynab))]))),
		xlab="",  main="",ylim=c(0,0.06),freq=F,,breaks=150+4.5*(0:49),col="grey",cex.axis=1.2, cex.lab=1.2, cex.main=2)
	abline(v=mean(reference@crits), col="red")
tt1=50
hist(as.numeric(unlist(lapply(rob, function (t) t@params$dynam[min(tt1,length(t@params$dynab))]))),
		xlab="Mean IC in confidence set", ylab="", main="",ylim=c(0,0.06),freq=F,,breaks=150+4.5*(0:49),col="grey",cex.axis=1.2, cex.lab=1.2, cex.main=2)
	abline(v=mean(reference@crits), col="red")
tt1=100
hist(as.numeric(unlist(lapply(rob, function (t) t@params$dynam[min(tt1,length(t@params$dynab))]))),
		xlab="", ylab="", main="",ylim=c(0,0.06),freq=F,,breaks=150+4.5*(0:49),col="grey",cex.axis=1.2, cex.lab=1.2, cex.main=2)
		abline(v=mean(reference@crits), col="red")


# Effect of immigration on convergence (Figure 8)
# Figure bimodes + immigration
mimi = as.numeric(lapply(rob, function(x) mean(x@crits)))
immvals =  as.numeric(lapply(rob, function(x) x@params$imm))
iilow=which(((immvals==0)*(as.numeric(unlist(lapply(rob, function(x) length(x@crits))))==100))==T)
iihigh=which(((immvals==0.5)*(as.numeric(unlist(lapply(rob, function(x) length(x@crits))))==100))==T)
par(mfrow = c(1,2))
baby <- hist(mimi, breaks=40, freq=F, right=F,xlab="Mean IC in returned confidence set", main="No immigration",col="gray", ylim=c(0,0.042),plot=F)$breaks
hist(mimi[iilow], breaks=baby, freq=F, right=F,xlab="Mean IC in returned confidence set", main="No immigration",col="gray", ylim=c(0,0.042))
abline(v=mean(reference@crits),col="red")
text(x=mean(reference@crits)+15, y=0.041,col="red", labels="Target value")
hist(mimi[iihigh],breaks=baby, right=F, freq=F,xlab="Mean IC in returned confidence set", main="Immigration (0.5)",col="gray", ylim=c(0,0.13))
abline(v=mean(reference@crits),col="red")
text(x=mean(reference@crits)+14, y=0.127,col="red", labels="Target value")


# Efficiency of consensus
# take random consensus of 2 or 4 objects
robby = list()
for (i in 1:500)
	robby=c(robby, consensus(list(rob[[sample(okays,1)]],rob[[sample(okays,1)]]),confsetsize=100))
robby4 = list()
for (i in 1:500)
	robby4=c(robby4, consensus(list(rob[[sample(okays,1)]],rob[[sample(okays,1)]],rob[[sample(okays,1)]],rob[[sample(okays,1)]]),confsetsize=100))

susuall = matrix(nr=500,nc=length(summary(reference)$termweights))
for (ww in 1:500)
	susuall[ww,] = summary(rob[[ww]])$termweights
susuC = matrix(nr=500,nc=length(summary(reference)$termweights))
for (ww in 1:500)
	susuC[ww,] = summary(robby[[ww]])$termweights
susuC4 = matrix(nr=500,nc=length(summary(reference)$termweights))
for (ww in 1:500)
	susuC4[ww,] = summary(robby4[[ww]])$termweights
dimnames(susuC4) <- dimnames(susuC) <- dimnames(susuall) <- list(c(),names(rob[[1]]@codes[,!is.na(rob[[1]]@codes[1,])]))

# plots Figure 9
par(mfrow = c(1,3))
boxplot(as.data.frame(susuall),xlab="", ylab="Term importance", main="No consensus",cex.axis=1.2, cex.lab=1.2, cex.main=2, las=2)
points(x=1:15,y=summary(reference)$termweights, col="red", pch=19)
boxplot(as.data.frame(susuC),xlab="", ylab="", main="Consensus of 2",cex.axis=1.2, cex.lab=1.2, cex.main=2, las=2)
points(x=1:15,y=summary(reference)$termweights, col="red", pch=19)
boxplot(as.data.frame(susuC4),xlab="", ylab="", main="Consensus of 4",cex.axis=1.2, cex.lab=1.2, cex.main=2, las=2)
points(x=1:15,y=summary(reference)$termweights, col="red", pch=19)


# We now use the birthwt dataset in package MASS 
# Section 3.3
################################

library("MASS")
# this prepares the data exactly like Venables1997
attach(birthwt)
race <- factor(race,labels=c("white","black","other"))
ptd <- factor(ptl>0,labels=c("y","n"))
ftv <- factor(ftv)
levels(ftv)[-(1:2)] <- "2+"
bwt = data.frame(low=factor(low),age,lwt,race,smoke=factor(smoke>0,labels=c("y","n")), ptd, ht=factor(ht>0,labels=c("y","n")),ui=factor(ui>0,labels=c("y","n")),ftv)
detach("birthwt")
rm(race, ptd,ftv)

# there are lots of models:
glmulti("low", names(bwt)[-1], data=bwt, family=binomial, method="d", level=1, confsetsize=300)
glmulti("low", names(bwt)[-1], data=bwt, family=binomial, method="d", marginality=FALSE)
glmulti("low", names(bwt)[-1], data=bwt, family=binomial, method="d", marginality=TRUE)

# Exhaustive screening
# main effects only
test1 <- glmulti(low~., data=bwt, family=binomial, crit=aic, level=1)

# Plots Figure 10
par(mfrow= c(2,1))
plot(test1, type="p")
plot(test1, type="s")


# With interactions: split exhaustive search into several parts (faster)
# This was run on 20 nodes on a cluster
partobj <- glmulti(low~., data=bwt, family=binomial,  chunk=X, chunks=20, plotty=F, report=F, marginality=T, crit=aic, name="exhausting")  
write(partobj, file="|object") 
# X in 1.. 20
# write used this way produces raw R object
# they can read with function consensus or more directly with .readRDS()

# put them together
fullobj <- consensus(list.files(pattern="exhausting"), confsetsize = 100) 

# best models:
print(fullobj)

# Figure 11:
par(mfrow= c(2,1))
plot(fullobj, type="p")
plot(fullobj, type="s")


# We now run 20 genetic algorithms 
rob = list()
for (i in 1:20) {
	rez <- glmulti(low~.*., data=bwt, family=binomial, name=paste("ga", i), plotty=FALSE, marginality=T, report=F, method="g", popsize=100,
		mutrate=0.06,
		sexrate=0.15,
		imm=0.2,
		deltaM=0.01,
		deltaB=0,
		conseq=6) 
	rob=c(rob, rez)
}

# Convergence through time (Figure 12)
plot(rob[[1]]@params$dynab, type="p", col="grey",pch=19)
 for (i in 2:500)
 points(rob[[i]]@params$dynab, col="grey",pch=19)

meanB = matrix(c(rob[[1]]@params$dynab, rep(0, (300-length(rob[[1]]@params$dynab)))), nr=1, nc=300)
 for (i in 2:500) {
 meanB = rbind(meanB, c(rob[[i]]@params$dynab, rep(mean(rob[[i]]@params$dynab), (300-length(rob[[i]]@params$dynab)))))
 }
meanB2 = numeric(300)
 for (i in 1:120)
 meanB2[i]=mean(meanB[,i])
lines(x=50*(1:300),y=meanB2, col="blue")


# make a consensus of them and check results are the same as with exhaustive search
coco <- consensus(rob, confsetsize = 100) 
par(mfrow= c(2,1))
plot(coco, type="p")
plot(coco, type="s")