###################################################
### required packages
###################################################
library("multitable")
library("rbenchmark")
library("ggplot2")
library("arm")
library("vegan")
library("scales")


###################################################
### the structure of data lists
###################################################
library("multitable")
data("fake.community")
fake.community


###################################################
### the summary function reveals the structure of a data list
###################################################
summary(fake.community)


###################################################
### an example matrix-valued variable to include in a new data list
###################################################
abundance <- data.frame(
	sites=c(
		"midlatitude","subtropical","tropical","equatorial",
		"arctic","midlatitude","tropical","equatorial",
		"subtropical"
	),
	species=c(rep("capybara",4),rep("moss",4),"vampire"),
	abundance=c(4,10,8,7,5,6,9,3,1)
)


###################################################
### what the abundance matrix looks like
###################################################
abundance


###################################################
### data frames of vector-valued variables to include in a new data list
###################################################
environment <- data.frame(
	sites=c(
		"subarctic","midlatitude","subtropical",
		"tropical","equatorial"
	),
	temperature=c(0,10,20,50,30),
	precipitation=c(40,20,100,150,200)
)
trait <- data.frame(
	species=c("capybara","moss","vampire"),
	body.size=c(140,5,190),
	metabolic.rate=c(20,5,0)
)
environment
trait


###################################################
### create a new data list
###################################################
l <- list(abundance, environment, trait)
dl <- dlcast(l, fill = c(0, NA, NA))
summary(dl)
dl


###################################################
### a wide-format variable to add to a data list
###################################################
allele <- matrix(c(
	NA, 0.4, 0, 0.1, 0, NA,
	0, 0, 0, NA, 0.2, NA,
	NA,NA,NA,0,NA,NA
), 6, 3)
dimnames(allele) <- dimnames(dl)
allele


###################################################
### adding a wide-format variable to an existing data list
###################################################
dl.with.allele <- dl + variable(allele, c("sites", "species"))
summary(dl.with.allele)


###################################################
### array-like data list subscripting example
###################################################
fake.community[,c("2008","2009"),]
fake.community[1:3, "1537", c(TRUE, FALSE, FALSE)]
fake.community[c("temperature", "precipitation")]
fake.community[5:6]
fake.community[5:6, drop = FALSE]


###################################################
### assigning new values to variables in data lists
###################################################
fake.community$precipitation[is.na(fake.community$precipitation)] <- 
  c(30, 5, 50, 75, 50, 2, 7)
fake.community$log.precipitation <- log(fake.community$precipitation)


###################################################
### specifying the shape of a transformed variable
###################################################
fake.community[["log.precipitation", shape = "precipitation"]] <- 
  log(fake.community$precipitation)
fake.community["log.precipitation"]


###################################################
### creating variables to identify replicates
###################################################
fake.community <- dims_to_vars(fake.community)
summary(fake.community)


###################################################
### here are some variables that were created from dimensions of replication
###################################################
fake.community["species", drop = FALSE]


###################################################
### simplifying the example to illustrate melt-recast techniques
###################################################
data("fake.community")
fake.community <- fake.community[1:3, 1:2, 1:2][1:4]
summary(fake.community)


###################################################
### melting a data list
###################################################
dlm <- dlmelt(fake.community)
summary(dlm)


###################################################
### the first two lines of the data frames in the melted data list
###################################################
lapply(dlm, head, n = 2)


###################################################
### dlcast is an approximate inverse to dlmelt (eval = FALSE)
###################################################
dlcast(dlm)


###################################################
### manipulating dimensions of replication in melted form
###################################################
dlm$sites.years.species <- within(dlm$sites.years.species, {
  sites.years <- interaction(sites, years, drop = TRUE)
  sites <- years <- NULL
})
dlm$sites.years <- within(dlm$sites.years, {
  sites.years <- interaction(sites, years, drop = TRUE)
  sites <- years <- NULL
})


###################################################
### recasting can now be applied to produce a reshaped data list
###################################################
dl <- dlcast(dlm)
summary(dl)


###################################################
### coercing a data list to data frame
###################################################
data("fake.community")
fake.community.df <- as.data.frame(fake.community)


###################################################
### looking at the data list in data frame form
###################################################
fake.community.df[,-6]


###################################################
### the molding technique to speed up data list to data frame coercion
###################################################
data("fake.community")
fc.mold <- data.list.mold(fake.community)


###################################################
### coercion with a mold (eval = FALSE)
###################################################
as.data.frame(fake.community, mold = fc.mold)


###################################################
### coercion without a mold (eval = FALSE)
###################################################
as.data.frame(fake.community)


###################################################
### functions for benchmarking the molding technique
###################################################
with_molding <- function(){
  fake.community.mold <- data.list.mold(fake.community)
  for(i in 1:100)
    as.data.frame(fake.community, mold = fake.community.mold)
}
without_molding <- function(){
  for(i in 1:100)
    as.data.frame(fake.community)
}


###################################################
### benchmarking molding
###################################################
library("rbenchmark")
benchmark(with_molding(), without_molding(),
  replications = 10, 
  columns = c("test", "replications", "relative"))


###################################################
### a real example data list
###################################################
data("higgins")
summary(higgins)
dim(higgins)


###################################################
### introducing dlapply
###################################################
dlapply(higgins, 1, median)


###################################################
### simplifying the results of a dlapply using as.data.frame
###################################################
as.data.frame(dlapply(higgins, c(2, 3), median))


###################################################
### dlapply error
###################################################
dlapply(higgins[1:2], 3, quantile)


###################################################
### data lists are also lists
###################################################
is.list(higgins) && is.data.list(higgins)


###################################################
### using lapply with data lists
###################################################
lapply(higgins[c("abundance", "width", "trophic")], summary)


###################################################
### boxplot
###################################################
data("higgins")
boxplot(
  sqrt(abundance) ~ species, 
  as.data.frame(dims_to_vars(higgins)), 
  horizontal = TRUE, las = 1, xaxt = "n",
  xlab = "abundance (square-root scale)", ylab = "species")
tickmarks <- with(higgins, pretty(sqrt(abundance)))
axis(1, at = tickmarks, labels = tickmarks^2)


###################################################
### a faceted ggplot scatterplot from a data list
###################################################
data("higgins")
higgins <- dims_to_vars(higgins)
higgins$species <- with(higgins, 
  reorder(species, rank(life.history)))
higgins.df <- as.data.frame(higgins)
library("ggplot2")
library("arm")
p <- ggplot(higgins.df)
p <- p + facet_wrap( ~ species, ncol = 4)
p <- p + geom_point(aes(x = width, y = abundance, shape = life.history))
p <- p + stat_smooth(aes(x = width, y = abundance), se = FALSE, 
    method = "bayesglm", family = poisson, form = y ~ x + I(x^2),
    colour = "black", alpha = 0.4, geom = "line")
p <- p + scale_y_continuous(
  trans = "sqrt", 
  breaks = trans_breaks("sqrt", function(x) x^2))
print(p)


###################################################
### formula with hypothesised relationships among variables in higgins data list
###################################################
form <- abundance ~ -1 + life.history + (scale(width):life.history)
higgins.glm <- glm(
  form, family = poisson, 
  data = as.data.frame(higgins))
printCoefmat(summary(higgins.glm)$coefficients, signif.stars = FALSE)


###################################################
### plot observed versus expected graph (eval = FALSE)
###################################################
higgins[["fitted", shape = "abundance"]] <- 
  array(fitted.values(higgins.glm), dim(higgins))
ggplot(as.data.frame(higgins)) + 
  facet_wrap( ~ species, ncol = 4) + 
  geom_point(aes(x = fitted, y = abundance)) + 
  geom_abline(intercept = 0, slope = 1) + 
  scale_y_continuous(trans = "sqrt", 
    breaks = trans_breaks("sqrt", function(x) x^2)) + 
  scale_x_continuous(trans = "sqrt", 
    breaks = trans_breaks("sqrt", function(x) x^2))


###################################################
### store coefficients from the unrandomised data
###################################################
coef.obs <- coefficients(higgins.glm)[6:10]


###################################################
### allocate an array to store randomised coefficients
###################################################
B <- 500
coef.B <- array(0, c(B, length(coef.obs), 2))
dimnames(coef.B) <- 
list(1:B, names(coef.obs), c("species","seasons.rivers"))


###################################################
### create a mold for the higgins data list
###################################################
mold.higgins <- data.list.mold(higgins)


###################################################
### set the pseudo-random number generator seed for replicability
###################################################
set.seed(1)


###################################################
### loop over permutations
###################################################
higgins.tmp <- higgins
for(i in 1:B){
  higgins.tmp$abundance <- higgins$abundance[sample(24),,]
  df.tmp <- as.data.frame(higgins.tmp, mold = mold.higgins)
  coef.B[i, , 1] <- glm(form, family = poisson, df.tmp)$coefficients[6:10]
}


###################################################
### loop over permutations
###################################################
higgins.tmp <- higgins
for(i in 1:B){
  higgins.tmp$abundance <- higgins$abundance[,sample(4),sample(3)]
  df.tmp <- as.data.frame(higgins.tmp, mold = mold.higgins)
  coef.B[i, , 2] <- glm(form, family = poisson, df.tmp)$coefficients[6:10]
}


###################################################
### take the absolute values of the coefficients
###################################################
coef.B.abs <- abs(coef.B)
coef.obs.abs <- abs(coef.obs)


###################################################
### calculate pvalues
###################################################
pvalues <- apply(sweep(coef.B.abs, 2, coef.obs.abs, ">="), c(2,3), mean)
round(cbind(coef.obs, pvalues), 3)


###################################################
### melt and recast higgins to collapse dimensions of replication
###################################################
data("higgins")
higgins.melt <- dlmelt(higgins)

higgins.melt$species.seasons.rivers <- 
  within(higgins.melt$species.seasons.rivers, {
    seasons.rivers <- interaction(seasons, rivers, drop = TRUE)
    seasons <- rivers <- NULL
})
higgins.melt$seasons.rivers <- 
  within(higgins.melt$seasons.rivers, 
    seasons.rivers <- interaction(seasons, rivers, drop = TRUE)
)

higgins <- dlcast(higgins.melt)


###################################################
### summarise higgins with collapsed dimensions
###################################################
summary(higgins)


###################################################
### analysis of dissimilarities
###################################################
library("vegan")
dl.adonis <- with(higgins, adonis(
  t(abundance) ~ width + temp + depth + velocity + substrate + habitat, 
  strata = higgins$species
))
print(dl.adonis$aov.tab, signif.stars = FALSE)