# all examples are run using the option: options(digits = 4)

# Code for examples in Section 5.1
# Descriptive statistics

# the packages needed
library("ICS")
library("mvtnorm")

# creating the data and the ICS
set.seed(2)
X <- rmvnorm(1000, c(0, 0, 1))
ics.X <- ics(X, stdKurt = FALSE)
Z <- ics.components(ics.X)

# the four summary statistics
colMeans(X)
cov(X)
mean3(Z) - colMeans(Z)
(dim(X)[2] + 2) * (ics.X@gKurt - 1)

# Code for examples in Section 5.2
# Diagnostic plots for the modified wood data set

# the packages needed
library("MASS")
library("ICS")

# Outlier identification in the modified wood data set
data("wood", package = "robustbase")

# regular and robust Mahalanobis distances
maha1.wood <- sqrt(mahalanobis(wood, colMeans(wood), cov(wood)))
set.seed(1)
covmve.wood <- cov.rob(wood)
maha2.wood <- sqrt(mahalanobis(wood, covmve.wood$center, covmve.wood$cov))

# Mahalanobis distance plots
max.maha.wood <- max(c(maha1.wood, maha2.wood))
out.id <- ifelse(maha2.wood <= sqrt(qchisq(0.975, 6)), 0, 1)
par(mfrow = c(1, 2), las = 1, mar = c(5, 4, 1, 1) + 0.1)
plot(maha1.wood, xlab = "index", ylab = "Mahalanobis distance",
     ylim = c(0, max.maha.wood), col = out.id + 1, pch = 15 * out.id + 1)
abline(h = sqrt(qchisq(0.975, 6)))
plot(maha2.wood, xlab = "index", ylab = "robust Mahalanobis distance",
     ylim = c(0, max.maha.wood), col = out.id + 1, pch = 15 * out.id + 1)
abline(h = sqrt(qchisq(0.975, 6)))
par(mfrow = c(1, 1))

# distance-distance plot
plot(maha1.wood, maha2.wood, xlab = "regular Mahalanobis distance",
     ylab = "robust Mahalanobis distance", ylim = c(0, max.maha.wood),
     xlim = c(0, max.maha.wood), col = out.id + 1, pch = 15 * out.id + 1,
     las = 1)
abline(0, 1)

# three different invariant coordinate systems
library("ICSNP")
my.HR.Mest <- function(X, ...) HR.Mest(X, ...)$scatter
ics.default.wood <- ics(wood)
ics.2.wood <- ics(wood, tM(wood)$V, tM(wood, 2)$V)
ics.3.wood <- ics(wood, my.HR.Mest, HP1.shape)

# plotting the last component of all three invariant coordinate systems
par(mfrow = c(1, 3), mar = c(5, 4, 1, 1) + 0.1)
plot(ics.components(ics.default.wood)[, 6], xlab = "index", ylab = "IC 6",
     sub = "ICS using cov and cov4", col = out.id + 1, pch = 15 * out.id + 1)
plot(ics.components(ics.2.wood)[, 6], xlab = "index", ylab = "IC 6",
     sub = "ICS using tM(,1) and tM(,2)", col = out.id + 1,
     pch = 15 * out.id + 1)
plot(ics.components(ics.3.wood)[, 6], xlab = "index", ylab = "IC 6",
     sub = "ICS using HR.Mest and HP1.shape", col = out.id + 1,
     pch = 15 * out.id + 1)
par(mfrow = c(1, 1), las = 0)
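# optional cross-check (not part of the original analysis): compare the
# observations flagged by the classical and the robust distance rule.
# The cutoff sqrt(qchisq(0.975, 6)) and the objects maha1.wood and out.id
# come from the code above; everything else is an illustrative sketch.
out.id.classical <- ifelse(maha1.wood <= sqrt(qchisq(0.975, 6)), 0, 1)
# observations flagged by the robust rule but masked under the classical one
which(out.id == 1 & out.id.classical == 0)
# cross-tabulation of the two outlier rules
table(classical = out.id.classical, robust = out.id)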
# Analysis of the Iris data set

# the packages needed
library("ICS")
library("MASS")
data("iris")

# creating and plotting an ICS for the Iris data set
iris.ics <- ics(iris[, 1:4])
plot(iris.ics, col = as.numeric(iris[, 5]))

# plotting the principal components of the Iris data set
pairs(princomp(iris[, 1:4])$scores, col = as.numeric(iris[, 5]))

# computing the within group "covariance" matrix
p <- dim(iris[, 1:4])[2]
n <- dim(iris[, 1:4])[1]
ngroup <- aggregate(iris$Species, list(iris$Species), length)$x
colMeans.iris <- colMeans(iris[, 1:4])
colMeans.iris.groups <- by(iris[, 1:4], iris$Species, colMeans)
colMeans.iris.diffs <- sapply(colMeans.iris.groups, "-", colMeans.iris,
                              simplify = FALSE)
matrix.iris <- sapply(colMeans.iris.diffs, tcrossprod, simplify = FALSE)
freq <- rep(ngroup, each = p^2)
matrix.iris <- array(unlist(matrix.iris), dim = c(p, p, nlevels(iris$Species)))
cov.within <- rowSums(matrix.iris * freq, dims = 2) / n

# creating and plotting an ICS using the within group covariance matrix
# for the Iris data set
ics.iris.disc <- ics(iris[, 1:4], cov(iris[, 1:4]), cov.within)
plot(ics.iris.disc, col = as.numeric(iris$Species))

# a density plot of the last component of the ICS with different rugs
# for the different species
iris.z <- ics.components(iris.ics)
plot(density(iris.z[, 4], bw = 0.15), las = 1,
     main = "Kernel Density of 4th component")
rug(iris.z[1:50, 4], col = 1)
rug(iris.z[51:100, 4], col = 2)
rug(iris.z[101:150, 4], col = 3, ticksize = -0.03)

# discriminant analysis for the Iris data set

# selecting a training sample
set.seed(4321)
train <- sample(1:150, 120)

# regular discriminant analysis using the data as it is
lda.iris <- lda(Species ~ Sepal.Length + Sepal.Width + Petal.Length +
                Petal.Width, prior = c(1, 1, 1)/3, data = iris,
                subset = train)
table(iris[-train, 5], predict(lda.iris, iris[-train, ])$class)

# discriminant analysis based on the last component of the ICS
ics.iris <- ics(as.matrix(iris[train, 1:4]))
iris.comp4 <- ics.components(ics.iris)[, 4]
lda.ics.iris <- lda(iris$Species[train] ~ iris.comp4, prior = c(1, 1, 1)/3)
iris.comp4.pred <- (as.matrix(iris[-train, 1:4]) %*% t(coef(ics.iris)))[, 4]
table(iris[-train, 5],
      predict(lda.ics.iris, data.frame(iris.comp4 = iris.comp4.pred))$class)

# PAA example for the Iris data
iris.centered <- sweep(iris[, 1:4], 2, colMeans(iris[, 1:4]), "-")
iris.paa <- ics(iris.centered, cov, covAxis, stdKurt = FALSE)
emp.align <- iris.paa@gKurt
mean(emp.align)
emp.align

# plot for deciding the number of components
screeplot(iris.paa, las = 1)
abline(h = 1)

# Code for examples in Section 5.3
# ICA

# the packages needed
library("ICS")
library("pixmap")

# loading the figures
fig1 <- read.pnm(system.file("pictures/cat.pgm", package = "ICS")[1])
fig2 <- read.pnm(system.file("pictures/road.pgm", package = "ICS")[1])
fig3 <- read.pnm(system.file("pictures/sheep.pgm", package = "ICS")[1])

# transforming the figures into vectors
p <- dim(fig1@grey)[2]
X <- cbind(as.vector(fig1@grey), as.vector(fig2@grey), as.vector(fig3@grey))

# mixing the vectorized figures
set.seed(4321)
A <- matrix(rnorm(9), ncol = 3)
X.mixed <- X %*% t(A)

# ICA to recover the figures
ICA.fig <- ics(X.mixed, stdB = "B")

# plotting the original figures, the mixed figures and the recovered figures
par(mfrow = c(3, 3), omi = rep(0.1, 4), mai = rep(0.1, 4))
plot(fig1)
plot(fig2)
plot(fig3)
plot(pixmapGrey(X.mixed[, 1], ncol = p))
plot(pixmapGrey(X.mixed[, 2], ncol = p))
plot(pixmapGrey(X.mixed[, 3], ncol = p))
plot(pixmapGrey(ics.components(ICA.fig)[, 1], ncol = p))
plot(pixmapGrey(ics.components(ICA.fig)[, 2], ncol = p))
plot(pixmapGrey(ics.components(ICA.fig)[, 3], ncol = p))
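# optional check (not part of the original script): how well do the
# extracted components match the source images?  Independent components
# are recovered only up to sign and scale, so we look at the absolute
# correlations between the original vectorized images in X and the
# estimated components; values close to 1 indicate good recovery.
round(abs(cor(X, ics.components(ICA.fig))), 2)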
# Code for examples in Section 5.4
# Multivariate Nonparametrics

# the packages needed
library("ICS")
library("mvtnorm")
library("ICSNP")

# creating a random sample and an affinely transformed sample
set.seed(2000)
X <- rmvnorm(150, c(1, 2, -1))
A <- matrix(rnorm(9), ncol = 3)
b <- c(1, 1, 1)
X.trans <- sweep(X %*% t(A), 2, b, "+")

# function for the Hodges-Lehmann (HL) estimator
HL.estimator <- function(x)
  wilcox.test(x, exact = TRUE, conf.int = TRUE)$estimate

# comparing the HL estimator of the original and the transformed data set
HLE.X <- apply(X, 2, HL.estimator)
as.vector(HLE.X %*% t(A) + b)
apply(X.trans, 2, HL.estimator)

# comparing the HL estimator of the original and the transformed data set
# when based on ICS
ics.X <- ics(X, S1 = cov, S2 = tyler.shape)
HL.ics.Z1 <- apply(ics.components(ics.X), 2, HL.estimator)
HL.ics.X <- as.vector(HL.ics.Z1 %*% t(solve(coef(ics.X))))
ics.X.trans <- ics(X.trans, S1 = cov, S2 = tyler.shape)
HL.ics.Z2 <- apply(ics.components(ics.X.trans), 2, HL.estimator)
HL.ics.X.trans <- as.vector(HL.ics.Z2 %*% t(solve(coef(ics.X.trans))))
as.vector(HL.ics.X %*% t(A) + b)
HL.ics.X.trans

# random sample for the testing example
set.seed(1979)
Y <- rmvt(60, diag(4), df = 6) +
  matrix(rep(c(0, 0.48), c(3 * 60, 60)), ncol = 4)
A2 <- matrix(rnorm(16), ncol = 4)

# testing in the original sample
rank.ctest(Y, scores = "normal")

# testing in the transformed sample
rank.ctest(Y %*% t(A2), scores = "normal")

# testing in the original data based on ICS
Z.Y <- as.matrix(ics.components(ics(Y, S1 = covOrigin, S2 = cov4,
                                    S2args = list(location = "Origin"))))
rank.ctest(Z.Y, scores = "normal")

# testing in the transformed data based on ICS
Z.Y.trans <- as.matrix(ics.components(ics(Y %*% t(A2), S1 = covOrigin,
  S2 = cov4, S2args = list(location = "Origin"))))
rank.ctest(Z.Y.trans, scores = "normal")
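# optional summary (not part of the original script): collect the four
# p-values to make the invariance property explicit.  This assumes that
# rank.ctest() returns a standard "htest" object with a p.value element;
# the tests based on the invariant coordinates should agree before and
# after the affine transformation, whereas the tests on the raw data
# need not.
c(original        = rank.ctest(Y, scores = "normal")$p.value,
  transformed     = rank.ctest(Y %*% t(A2), scores = "normal")$p.value,
  ics.original    = rank.ctest(Z.Y, scores = "normal")$p.value,
  ics.transformed = rank.ctest(Z.Y.trans, scores = "normal")$p.value)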