# all examples are run using the option: options(digits = 4)

# Code for examples in Section 5.1
# Descriptive statistics

# the packages needed
library("ICS")
library("mvtnorm")

# creating the data and the ICS
set.seed(2)
X <- rmvnorm(1000, c(0, 0, 1))
ics.X <- ics(X, stdKurt = FALSE)
Z <- ics.components(ics.X)

# the four summary statistics
colMeans(X)
cov(X)
mean3(Z) - colMeans(Z)
(dim(X)[2] + 2) * (ics.X@gKurt - 1)

# Code for examples in Section 5.2
# Diagnostic plots for the modified wood data set

# the packages needed
library("MASS")
library("ICS")

# Outlier identification in the modified wood data set
data("wood", package = "robustbase")

# regular and robust Mahalanobis distances
maha1.wood <- sqrt(mahalanobis(wood, colMeans(wood), cov(wood)))
set.seed(1)
covmve.wood <- cov.rob(wood)
maha2.wood <- sqrt(mahalanobis(wood, covmve.wood$center, covmve.wood$cov))

# Mahalanobis distance plots
max.maha.wood <- max(c(maha1.wood, maha2.wood))
out.id <- ifelse(maha2.wood <= sqrt(qchisq(0.975, 6)), 0, 1)
par(mfrow = c(1, 2), las = 1, mar = c(5, 4, 1, 1) + 0.1)
plot(maha1.wood, xlab = "index", ylab = "Mahalanobis distance",
     ylim = c(0, max.maha.wood), col = out.id + 1, pch = 15 * out.id + 1)
abline(h = sqrt(qchisq(0.975, 6)))
plot(maha2.wood, xlab = "index", ylab = "robust Mahalanobis distance",
     ylim = c(0, max.maha.wood), col = out.id + 1, pch = 15 * out.id + 1)
abline(h = sqrt(qchisq(0.975, 6)))
par(mfrow = c(1, 1))

# distance-distance plot
plot(maha1.wood, maha2.wood, xlab = "regular Mahalanobis distance",
     ylab = "robust Mahalanobis distance", ylim = c(0, max.maha.wood),
     xlim = c(0, max.maha.wood), col = out.id + 1, pch = 15 * out.id + 1,
     las = 1)
abline(0, 1)

# three different invariant coordinate systems
library("ICSNP")
my.HR.Mest <- function(X, ...) HR.Mest(X, ...)$scatter
ics.default.wood <- ics(wood)
ics.2.wood <- ics(wood, tM(wood)$V, tM(wood, 2)$V)
ics.3.wood <- ics(wood, my.HR.Mest, HP1.shape)

# plotting the last component of all three invariant coordinate systems
par(mfrow = c(1, 3), mar = c(5, 4, 1, 1) + 0.1)
plot(ics.components(ics.default.wood)[, 6], xlab = "index", ylab = "IC 6",
     sub = "ICS using cov and cov4", col = out.id + 1, pch = 15 * out.id + 1)
plot(ics.components(ics.2.wood)[, 6], xlab = "index", ylab = "IC 6",
     sub = "ICS using tM(,1) and tM(,2)", col = out.id + 1,
     pch = 15 * out.id + 1)
plot(ics.components(ics.3.wood)[, 6], xlab = "index", ylab = "IC 6",
     sub = "ICS using HR.Mest and HP1.shape", col = out.id + 1,
     pch = 15 * out.id + 1)
par(mfrow = c(1, 1), las = 0)
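# optional cross-check (not part of the original analysis): compare the
# observations flagged by the classical and the robust distance rule.
# The cutoff sqrt(qchisq(0.975, 6)) and the objects maha1.wood and out.id
# come from the code above; everything else is an illustrative sketch.
out.id.classical <- ifelse(maha1.wood <= sqrt(qchisq(0.975, 6)), 0, 1)
# observations flagged by the robust rule but masked under the classical one
which(out.id == 1 & out.id.classical == 0)
# cross-tabulation of the two outlier rules
table(classical = out.id.classical, robust = out.id)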
# Analysis of the Iris data set

# the packages needed
library("ICS")
library("MASS")
data("iris")

# creating and plotting an ICS for the Iris data set
iris.ics <- ics(iris[, 1:4])
plot(iris.ics, col = as.numeric(iris[, 5]))

# plotting the principal components of the Iris data set
pairs(princomp(iris[, 1:4])$scores, col = as.numeric(iris[, 5]))

# computing the within group "covariance" matrix
p <- dim(iris[, 1:4])[2]
n <- dim(iris[, 1:4])[1]
ngroup <- aggregate(iris$Species, list(iris$Species), length)$x
colMeans.iris <- colMeans(iris[, 1:4])
colMeans.iris.groups <- by(iris[, 1:4], iris$Species, colMeans)
colMeans.iris.diffs <- sapply(colMeans.iris.groups, "-", colMeans.iris,
                              simplify = FALSE)
matrix.iris <- sapply(colMeans.iris.diffs, tcrossprod, simplify = FALSE)
freq <- rep(ngroup, each = p^2)
matrix.iris <- array(unlist(matrix.iris), dim = c(p, p, nlevels(iris$Species)))
cov.within <- rowSums(matrix.iris * freq, dims = 2) / n

# creating and plotting an ICS using the within group covariance matrix
# for the Iris data set
ics.iris.disc <- ics(iris[, 1:4], cov(iris[, 1:4]), cov.within)
plot(ics.iris.disc, col = as.numeric(iris$Species))

# a density plot of the last component of the ICS with different rugs
# for the different species
iris.z <- ics.components(iris.ics)
plot(density(iris.z[, 4], bw = 0.15), las = 1,
     main = "Kernel Density of 4th component")
rug(iris.z[1:50, 4], col = 1)
rug(iris.z[51:100, 4], col = 2)
rug(iris.z[101:150, 4], col = 3, ticksize = -0.03)

# discriminant analysis for the Iris data set

# selecting a training sample
set.seed(4321)
train <- sample(1:150, 120)

# regular discriminant analysis using the data as it is
lda.iris <- lda(Species ~ Sepal.Length + Sepal.Width + Petal.Length +
                Petal.Width, prior = c(1, 1, 1)/3, data = iris,
                subset = train)
table(iris[-train, 5], predict(lda.iris, iris[-train, ])$class)

# discriminant analysis based on the last component of the ICS
ics.iris <- ics(as.matrix(iris[train, 1:4]))
iris.comp4 <- ics.components(ics.iris)[, 4]
lda.ics.iris <- lda(iris$Species[train] ~ iris.comp4, prior = c(1, 1, 1)/3)
iris.comp4.pred <- (as.matrix(iris[-train, 1:4]) %*% t(coef(ics.iris)))[, 4]
table(iris[-train, 5],
      predict(lda.ics.iris, data.frame(iris.comp4 = iris.comp4.pred))$class)

# PAA example for the Iris data
iris.centered <- sweep(iris[, 1:4], 2, colMeans(iris[, 1:4]), "-")
iris.paa <- ics(iris.centered, cov, covAxis, stdKurt = FALSE)
emp.align <- iris.paa@gKurt
mean(emp.align)
emp.align

# plot for deciding the number of components
screeplot(iris.paa, las = 1)
abline(h = 1)

# Code for examples in Section 5.3
# ICA

# the packages needed
library("ICS")
library("pixmap")

# loading the figures
fig1 <- read.pnm(system.file("pictures/cat.pgm", package = "ICS")[1])
fig2 <- read.pnm(system.file("pictures/road.pgm", package = "ICS")[1])
fig3 <- read.pnm(system.file("pictures/sheep.pgm", package = "ICS")[1])

# transforming the figures into vectors
p <- dim(fig1@grey)[2]
X <- cbind(as.vector(fig1@grey), as.vector(fig2@grey), as.vector(fig3@grey))

# mixing the vectorized figures
set.seed(4321)
A <- matrix(rnorm(9), ncol = 3)
X.mixed <- X %*% t(A)

# ICA to recover the figures
ICA.fig <- ics(X.mixed, stdB = "B")

# plotting the original figures, the mixed figures and the recovered figures
par(mfrow = c(3, 3), omi = rep(0.1, 4), mai = rep(0.1, 4))
plot(fig1)
plot(fig2)
plot(fig3)
plot(pixmapGrey(X.mixed[, 1], ncol = p))
plot(pixmapGrey(X.mixed[, 2], ncol = p))
plot(pixmapGrey(X.mixed[, 3], ncol = p))
plot(pixmapGrey(ics.components(ICA.fig)[, 1], ncol = p))
plot(pixmapGrey(ics.components(ICA.fig)[, 2], ncol = p))
plot(pixmapGrey(ics.components(ICA.fig)[, 3], ncol = p))
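# optional check (not part of the original script): how well do the
# extracted components match the source images?  Independent components
# are recovered only up to sign and scale, so we look at the absolute
# correlations between the original vectorized images in X and the
# estimated components; values close to 1 indicate good recovery.
round(abs(cor(X, ics.components(ICA.fig))), 2)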
# Code for examples in Section 5.4
# Multivariate Nonparametrics

# the packages needed
library("ICS")
library("mvtnorm")
library("ICSNP")

# creating a random sample and an affinely transformed sample
set.seed(2000)
X <- rmvnorm(150, c(1, 2, -1))
A <- matrix(rnorm(9), ncol = 3)
b <- c(1, 1, 1)
X.trans <- sweep(X %*% t(A), 2, b, "+")

# function for the Hodges-Lehmann (HL) estimator
HL.estimator <- function(x)
  wilcox.test(x, exact = TRUE, conf.int = TRUE)$estimate

# comparing the HL estimator of the original and the transformed data set
HLE.X <- apply(X, 2, HL.estimator)
as.vector(HLE.X %*% t(A) + b)
apply(X.trans, 2, HL.estimator)

# comparing the HL estimator of the original and the transformed data set
# when based on ICS
ics.X <- ics(X, S1 = cov, S2 = tyler.shape)
HL.ics.Z1 <- apply(ics.components(ics.X), 2, HL.estimator)
HL.ics.X <- as.vector(HL.ics.Z1 %*% t(solve(coef(ics.X))))
ics.X.trans <- ics(X.trans, S1 = cov, S2 = tyler.shape)
HL.ics.Z2 <- apply(ics.components(ics.X.trans), 2, HL.estimator)
HL.ics.X.trans <- as.vector(HL.ics.Z2 %*% t(solve(coef(ics.X.trans))))
as.vector(HL.ics.X %*% t(A) + b)
HL.ics.X.trans

# random sample for the testing example
set.seed(1979)
Y <- rmvt(60, diag(4), df = 6) +
  matrix(rep(c(0, 0.48), c(3 * 60, 60)), ncol = 4)
A2 <- matrix(rnorm(16), ncol = 4)

# testing in the original sample
rank.ctest(Y, scores = "normal")

# testing in the transformed sample
rank.ctest(Y %*% t(A2), scores = "normal")

# testing in the original data based on ICS
Z.Y <- as.matrix(ics.components(ics(Y, S1 = covOrigin, S2 = cov4,
                                    S2args = list(location = "Origin"))))
rank.ctest(Z.Y, scores = "normal")

# testing in the transformed data based on ICS
Z.Y.trans <- as.matrix(ics.components(ics(Y %*% t(A2), S1 = covOrigin,
  S2 = cov4, S2args = list(location = "Origin"))))
rank.ctest(Z.Y.trans, scores = "normal")
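# optional summary (not part of the original script): collect the four
# p-values to make the invariance property explicit.  This assumes that
# rank.ctest() returns a standard "htest" object with a p.value element;
# the tests based on the invariant coordinates should agree before and
# after the affine transformation, whereas the tests on the raw data
# need not.
c(original        = rank.ctest(Y, scores = "normal")$p.value,
  transformed     = rank.ctest(Y %*% t(A2), scores = "normal")$p.value,
  ics.original    = rank.ctest(Z.Y, scores = "normal")$p.value,
  ics.transformed = rank.ctest(Z.Y.trans, scores = "normal")$p.value)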