## packages
library("smacof")
library("anacor")
library("tm")
library("slam")
library("proxy")
library("wordcloud")
library("topicmodels")
# Bring the saved workspace objects from jancorp.rda into the session;
# the code below expects it to provide the corpus `jancorp`.
load("jancorp.rda")

## word cloud
# Colour palette for the cloud: eight colours from the "Dark2" scheme.
cloud_palette <- brewer.pal(n = 8, name = "Dark2")

# Fix the RNG state right before drawing so repeated runs start from the
# same random stream.
set.seed(123)
wordcloud(
  jancorp,
  min.freq = 10,
  colors = cloud_palette
)

## dtm
# `jancorp` is already in the workspace (loaded from "jancorp.rda" at the top
# of the script), so it is not read from disk a second time here.
dtm <- DocumentTermMatrix(jancorp)

## tfidf
# The document-term matrix is stored in triplet form: dtm$v are the non-zero
# counts, dtm$i their row (document) indices, dtm$j their column (term)
# indices.
#
# Step 1: relative frequency of every non-zero entry within its document.
rel_freq <- dtm$v / row_sums(dtm)[dtm$i]
# Step 2: average those relative frequencies per term.
mean_tf <- tapply(rel_freq, dtm$j, mean)
# Step 3: inverse document frequency, i.e. log2 of (number of documents /
# number of documents containing the term).
idf <- log2(nDocs(dtm) / col_sums(dtm > 0))
tfidf <- mean_tf * idf

# Keep the terms whose tf-idf score is at or above the median.
cut <- median(tfidf)
dtm2 <- dtm[, tfidf >= cut]

## uniscale
# Cosine dissimilarities between the rows of the reduced document-term
# matrix, followed by a one-dimensional scaling fit.
doc_term <- as.matrix(dtm2)
delta <- proxy::dist(doc_term, method = "cosine")
fit1D <- uniscale(delta)
fit1D

# Draw the fitted coordinates as connected points on a horizontal line
# (y = 0) without axes or annotation, then write each point's name
# vertically just above it.
conf1D <- fit1D$conf
n_conf <- length(conf1D)
plot(
  x = conf1D,
  y = rep(0, n_conf),
  type = "o",
  pch = 19,
  ylim = c(-0.2, 0.11),
  axes = FALSE,
  ann = FALSE
)
text(
  x = conf1D,
  y = rep(0.02, n_conf),
  labels = names(conf1D),
  srt = 90,
  adj = c(0, 0.5)
)

## topicmodels
# Keep only the terms whose tf-idf score lies at or above the 95% quantile.
cut <- quantile(tfidf, probs = 0.95)
dtm2 <- dtm[, tfidf >= cut]
# Such an aggressive term filter can leave a document without any remaining
# term. LDA() rejects input containing all-zero rows, so drop those documents
# first. This is a no-op when every document keeps at least one term.
dtm2 <- dtm2[row_sums(dtm2) > 0, ]

SEED <- 123        # seed handed to LDA() via its control list
K <- 5             # number of topics
n_top_terms <- 5   # number of terms extracted per topic

fitTop <- LDA(dtm2, k = K, control = list(seed = SEED))
# Matrix-like result holding the `n_top_terms` leading terms of each topic.
toptop <- terms(fitTop, n_top_terms)
toptop

## simple CA
# Correspondence analysis of the reduced document-term matrix.
fitca <- anacor(as.matrix(dtm2), ellipse = FALSE)

# Restrict the column (term) scores to the words that also occur among the
# top terms stored in `toptop`.
col_scores <- fitca$col.scores
topvec <- as.vector(toptop)
cawords <- rownames(col_scores)
ind <- cawords %in% topvec
cpoints <- col_scores[ind, ]

# Term labels first (this opens the plot), ...
textplot(
  x = cpoints[, 1],
  y = cpoints[, 2],
  words = rownames(cpoints),
  new = TRUE,
  cex = 0.8,
  col = "coral",
  asp = 1,
  xlab = "Dimension 1",
  ylab = "Dimension 2",
  main = "CA Topic Map"
)

# ... then the row scores as small dots with their row names above them.
row_scores <- fitca$row.scores
points(row_scores, pch = 20, cex = 0.5)
text(row_scores, labels = rownames(row_scores), pos = 3, cex = 0.8)

# Dashed grey reference lines through the origin.
abline(h = 0, col = "gray", lty = 2)
abline(v = 0, col = "gray", lty = 2)

## JSS time series
# `datM` and `datJM` used below are presumably provided by JSS.rda
# (NOTE(review): confirm against the data file).
load("JSS.rda")
library("colorspace")

# Publication counts, one line per series, for the years 1996-2015.
# Colours and plotting symbols are defined once and shared with the legend.
pub_cols <- heat_hcl(4, l = c(50, 70))
pub_pch <- 15:18
matplot(
  x = 1996:2015, y = datM,
  type = "b", lty = 1, lwd = 2,
  col = pub_cols, pch = pub_pch,
  xaxp = c(1999, 2014, 5),
  xlab = "Year", ylab = "Publications"
)

legend(
  x = 1998, y = 85,
  legend = c("Articles", "Code Snippets", "Book Reviews", "Software Reviews"),
  lwd = 2, col = pub_cols, pch = pub_pch
)

# Impact measures, one line per metric, for the years 1999-2014.
impact_cols <- heat_hcl(3, l = c(50, 70))
impact_pch <- 15:17
matplot(
  x = 1999:2014, y = datJM,
  type = "b", lty = 1, lwd = 2,
  col = impact_cols, pch = impact_pch,
  xaxp = c(1999, 2014, 5),
  xlab = "Year", ylab = "Impact Factor"
)
legend(
  x = 2000, y = 6,
  legend = c("SNIP", "IPP", "SJR"),
  lwd = 2, col = impact_cols, pch = impact_pch
)

library("zoo")
# The code below expects counts.rda to provide `res`, a series with a
# `galley` column. The return value of load() (the names of the restored
# objects) is not needed and is deliberately not stored, which also avoids
# masking stats::var.
load("counts.rda")

# Extract the series and its mean once; both are reused several times below.
galley <- res$galley
galley_mean <- mean(galley)

main <- sprintf("Daily Statistics of Full Text Downloads\nOverall Mean Value: %d/day",
  round(galley_mean, 0))
par(mar = c(2, 5, 4, 1))
plot(galley, main = main, xlab = NA, ylab = "Download Counts per Day")
# Dashed red line at the overall mean.
abline(h = galley_mean, col = 2, lty = 2)

# Mark one observation of `series` (position `i`) with a filled dot and
# print its value to the right of the dot, both in colour `col`.
mark_extreme <- function(series, i, col) {
  x <- index(series)[i]
  y <- series[i]
  points(x, y, pch = 19, col = col)
  text(x, y, y, pos = 4, col = col)
}

# First occurrence of the maximum (red) and of the minimum (blue).
# which.max()/which.min() skip missing values instead of yielding NA.
mark_extreme(galley, which.max(coredata(galley)), col = 2)
mark_extreme(galley, which.min(coredata(galley)), col = 4)