## Replication script: word cloud, tf-idf filtering, unidimensional scaling,
## LDA topic model, correspondence-analysis topic map, and journal statistics
## plots. Expects jancorp.rda, JSS.rda and counts.rda in the working directory.

## packages
library("smacof")
library("anacor")
library("tm")
library("slam")
library("proxy")
library("wordcloud")
library("topicmodels")

# Provides the `jancorp` object used by the word cloud and the
# document-term matrix below.
load("jancorp.rda")

## word cloud
set.seed(123)
wordcloud(jancorp, colors = brewer.pal(8, "Dark2"), min.freq = 10)

## dtm
dtm <- DocumentTermMatrix(jancorp)

## tfidf
# Mean relative term frequency per term (entry count divided by its document's
# row total, averaged over the documents in which the term occurs) ...
mean_tf <- tapply(dtm$v / row_sums(dtm)[dtm$i], dtm$j, mean)
# ... weighted by log2(number of documents / documents containing the term).
tfidf <- mean_tf * log2(nDocs(dtm) / col_sums(dtm > 0))

# Keep the terms whose tf-idf is at or above the median.
tfidf_median <- median(tfidf)
dtm2 <- dtm[, tfidf >= tfidf_median]

## uniscale
delta <- proxy::dist(as.matrix(dtm2), method = "cosine")
fit1D <- uniscale(delta)
fit1D

# Draw the one-dimensional configuration on a horizontal line and write the
# labels vertically just above each point.
conf1d <- fit1D$conf
baseline <- numeric(length(conf1d))
plot(
  conf1d, baseline,
  axes = FALSE, ann = FALSE, pch = 19, type = "o", ylim = c(-0.2, 0.11)
)
text(conf1d, baseline + 0.02, names(conf1d), srt = 90, adj = c(0, 0.5))

## topicmodels
# Stricter filter for the topic model: only terms at or above the 95% tf-idf
# quantile are kept.
tfidf_q95 <- quantile(tfidf, probs = 0.95)
dtm2 <- dtm[, tfidf >= tfidf_q95]
SEED <- 123
K <- 5
fitTop <- LDA(dtm2, k = K, control = list(seed = SEED))
toptop <- terms(fitTop, 5)
toptop

## simple CA
fitca <- anacor(as.matrix(dtm2), ellipse = FALSE)

# Restrict the plotted column points to the top terms of the fitted topics.
topvec <- as.vector(toptop)
cawords <- rownames(fitca$col.scores)
ind <- cawords %in% topvec
# drop = FALSE keeps a matrix even if only a single term matches, so the
# column indexing below stays valid.
cpoints <- fitca$col.scores[ind, , drop = FALSE]

textplot(
  cpoints[, 1], cpoints[, 2], rownames(cpoints),
  new = TRUE, cex = 0.8, col = "coral", asp = 1,
  xlab = "Dimension 1", ylab = "Dimension 2", main = "CA Topic Map"
)
points(fitca$row.scores, pch = 20, cex = 0.5)
text(
  fitca$row.scores,
  labels = rownames(fitca$row.scores), pos = 3, cex = 0.8
)
# Dashed reference lines through the origin.
abline(h = 0, v = 0, col = "gray", lty = 2)

## JSS time series
# NOTE(review): datM and datJM are not defined in this script; presumably they
# come from JSS.rda -- confirm.
load("JSS.rda")
library("colorspace")

# One palette per figure, shared by the plot and its legend so the two cannot
# get out of sync.
pub_cols <- heat_hcl(4, l = c(50, 70))
matplot(
  1996:2015, datM,
  type = "b", lty = 1, lwd = 2, col = pub_cols, pch = 15:18,
  xaxp = c(1999, 2014, 5), xlab = "Year", ylab = "Publications"
)
legend(
  1998, 85,
  c("Articles", "Code Snippets", "Book Reviews", "Software Reviews"),
  lwd = 2, col = pub_cols, pch = 15:18
)

impact_cols <- heat_hcl(3, l = c(50, 70))
matplot(
  1999:2014, datJM,
  type = "b", lty = 1, pch = 15:17, lwd = 2,
  xaxp = c(1999, 2014, 5), xlab = "Year", ylab = "Impact Factor",
  col = impact_cols
)
legend(
  2000, 6, c("SNIP", "IPP", "SJR"),
  lwd = 2, col = impact_cols, pch = 15:17
)

## download counts
library("zoo")
# NOTE(review): `res` is not defined in this script; presumably it comes from
# counts.rda -- confirm.
load("counts.rda")

galley <- res$galley
galley_mean <- mean(galley)
main <- sprintf(
  "Daily Statistics of Full Text Downloads\nOverall Mean Value: %d/day",
  round(galley_mean, 0)
)

# Remember the previous graphical parameters so they can be restored once the
# figure is complete.
old_par <- par(mar = c(2, 5, 4, 1))
plot(galley, main = main, xlab = NA, ylab = "Download Counts per Day")
abline(h = galley_mean, col = 2, lty = 2)

# Highlight and label the first occurrence of the maximum ...
idx_max <- which.max(coredata(galley))
points(index(res)[idx_max], galley[idx_max], pch = 19, col = 2)
text(
  index(res)[idx_max], galley[idx_max], galley[idx_max],
  pos = 4, col = 2
)

# ... and the first occurrence of the minimum.
idx_min <- which.min(coredata(galley))
points(index(res)[idx_min], galley[idx_min], pch = 19, col = 4)
text(
  index(res)[idx_min], galley[idx_min], galley[idx_min],
  pos = 4, col = 4
)
par(old_par)