### R code from vignette source 'article/jss4586.Rnw' ################################################### ### code chunk number 1: preliminaries ################################################### options(prompt = "R> ", continue = "+ ", width = 70, useFancyQuotes = FALSE) # JSS code formatting # Formatting Sys.setenv(LANG = 'en') # RNG initialization set.seed(42) ## Set colors for bids and offer that will be used throughout bidColor <- "green" offerColor <- "blue" # Required packages library("highfrequency") library("xts") library("data.table") library("scales") options("scipen" = 1, "digits" = 3, "digits.secs" = 3) ################################################### ### code chunk number 2: jss4586.Rnw:182-183 ################################################### options("digits" = 5) ################################################### ### code chunk number 3: show_sample_trades ################################################### data("sampleTDataRaw", package = "highfrequency") print(sampleTDataRaw, topn = 4) ################################################### ### code chunk number 4: show_sample_quotes ################################################### data("sampleQDataRaw", package = "highfrequency") print(exchangeHoursOnly(sampleQDataRaw), topn = 4) ################################################### ### code chunk number 5: uncleanedTradesAndQuotes ################################################### rawDat <- sampleTDataRaw[as.Date(DT, tz = "EST") == "2018-01-03"] rawQDat <- sampleQDataRaw[as.Date(DT, tz = "EST") == "2018-01-03"] rawQDat <- exchangeHoursOnly(rawQDat) rawDat <- exchangeHoursOnly(rawDat) xLim <- range(as.POSIXct("2018-01-03 11:00:00", tz = "EST"), as.POSIXct("2018-01-03 12:00:00", tz = "EST")) par(mgp = c(3,1,0), mai = c(0.4, 0.6, 0.35, 0.35)) yLim <- range(c(rawDat[DT %between% xLim]$PRICE, rawQDat[DT %between% xLim & BID!=0 & OFR != 0][, list(BID, OFR)])) rawDat <- rawDat[DT %between% xLim] rawQDat <- rawQDat[DT %between% xLim] rawQDat <- rawQDat[BID != 0 & OFR != 0] rawQDat <- rawQDat[BID < 155 | OFR > 157] plot(rawQDat$DT, rawQDat$BID, ylim = yLim, main = "", ylab = "Price", xlab = "Time", xlim = xLim, xaxs = "i", lwd = 1, pch = 19, xaxt='n', las = 2) points(rawQDat$DT, rawQDat$OFR, ylim = yLim, xlim = xLim, pch = 19, col = alpha("black", 0.8)) lines(rawDat$DT, rawDat$PRICE, xlim = xLim, col = "red", lwd = 2) axis(1, at = seq(xLim[1], xLim[2], by = 60*10), labels = format(seq(xLim[1], xLim[2], by = 60*10), format = "%H:%M")) ################################################### ### code chunk number 6: uncleaned2 ################################################### cleanedDat <- tradesCleanupUsingQuotes(tData = tradesCleanup(tDataRaw = sampleTDataRaw,exchange = "N", report = FALSE, printExchange = FALSE), qData = quotesCleanup(qDataRaw = sampleQDataRaw, exchange = "N", report = FALSE, printExchange = FALSE))[as.Date(DT) == "2018-01-03"] par(mfrow = c(2, 1), mai = c(0.4, 0.6, 0.35, 0.35), mgp = c(3,0.5,0)) xLim <- range(as.POSIXct("2018-01-03 09:30:00", tz = "EST"), as.POSIXct("2018-01-03 16:00:00", tz = "EST")) rawDat <- sampleTDataRaw[as.Date(DT, tz = "EST") == "2018-01-03"] rawDat <- exchangeHoursOnly(rawDat) ## Top plot: plot(0, 0, main = "", ylab = "price", xlab = "Time", xlim = xLim, xaxs = "i", type = "l", xaxt = "n", ylim = range(rawDat$PRICE), las = 2) xLim2 <- range(as.POSIXct(c("2018-01-03 11:45:00 EST", "2018-01-03 12:00:00 EST"), tz = "EST")) rect(xLim2[1], min(rawDat$PRICE), xLim2[2], max(rawDat$PRICE), border = FALSE, col = "darkgray") lines(rawDat$DT, rawDat$PRICE, xlim = xLim, type = "l") lines(cleanedDat$DT, cleanedDat$PRICE, col = "red") legend(xLim[2]-8000, 159, legend = c("raw","cleaned"), lty = c(1, 1), col = c("black", "red"), pt.cex = 2, box.lwd = 0, bty="n") axis(1, at = seq(xLim[1], xLim[2], by = 60*30)[c(1,2,4,6,8,10,12,14)], labels = format(seq(xLim[1], xLim[2], by = 60*30)[c(1,2,4,6,8,10,12,14)], format = "%H:%M"), cex = 0.9) ## Bottom plot: plotTQData(cleanedDat, sampleQData, xLim2, main = "", cex = 0.5, las = 2, xlab = "time", ylab = "price") ################################################### ### code chunk number 7: jss4586.Rnw:287-300 ################################################### sampleQData <- quotesCleanup(qDataRaw = sampleQDataRaw, exchanges = "N", type = "standard", report = FALSE) tradesAfterFirstCleaning <- tradesCleanup(tDataRaw = sampleTDataRaw, exchanges = "N", report = FALSE) sampleTData <- tradesCleanupUsingQuotes(tData = tradesAfterFirstCleaning, qData = sampleQData, lagQuotes = 0)[, c("DT", "EX", "SYMBOL", "PRICE", "SIZE")] print(sampleTData, topn = 4) ################################################### ### code chunk number 8: agg ################################################### agg <- aggregateTrades(sampleTData[, list(DT, PRICE, SIZE, SYMBOL)], marketOpen = "09:30:00", marketClose = "16:00:00", alignBy = "minutes", alignPeriod = 5) print(agg, digits = 6, topn = 4) ################################################### ### code chunk number 9: jss4586.Rnw:314-315 ################################################### options("digits" = 3) ################################################### ### code chunk number 10: refreshTime ################################################### start = as.POSIXct("1970-01-01", tz = "GMT") ta = start + c(0.1, 1.2, 4, 5, 9.2, 14) tb = start + c(0, 3.25, 6, 6.5, 6.9, 7.3, 7.7, 11, 15) tc = start + c(0.7, 1.3, 2, 5, 7, 8.3, 10, 13) a = as.xts(1:length(ta), order.by = ta) b = as.xts(1:length(tb), order.by = tb) c = as.xts(1:length(tc), order.by = tc) refreshTime(list("a" = a, "b" = b, "c" = c)) par(mfrow = c(2, 1), mai = c(0, 0.3, 0.2, 0), mar = c(2.5, 3.5, 1.5, 0)) plot(x = seq(range(ta,tb,tc)[1], range(ta,tb,tc)[2],length.out = max(length(ta), length(tb), length(tc))), y = seq(1,4,length.out = max(length(ta), length(tb), length(tc))), col = 0, ylab = "", xlab = "", axes = FALSE, main = "", las = 2) title(xlab = "time", mgp = c(1.25,0,0)) axis(1, at = seq(1,15, 2), mgp = c(0,0.5,0)) axis(2, at = c(1.5, 2.5, 3.5), labels = paste("asset", c("A", "B", "C")), las = 2, mgp = c(0,0.5,0)) box() lines(c(ta[1], ta[1]), c(1,2), col = 1, lty = 2) lines(c(tb[1], tb[1]), c(2,3), col = "red", lty = 2) lines(c(tc[1], tc[1]), c(3,4), col = "blue", lty = 2) points(ta[1], c(1.5), pch = "1", cex = 2) points(tb[1], c(2.5), pch = "1", cex = 2) points(tc[1], c(3.5), pch = "1", cex = 2) idx <- 2 for (i in ta[-1]) { lines(c(i, i), c(1,2), col = 1, lty = 2) points(c(i), c(1.5), col = 1, pch = paste(as.numeric(a[idx])), cex = 2) idx <- idx + 1 } idx <- 2 for (i in tb[-1]) { lines(c(i, i), c(2,3), col = "red", lty = 2) points(c(i), c(2.5), col = 1, pch = paste(as.numeric(b[idx])), cex = 2) idx <- idx + 1 } idx <- 2 for (i in tc[-1]) { lines(c(i, i), c(3,4), col = "blue", lty = 2) points(c(i), c(3.5), col = 1, pch = paste(as.numeric(c[idx])), cex = 2) idx <- idx + 1 } ## Add arrows: shape::Arrows(0.25, 1.5, 0.65, 1.5, code =2, arr.adj = 0, arr.type = "simple", col = "black") shape::Arrows(0.15, 2.5, 0.65, 2.5, code =2, arr.adj = 0, arr.type = "simple", col = "red") shape::Arrows(1.5, 1.5, 3, 1.5, code =2, arr.adj = 0, arr.type = "simple") shape::Arrows(2.25, 3.5, 3, 3.5, code =2, arr.adj = 0, arr.type = "simple", col = "blue") shape::Arrows(5.25, 1.5, 5.75, 1.5, code =2, arr.adj = 0, arr.type = "simple") shape::Arrows(11.25, 2.5, 13.75, 2.5, code =2, arr.adj = 0, arr.type = "simple", col = "red") shape::Arrows(5.25, 3.5, 5.75, 3.5, code =2, arr.adj = 0, arr.type = "simple", col = "blue") shape::Arrows(8.35, 3.5, 9.15, 3.5, code =2, arr.adj = 0, arr.type = "simple", col = "blue") shape::Arrows(7.85, 2.5, 9.15, 2.5, code =2, arr.adj = 0, arr.type = "simple", col = "red") shape::Arrows(13.25, 3.5, 13.75, 3.5, code =2, arr.adj = 0, arr.type = "simple", col = "blue") RT <- refreshTime(list("a" = a, "b" = b, "c" = c)) plot(x = seq(range(ta,tb,tc)[1], range(ta,tb,tc)[2],length.out = max(length(ta), length(tb), length(tc))), y = seq(1,4,length.out = max(length(ta), length(tb), length(tc))), col = 0, ylab = "", xlab = "", axes = FALSE, main = "", las = 2) title(xlab = "time", mgp = c(1.25,0,0)) axis(1, at = seq(1,15, 2), mgp = c(0,0.5,0)) axis(2, at = c(1.5, 2.5, 3.5), labels = paste("asset", c("A", "B", "C")), las = 2, mgp = c(0,0.5,0)) box() RTstart <- as.numeric(index(RT)[1]) lines(c(RTstart, RTstart), c(1,2), col = 1, lty = 2) lines(c(RTstart, RTstart), c(2,3), col = "red", lty = 2) lines(c(RTstart, RTstart), c(3,4), col = "blue", lty = 2) points(RTstart, c(1.5), pch = "1", cex = 2) points(RTstart, c(2.5), pch = "1", cex = 2) points(RTstart, c(3.5), pch = "1", cex = 2) idx <- 2 for (i in as.numeric(index(RT))[-1]) { lines(c(i, i), c(1,2), col = 1, lty = 2) points(c(i), c(1.5), col = 1, pch = paste(RT[idx,1]), cex = 2) lines(c(i, i), c(2,3), col = "red", lty = 2) points(c(i), c(2.5), col = 1, pch = paste(RT[idx,2]), cex = 2) lines(c(i, i), c(3,4), col = "blue", lty = 2) points(c(i), c(3.5), col = 1, pch = paste(RT[idx,3]), cex = 2) idx <- idx + 1 } ################################################### ### code chunk number 11: rvplot ################################################### data("sampleOneMinuteData", package = "highfrequency") par(mfrow = c(2,2), mai = c(0, 0, 0, 0), mar = c(3, 3, 1.1, 3.1)) oneMinute <- sampleOneMinuteData[as.Date(DT) > "2001-08-30"] ylim1 <- range(oneMinute$MARKET) ylim2 <- range(oneMinute$STOCK) for (date in unique(as.Date(oneMinute$DT))) { plot(oneMinute[as.Date(DT) == date, DT], oneMinute[as.Date(DT) == date, MARKET], type = "l", ylab = "Price", xlab = "hour:minute", xaxs = "i", main = as.Date(date, origin = "1970-01-01"), ylim = ylim1, axes = FALSE) axis(2, las = 2) box() par(new = TRUE) plot(oneMinute[as.Date(DT) == date, DT], oneMinute[as.Date(DT) == date, STOCK], type = "l", ylab = "Price", xlab = "hour:minute", xaxs = "i", axes = FALSE, col = "red", lty = 4, ylim = ylim2, las = 2) axis(4, col.axis = "red", las = 2) xLim <- range(as.POSIXct(paste(as.Date(date), "09:30:00"), tz = "UTC"), as.POSIXct(paste(as.Date(date), "16:00:00"), tz = "UTC")) axis(1, at = seq(xLim[1], xLim[2], by = 60*30)[c(1,2,4,6,8,10,12,14)], labels = format(seq(xLim[1], xLim[2], by = 60*30)[c(1,2,4,6,8,10,12,14)], format = "%H:%M"), cex = 0.9) } ################################################### ### code chunk number 12: RV1 ################################################### oneMinute <- sampleOneMinuteData[as.Date(DT) > "2001-08-30"] RV1 <- rCov(oneMinute[, list(DT, MARKET)], makeReturns = TRUE, alignBy = "minutes", alignPeriod = 1) print(RV1) ################################################### ### code chunk number 13: BPV1 ################################################### BPV1 <- rBPCov(oneMinute[, list(DT, MARKET)], makeReturns = TRUE) print(BPV1) ################################################### ### code chunk number 14: volSiggnature ################################################### data("sampleMultiTradeData", package = "highfrequency") nums <- c(1, 2, 5, 10, 20, 30, 60, 90, 180, 360, 600) rvAgg <- matrix(nrow = length(nums), ncol = 2) for(i in 1:nrow(rvAgg)) { rvAgg[i, 1] <- rCov(sampleMultiTradeData[SYMBOL == 'AAA', list(DT, PRICE)], alignBy = "ticks", alignPeriod = nums[i], makeReturns = TRUE)[[2]] rvAgg[i, 2] <- rCov(sampleMultiTradeData[SYMBOL == 'AAA', list(DT, PRICE)], alignBy = "seconds", alignPeriod = nums[i], makeReturns = TRUE)[[2]] } ################################################### ### code chunk number 15: volSignaturePlot ################################################### par(mfrow = c(2,1), cex.axis = 0.8, mai = c(0,0, 0, 0), mar = c(3, 4, 1, 2)) plot(x = factor(nums), rep(0, length(nums)), type = "l", axes = FALSE, xlab = "", ylab = "", ylim = c(18,50)) lines(factor(nums), sqrt(rvAgg[,1] * 252) * 100) axis(1, at = factor(nums), labels = nums) axis(2, las = 2) title(xlab = "ticks", mgp = c(1.75,0,0)) box() plot(x = factor(nums), rep(0, length(nums)), type = "l", axes = FALSE, xlab = "", ylab = "", ylim = c(18,50)) lines(factor(nums), sqrt(rvAgg[,2] * 252) * 100) axis(1, at = factor(nums), labels = nums) title(xlab = "seconds", mgp = c(1.75,0,0)) axis(2, las = 2) box() ################################################### ### code chunk number 16: rKernelCov ################################################### listAvailableKernels() ################################################### ### code chunk number 17: rKernelCov1 ################################################### RK1 <- rKernelCov(oneMinute[, list(DT, MARKET)], kernelType = "Parzen", makeReturns = TRUE) print(RK1) ################################################### ### code chunk number 18: rCov ################################################### rCov(oneMinute, makeReturns = TRUE)[1:2] ################################################### ### code chunk number 19: jss4586.Rnw:678-684 ################################################### rMRCov( list("ETF" = sampleMultiTradeData[SYMBOL == "ETF", list(DT, PRICE)], "AAA" = sampleMultiTradeData[SYMBOL == "AAA", list(DT, PRICE)]), theta = 0.1) rCov(spreadPrices(sampleMultiTradeData[SYMBOL %chin% c("ETF", "AAA")]), alignBy = "ticks", alignPeriod = 1, makeReturns = TRUE) ################################################### ### code chunk number 20: ReMeDI ################################################### stockData <- sampleMultiTradeData[ SYMBOL == "AAA", list(DT, PRICE = log(PRICE))] kn <- knChooseReMeDI(stockData) remedi <- ReMeDI(stockData, kn = kn, lags = 0:15) remedi asympVar <- ReMeDIAsymptoticVariance( stockData, kn = kn, lags = 0:15, phi = 0.2, i = 2) asympVar ################################################### ### code chunk number 21: spotVol ################################################### parametric <- spotVol( data = sampleOneMinuteData[, list(DT, PRICE = MARKET)], periodicVol = "TML", P1 = 2, P2 = 2, alignPeriod = 1) nonParametric <- spotVol( data = sampleOneMinuteData[, list(DT, PRICE = MARKET)], periodicVol = "WSD", alignPeriod = 1) ################################################### ### code chunk number 22: spotVolPlot ################################################### plot.ts(cbind(as.numeric(parametric$periodic), as.numeric(nonParametric$periodic)), main = "", plot.type = "single", col = c("black", "red"), xaxt = "n", ylab = "diurnality effect", xaxs = "i", las = 2, axes = FALSE) labs <- format(index(parametric$periodic), "%H:%M") at <- 1:nrow(parametric$periodic) axis(2) axis(labels = labs[seq(1,length(labs), length.out = 7)], at = at[seq(1,length(labs), length.out = 7)], side = 1) legend(x = 150, y = 2.5, legend = c("parametric", "non-parametric"), col = c("black", "red"), lty = 1, box.lwd = 0) box() ################################################### ### code chunk number 23: jumpTest ################################################### daily <- BNSjumpTest( sampleOneMinuteData[, list(DT, MARKET)], makeReturns = TRUE) ################################################### ### code chunk number 24: jumpTest2 ################################################### pValues <- sapply(daily, function(x) x[["pvalue"]]) pValues[which.min(pValues)] ################################################### ### code chunk number 25: intraday ################################################### intraday <- intradayJumpTest( sampleOneMinuteData[as.Date(sampleOneMinuteData$DT) == "2001-08-26", list(DT, PRICE = MARKET)], makeReturns = TRUE, alignBy = "minutes", alignPeriod = 1) ################################################### ### code chunk number 26: jumpPlot ################################################### intraday <- intradayJumpTest( sampleOneMinuteData[as.Date(DT) == "2001-08-26", list(DT, PRICE = MARKET)], makeReturns = TRUE, alignBy = "minutes", alignPeriod = 1) par(mfrow = c(2, 1), mai = c(0.4, 0.4, 0.35, 0.2), mar = c(3, 3, 0.5, 1)) plot(x = as.Date(names(daily)) , y = abs(sapply(daily, function(x) x[["ztest"]])), ylab = "test statistic", xlab = "date", type = "l", col = "white", axes = FALSE) rect(xleft = as.Date(names(daily))[which.min(as.numeric(sapply(daily, function(x) x[["pvalue"]])))] - 0.5, xright = as.Date(names(daily))[which.min(sapply(daily, function(x) x[["pvalue"]]))] + 0.5, ybottom = min(abs(sapply(daily, function(x) x[["ztest"]]))), ytop = max(abs(sapply(daily, function(x) x[["ztest"]]))), col = "darkgrey", border = "darkgrey") lines(x = as.Date(names(daily)) , y = abs(sapply(daily, function(x) x[["ztest"]])), main ="", ylab = "test statistic", xlab = "date", type = "l") foo <- c(1,6,12, 16, 21) axis(1, at = as.Date(names(sapply(daily, function(x) x[["ztest"]])))[foo], labels = as.Date(names(sapply(daily, function(x) x[["ztest"]])))[foo]) axis(2, las = 2) box() plot(x = intraday$pData[[1]], intraday$pData[[2]], type = "l", col = "white", xlab = "time of day", ylab = "price", axes = FALSE) rect(xleft = index(intraday$ztest)[which(abs(intraday$ztest) > intraday$criticalValue)] - 45, xright = index(intraday$ztest)[which(abs(intraday$ztest) > intraday$criticalValue)] + 45, ybottom = min(intraday$pData[[2]]), ytop = max(intraday$pData[[2]]), col = 2, border = 2) lines(x = intraday$pData[[1]], intraday$pData[[2]], type = "l") xLim <- range(as.POSIXct("2001-08-26 9:30:00", tz = "UTC"), as.POSIXct("2001-08-26 16:00:00", tz = "UTC")) axis(1, at = seq(xLim[1], xLim[2], by = 60*30)[c(1,2,4,6,8,10,12,14)], labels = format(seq(xLim[1], xLim[2], by = 60*30)[c(1,2,4,6,8,10,12,14)], format = "%H:%M"), cex = 0.9) axis(2, las = 2, mgp = c(0,0.65,0)) box() ################################################### ### code chunk number 27: HAR_estimation ################################################### RV <- as.xts(SPYRM[, list(DT, RV5)]) * 10000 model <- HARmodel(data = RV, periods = c(1, 5, 22), type = "HAR", inputType = "RM") summary(model) ################################################### ### code chunk number 28: Forecasting ################################################### logReturns <- 100 * makeReturns(SPYRM$CLOSE)[-1] dataSPY <- xts(cbind(logReturns, RV = SPYRM$RV5[-1] * 10000), order.by = SPYRM$DT[-1]) model <- HEAVYmodel(data = dataSPY[, c("logReturns","RV")]) summary(model) ################################################### ### code chunk number 29: Forecasting ################################################### dataSPY$HARfcst <- NA dataSPY$HEAVYfcst <- NA for (i in 1:494) { HAREstimated <- HARmodel(data = dataSPY[i:(i + 999), "RV"], periods = c(1, 5, 22), type = "HAR", inputType = "RM") dataSPY$HARfcst[i+1000] <- predict(HAREstimated) HEAVYEstimated <- HEAVYmodel( data = dataSPY[i:(i + 999), c("logReturns", "RV")]) dataSPY$HEAVYfcst[i+1000] <- predict(HEAVYEstimated, stepsAhead = 1)[,"CondRM"] } ################################################### ### code chunk number 30: jss4586.Rnw:1007-1017 ################################################### dfSPY <- as.data.frame(dataSPY) dfSPY$dates <- as.Date(index(dataSPY)) dfSPY <- dfSPY[!is.na(dfSPY$HEAVYfcst),] plot(dfSPY$dates, dfSPY$RV, type = "l", xlab = "Date", ylab = "", col = alpha("black", 0.8) , lty = 3, las = 2) lines(dfSPY$dates, dfSPY$HARfcst, col = alpha("blue", 0.8), lty = 1) lines(dfSPY$dates, dfSPY$HEAVYfcst, col = alpha("red", 0.8), lty = 1) legend("topright", c("RV", "HAR", "HEAVY"), lty = c(3, 1, 1), col = alpha(c("black", "blue", "red"), alpha = 0.8)) ################################################### ### code chunk number 31: forecast_evaluation_1 ################################################### sqrt(mean((dataSPY$RV - dataSPY$HEAVYfcst)^2, na.rm = TRUE)) sqrt(mean((dataSPY$RV - dataSPY$HARfcst)^2, na.rm = TRUE)) ################################################### ### code chunk number 32: forecast_evaluation_2 ################################################### idx <- dataSPY$RV > quantile(dataSPY$RV[-c(1:1000)], 0.90) sqrt(mean((dataSPY$RV[idx] - dataSPY$HEAVYfcst[idx])^2, na.rm = TRUE)) sqrt(mean((dataSPY$RV[idx] - dataSPY$HARfcst[idx])^2, na.rm = TRUE))