################################################################################
################################################################################
#                                  Section 7                                   #
################################################################################
################################################################################
#=
The code should be run from a fresh Julia kernel and in sequence. The versions
of the packages I was using are available in the submission materials in the
Project.toml and Manifest.toml files. A Debian machine with VS Code was used to
run this. The results were checked against a Windows PC with Julia running
through Atom and the results were identical up to the 10th decimal place.
As the commenting is done in the paper's main body there are few comments here.
So this should be read in conjunction with the paper.
=#
################################################################################
using Pkg
Pkg.activate(@__DIR__)
Pkg.instantiate()
Pkg.add("HighFrequencyCovariance")

using DataFrames, Dates, LinearAlgebra, StableRNGs
using HighFrequencyCovariance

dims = 4
ticks = 100000
rng = StableRNG(100)
time_period_per_unit = Second(1)
ts_data, true_covar, true_micro_noise, true_update_rates =
    generate_random_path(dims, ticks; rng = rng,
                         time_period_per_unit = time_period_per_unit)
################################################################################
show(ts_data)
################################################################################
assets = get_assets(ts_data)
simple = estimate_volatility(ts_data, assets, :simple_volatility)
two_scales = estimate_volatility(ts_data, assets, :two_scales_volatility)
################################################################################
micro_noise = estimate_microstructure_noise(ts_data, assets)
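# For intuition, a minimal sketch of the realized-volatility idea behind the
# :simple_volatility estimator, for one series of log prices observed at a fixed
# interval. Illustrative only: naive_volatility is a hypothetical helper, and the
# package's method additionally deals with asynchronous ticks and sampling choices.
function naive_volatility(log_prices::Vector{<:Real})
    rets = diff(log_prices)                     # log returns between consecutive ticks
    return sqrt(sum(rets .^ 2) / length(rets))  # per-interval volatility
end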
################################################################################
println(vcat(DataFrame(sort(simple)..., (:estimation => "Simple Method")),
             DataFrame(sort(two_scales)..., (:estimation => "Two Scales")),
             DataFrame(Dict(true_covar.labels .=> true_covar.volatility)...,
                       (:estimation => "True Values"))))

println(vcat(DataFrame(sort(micro_noise)..., (:estimation => "Est. Microstructure Noise")),
             DataFrame(true_micro_noise..., (:estimation => "True Microstructure Noise"))))
################################################################################
simple     = estimate_covariance(ts_data, assets, :simple_covariance;
                                 regularisation = missing)
preav      = estimate_covariance(ts_data, assets, :preaveraged_covariance;
                                 regularisation = missing)
two_scales = estimate_covariance(ts_data, assets, :two_scales_covariance;
                                 regularisation = missing)
bnhls      = estimate_covariance(ts_data, assets, :bnhls_covariance;
                                 regularisation = missing)
################################################################################
show(two_scales, 4, 4)
################################################################################
valid_correlation_matrix(bnhls)
################################################################################
regularised_bnhls = regularise(bnhls, ts_data, :nearest_correlation_matrix)
################################################################################
matrices = [preav, two_scales]
weights = [1, 1]
combined = combine_covariance_matrices(matrices, weights)
################################################################################
calculate_mean_abs_distance(true_covar, combined)
calculate_mean_abs_distance(true_covar, simple)
calculate_mean_abs_distance(true_covar, preav)
calculate_mean_abs_distance(true_covar, two_scales)
################################################################################
covariance_interval = Hour(8)
covar = covariance(combined, covariance_interval, combined.labels)
covar
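# For intuition: under the Brownian assumptions of the model, covariance scales linearly
# with the horizon, so the 8 hour matrix is (28800 seconds / 1 second) times the
# per-second one. An illustrative hand-rolled equivalent of the covariance() call above,
# assuming the CovarianceMatrix stores per-unit volatilities and a correlation matrix:
scaling = Dates.value(Second(covariance_interval)) / Dates.value(Second(time_period_per_unit))
manual_covar = (combined.volatility * transpose(combined.volatility)) .*
               combined.correlation .* scaling
# manual_covar should match the covar matrix computed above.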
################################################################################
################################################################################
#                                  Section 8                                   #
################################################################################
################################################################################
using Pkg
Pkg.activate(@__DIR__)
Pkg.instantiate()
Pkg.add("HighFrequencyCovariance")

# Loading packages and the current path.
using CSV, DataFrames, Dates, Distributions, KernelDensity, Statistics
using HighFrequencyCovariance
path = joinpath(@__DIR__, "Data")
data = CSV.read(joinpath(path, "data.csv"), DataFrame)

# As we loaded from a CSV, we do some conversions of data types.
data[!,:ticker] = Symbol.(data[:,:ticker])
assets = sort(unique(data[:,:ticker]))
dateformat = DateFormat("yyyy-mm-dd HH:MM:SS.sss")
data[!,:stamp] = DateTime.(data[:,:stamp], Ref(dateformat))
data[!,:Times] = Time.(data[:,:stamp])
data[!,:Dates] = Date.(data[:,:stamp])
################################################################################
data[!,:Hours] = parse.(Int, Dates.format.(data[:,:Times], "H"))
data = transform(groupby(data, :ticker), nrow => :all_ticks)
data = transform(groupby(data, [:ticker, :Hours]), nrow => :hour_ticks)
plotdata = combine(groupby(data, [:ticker, :Hours, :all_ticks]), nrow => :hour_ticks)
plotdata[!,:HourDensity] = plotdata[:,:hour_ticks] ./ plotdata[:,:all_ticks]

using Gadfly, Cairo, Fontconfig
plt = Gadfly.plot(plotdata, x=:Hours, y=:HourDensity, color=:ticker, Geom.line,
                  Guide.xlabel("Hours in the day (UTC)"),
                  Guide.ylabel("Density of ticks"),
                  style(key_position = :right))
img = PDF(joinpath(path, "Figures", "kernel_density_of_ticks_over_the_day.pdf"),
          30cm, 15cm)
draw(img, plt)
################################################################################
min_time = minimum(data[:,:stamp])
data[!,:SecsFromBase] = map(x -> x.value, (data[:,:stamp] .- min_time)) ./ 1000
timeperiod = Second(1)
################################################################################
data[!,:logmid] = log.(data[:,:mid])
################################################################################
ts = SortedDataFrame(data, :SecsFromBase, :ticker, :logmid, timeperiod)
################################################################################
preav_estimate = estimate_covariance(ts, assets, :preaveraged_covariance;
                                     regularisation = :default)
twoscales_estimate = estimate_covariance(ts, assets, :two_scales_covariance;
                                         regularisation = :default)
spectral_estimate = estimate_covariance(ts, assets, :spectral_covariance;
                                        regularisation = :default)
################################################################################
valid_correlation_matrix(preav_estimate)     # true
valid_correlation_matrix(twoscales_estimate) # true
valid_correlation_matrix(spectral_estimate)  # true

calculate_mean_abs_distance(preav_estimate, twoscales_estimate)
# (Correlation_error = 0.11493703, Volatility_error = 0.0)
calculate_mean_abs_distance(spectral_estimate, twoscales_estimate)
# (Correlation_error = 0.06101978, Volatility_error = 6.1e-7)
calculate_mean_abs_distance(preav_estimate, spectral_estimate)
# (Correlation_error = 0.14429151, Volatility_error = 6.1e-7)
################################################################################
covar = combine_covariance_matrices([preav_estimate, twoscales_estimate,
                                     spectral_estimate])
################################################################################
show(rearrange(covar, [:USDDKK, :EURUSD, :USDHKD, :GBPUSD, :AUDUSD]), 4, 4)
#=
Volatilities per time interval of 1 second
 :USDDKK   :EURUSD   :USDHKD   :GBPUSD   :AUDUSD
 1.463e-5  1.454e-5  8.686e-7  1.842e-5  2.228e-5

Correlations
 :___      :USDDKK  :EURUSD  :USDHKD  :GBPUSD  :AUDUSD
 :USDDKK    1.0     -0.9936  -0.0043  -0.5787  -0.6728
 :EURUSD   -0.9936   1.0      0.0123   0.5824   0.6854
 :USDHKD   -0.0043   0.0123   1.0     -0.0833  -0.0404
 :GBPUSD   -0.5787   0.5824  -0.0833   1.0      0.6518
 :AUDUSD   -0.6728   0.6854  -0.0404   0.6518   1.0
=#
################################################################################
USDDKK_correlations = get_correlation.(Ref(covar), :USDDKK,
                                       setdiff(assets, [:USDDKK, :EURUSD]))
EURUSD_correlations = get_correlation.(Ref(covar), :EURUSD,
                                       setdiff(assets, [:USDDKK, :EURUSD]))
Statistics.cor(USDDKK_correlations, EURUSD_correlations) # -0.9998844432559166
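# get_correlation presumably just looks the pair up in the stored correlation matrix by
# label. An illustrative manual equivalent under that assumption (manual_get_correlation
# is a hypothetical helper, not part of the package):
function manual_get_correlation(cm::CovarianceMatrix, a::Symbol, b::Symbol)
    i = findfirst(==(a), cm.labels)  # row position of the first asset
    j = findfirst(==(b), cm.labels)  # column position of the second asset
    return cm.correlation[i, j]
end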
################################################################################
future_returns = CSV.read(joinpath(path, "data_nextday.csv"), DataFrame)
future_returns[!, :ticker] = Symbol.(future_returns[:, :ticker])
println(first(future_returns, 4))
#=
4×4 DataFrame
 Row │ ticker  interval                    log_return  duration
     │ Symbol  String31                    Float64     String31
─────┼─────────────────────────────────────────────────────────────────────────
   1 │ AUDUSD  2021-06-22 09:00:00.000000    0.000227  0 days 00:29:59.504000
   2 │ AUDUSD  2021-06-22 09:30:00.000000   -0.000147  0 days 00:29:59.685000
   3 │ AUDUSD  2021-06-22 10:00:00.000000    0.000906  0 days 00:29:59.251000
   4 │ AUDUSD  2021-06-22 10:30:00.000000    0.000233  0 days 00:29:59.518000
=#
################################################################################
future_rets_wide = unstack(future_returns, :interval, :ticker, :log_return)
future_rets_mat = Float64.(Matrix(future_rets_wide[:, covar.labels]))
future_rets_covar = simple_covariance_given_returns(future_rets_mat)
correl, sds = cov_to_cor(future_rets_covar)
# Converting the 30 minute (= 1800 second) volatilities to per-second units.
vols = sds / sqrt(30 * 60)

# Testing volatilities.
Statistics.cor(vols, covar.volatility)             # 0.8850266961604603
Statistics.mean(vols ./ covar.volatility)          # 0.9695290869008333
# Testing correlations.
Statistics.mean(abs.(correl .- covar.correlation)) # 0.13124848487418406
################################################################################
simple_estimate = estimate_covariance(ts, assets, :simple_covariance;
                                      regularisation = :default)
# Testing volatilities.
Statistics.cor(vols, simple_estimate.volatility)    # 0.8933722898678813
Statistics.mean(vols ./ simple_estimate.volatility) # 0.9457959505163285
# Testing correlations.
Statistics.mean(abs.(correl .- simple_estimate.correlation)) # 0.1517847273323817
################################################################################
struct Portfolio{R<:Real}
    weights::Vector{R}
    labels::Vector{Symbol}
end
################################################################################
function portfolio_variance(port::Portfolio, cov::CovarianceMatrix,
                            duration::Dates.Period)
    cov2 = covariance(rearrange(cov, port.labels), duration)
    return transpose(port.weights) * cov2 * port.weights
end

our_portfolio = Portfolio([1, 1, 1], [:EURUSD, :GBPUSD, :AUDUSD])
length_of_trading_day = Hour(8)
portfolio_variance(our_portfolio, preav_estimate, length_of_trading_day)
# 6.923304662274563e-5
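# Sanity-check sketch: with unit weights, transpose(w) * cov2 * w is just the sum of all
# entries of the scaled covariance matrix, so the following should reproduce the number
# above. Illustrative only, reusing calls already made in this section.
cov_8h = covariance(rearrange(preav_estimate, our_portfolio.labels), length_of_trading_day)
sum(cov_8h) # portfolio variance with unit weights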
################################################################################
"""
Given a CovarianceMatrix, an asset of interest and returns for assets to
condition on, this function estimates the (univariate) distribution of the
asset of interest after conditioning on the returns of the other assets.
"""
function get_conditional_distribution(covar::CovarianceMatrix, asset::Symbol,
             conditioning_assets::Vector{Symbol},
             conditioning_asset_returns::Vector{<:Real},
             data_return_interval = covar.time_period_per_unit)
    covariance_labels = covar.labels
    covar_matrix = covariance(covar, data_return_interval)
    asset_index = findall(asset .== covariance_labels)
    conditioning_indices = map(x -> findfirst(==(x), covariance_labels),
                               conditioning_assets)
    # Segmenting the covariance matrix.
    sigma11 = covar_matrix[asset_index, asset_index]
    sigma12 = covar_matrix[asset_index, conditioning_indices]
    sigma21 = covar_matrix[conditioning_indices, asset_index]
    sigma22 = covar_matrix[conditioning_indices, conditioning_indices]
    mu1 = zeros(length(asset_index))
    mu2 = zeros(length(conditioning_indices))
    weights = sigma12 / sigma22
    conditional_mu = mu1 + weights * (conditioning_asset_returns - mu2)
    conditional_sigma = sigma11 - weights * sigma21
    dist = Normal(conditional_mu[1], sqrt(conditional_sigma[1,1]))
    return dist, weights
end
################################################################################
asset = :USDHUF
other_assets = [:GBPUSD, :AUDUSD, :USDJPY, :USDNOK]
covar_subsetted = rearrange(covar, vcat(asset, other_assets))
# As we only want the weights, which do not depend on the conditioning asset
# returns, we feed in zeros for that argument.
_, weights = get_conditional_distribution(covar_subsetted, asset, other_assets,
                                          zeros(length(other_assets)))
hedging_portfolio = -weights
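# Illustrative check of the hedge, reusing only calls made above: one unit of USDHUF
# held together with the hedging portfolio has a variance equal to the conditional
# variance, which can be no larger than the unhedged variance of USDHUF (sigma[1,1]).
w = vcat(1.0, vec(hedging_portfolio))
sigma = covariance(covar_subsetted, covar.time_period_per_unit)
transpose(w) * sigma * w # hedged variance of the USDHUF position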
################################################################################
################################################################################
#                                  Section 9                                   #
################################################################################
################################################################################
#=
Note that this code is not included in the paper for brevity.
The code should be run from a fresh Julia kernel and in sequence. The versions
of the packages I was using are available in the submission materials in the
Project.toml and Manifest.toml files.
=#
using Pkg
Pkg.activate(@__DIR__)
Pkg.instantiate()

# Setting folder variables.
folder = @__DIR__
fpath = joinpath(folder, "SimulationData")

using Distributed
# Setting up each core.
num_cores = 8
println("Setting each core with the project $(Base.active_project())")
addprocs(num_cores, exeflags = "--project=$(Base.active_project())")
@everywhere begin
    using Pkg
    Pkg.activate(@__DIR__)
    Pkg.instantiate()
    using HighFrequencyCovariance, StableRNGs, DataFrames, Dates, Distributions, CSV
end

# Loading the Monte Carlo function onto each core.
@everywhere function get_convergence_errors(pathnum, dimensions;
        functions = [:bnhls_covariance, :two_scales_covariance, :spectral_covariance,
                     :simple_covariance, :preaveraged_covariance],
        grd = Int.(floor.(10 .^ (2.0:(1/4):5.5))) .* dimensions,
        syncronous = false, with_noise = true)
    println("Doing pathnum = ", pathnum, " at time ", Dates.now())
    ticks = maximum(grd)
    rng = StableRNG(pathnum)
    ts, true_covar, micro_noise, update_rates =
        generate_random_path(dimensions, ticks; syncronous = syncronous, rng = rng)
    dd = DataFrame()
    detail_vars = Dict{Symbol,Any}([:dimensions, :syncronous, :with_noise, :pathnum] .=>
                                   Any[dimensions, syncronous, with_noise, pathnum])
    for gridlen in grd
        sub_ts = subset_to_tick(ts, gridlen)
        for method in functions
            covar = estimate_covariance(sub_ts, get_assets(sub_ts), method)
            detail_vars2 = deepcopy(detail_vars)
            detail_vars2[:method] = string(method)
            detail_vars2[:number_of_paths] = gridlen
            newdf = HighFrequencyCovariance.DataFrame(covar, detail_vars2)
            dd = append!(dd, newdf)
        end
    end
    newdf = HighFrequencyCovariance.DataFrame(true_covar, detail_vars;
                                              delete_duplicate_correlations = false)
    rename!(newdf, :value => :true_value)
    select!(newdf, Not([:vol_period_units, :vol_period]))
    merged = leftjoin(dd, newdf, on = [:asset1, :asset2, :variable, :syncronous,
                                       :pathnum, :dimensions, :with_noise])
    number_we_should_have = length(functions) *
        ((dimensions * (dimensions - 1)) / 2 + dimensions) * length(grd)
    if nrow(merged) != number_we_should_have
        error(string("We did not get all the rows we should have. We should have ",
                     number_we_should_have, " while we only have ", nrow(merged)))
    end
    return merged
end

# This runs the get_convergence_errors function in the 4 asset case.
dims = 4
fname = joinpath(fpath, string(dims, ".csv"))
if isfile(fname) == false
    println("Currently doing the ", fname, " Monte Carlo.")
    few_dimensions = @distributed (vcat) for path = 1:100
        get_convergence_errors(path, dims)
    end
    few_dimensions[!,:dimensions] .= string(dims, " dimensions")
    CSV.write(fname, few_dimensions)
else
    few_dimensions = CSV.read(fname, DataFrame)
end
show(few_dimensions)

# This runs the get_convergence_errors function in the 16 asset case.
dims = 16
fname = joinpath(fpath, string(dims, ".csv"))
if isfile(fname) == false
    println("Currently doing the ", fname, " Monte Carlo.")
    many_dimensions = @distributed (vcat) for path = 1:100
        get_convergence_errors(path, dims)
    end
    many_dimensions[!,:dimensions] .= string(dims, " dimensions")
    CSV.write(fname, many_dimensions)
else
    many_dimensions = CSV.read(fname, DataFrame)
end
show(many_dimensions)

# Assembling all data together.
data = vcat(few_dimensions, many_dimensions)

# Doing some calculations and then plotting the results.
function median_ex_nans(x)
    vals = x[findall(isnan.(x) .== false)]
    return length(vals) > 0 ? median(vals) : NaN
end
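# For example, median_ex_nans([1.0, NaN, 3.0]) returns 2.0, whereas Statistics.median
# would propagate the NaN. Some estimators presumably produce NaN entries when given
# very few ticks, so the medians below are taken over the paths that yielded estimates.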
data[!, :numdimensions] .= 4
data[findall(data[!, :dimensions] .== "16 dimensions"), :numdimensions] .= 16
data[findall(data[!, :dimensions] .== "4 dimensions"), :dimensions] .= "4 assets"
data[findall(data[!, :dimensions] .== "16 dimensions"), :dimensions] .= "16 assets"
data[!, :abserror] = abs.(data[:,:value] .- data[:,:true_value])
data[!, :ticks_per_asset] = data[!, :number_of_paths] ./ data[!, :numdimensions]
bb = combine(groupby(data, [:ticks_per_asset, :dimensions, :method, :with_noise, :variable]),
             :abserror => median_ex_nans)
bb[!,:variable] .= Symbol.(bb[:,:variable])

using Gadfly, Cairo, Fontconfig
yvar = :abserror_median_ex_nans
# Dropping the spectral covariance results for very small tick counts in the 16 asset
# case before plotting. Note the filter must use the renamed "16 assets" label.
deletes = intersect(intersect(findall(bb.ticks_per_asset .< 1001),
                              findall(bb.dimensions .== "16 assets")),
                    findall(bb[:,:method] .== "spectral_covariance"))
bb = bb[setdiff(1:nrow(bb), deletes), :]
plt = Gadfly.plot(bb, ygroup=:variable, xgroup=:dimensions,
                  Geom.subplot_grid(layer(x=:ticks_per_asset, y=yvar, color=:method, Geom.point),
                                    layer(x=:ticks_per_asset, y=yvar, color=:method, Geom.line),
                                    free_y_axis = true),
                  Scale.x_log10, Scale.y_log10,
                  Guide.xlabel("Average number of updates per asset"),
                  Guide.colorkey(title="method"),
                  Guide.ylabel("Median absolute error"),
                  style(key_position = :right))
img = PDF(joinpath(folder, "Figures", "convergence.pdf"), 30cm, 15cm)
draw(img, plt)