################################################################################
################################################################################
#                                  Section 7                                   #
################################################################################
################################################################################
#=
The code should be run from a fresh Julia kernel and in sequence. The versions
of the packages I was using are available in the submission materials in the
Project.toml and Manifest.toml files. A Debian machine with VS Code was used to
run this. The results were checked against a Windows PC with Julia running
through Atom and the results were identical up to the 10th decimal place.
As the commenting is done in the paper's main body there are few comments here.
So this should be read in conjunction with the paper.
=#
################################################################################
using Pkg
Pkg.activate(@__DIR__)
Pkg.instantiate()
Pkg.add("HighFrequencyCovariance")

using DataFrames, Dates, LinearAlgebra, StableRNGs
using HighFrequencyCovariance

dims = 4
ticks = 100000
rng = StableRNG(100)
time_period_per_unit = Second(1)
ts_data, true_covar, true_micro_noise, true_update_rates =
    generate_random_path(dims, ticks; rng = rng,
                         time_period_per_unit = time_period_per_unit)
################################################################################
show(ts_data)
################################################################################
assets = get_assets(ts_data)
simple = estimate_volatility(ts_data, assets, :simple_volatility)
two_scales = estimate_volatility(ts_data, assets, :two_scales_volatility)
################################################################################
micro_noise = estimate_microstructure_noise(ts_data, assets)
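# For intuition, a minimal sketch of the realized-volatility idea behind the
# :simple_volatility estimator, for one series of log prices observed at a fixed
# interval. Illustrative only: naive_volatility is a hypothetical helper, and the
# package's method additionally deals with asynchronous ticks and sampling choices.
function naive_volatility(log_prices::Vector{<:Real})
    rets = diff(log_prices)                     # log returns between consecutive ticks
    return sqrt(sum(rets .^ 2) / length(rets))  # per-interval volatility
end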
################################################################################
println(vcat(DataFrame(sort(simple)..., (:estimation => "Simple Method")),
             DataFrame(sort(two_scales)..., (:estimation => "Two Scales")),
             DataFrame(Dict(true_covar.labels .=> true_covar.volatility)...,
                       (:estimation => "True Values"))))

println(vcat(DataFrame(sort(micro_noise)..., (:estimation => "Est. Microstructure Noise")),
             DataFrame(true_micro_noise..., (:estimation => "True Microstructure Noise"))))
################################################################################
simple     = estimate_covariance(ts_data, assets, :simple_covariance;
                                 regularisation = missing)
preav      = estimate_covariance(ts_data, assets, :preaveraged_covariance;
                                 regularisation = missing)
two_scales = estimate_covariance(ts_data, assets, :two_scales_covariance;
                                 regularisation = missing)
bnhls      = estimate_covariance(ts_data, assets, :bnhls_covariance;
                                 regularisation = missing)
################################################################################
show(two_scales, 4, 4)
################################################################################
valid_correlation_matrix(bnhls)
################################################################################
regularised_bnhls = regularise(bnhls, ts_data, :nearest_correlation_matrix)
################################################################################
matrices = [preav, two_scales]
weights = [1, 1]
combined = combine_covariance_matrices(matrices, weights)
################################################################################
calculate_mean_abs_distance(true_covar, combined)
calculate_mean_abs_distance(true_covar, simple)
calculate_mean_abs_distance(true_covar, preav)
calculate_mean_abs_distance(true_covar, two_scales)
################################################################################
covariance_interval = Hour(8)
covar = covariance(combined, covariance_interval, combined.labels)
covar
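# For intuition: under the Brownian assumptions of the model, covariance scales linearly
# with the horizon, so the 8 hour matrix is (28800 seconds / 1 second) times the
# per-second one. An illustrative hand-rolled equivalent of the covariance() call above,
# assuming the CovarianceMatrix stores per-unit volatilities and a correlation matrix:
scaling = Dates.value(Second(covariance_interval)) / Dates.value(Second(time_period_per_unit))
manual_covar = (combined.volatility * transpose(combined.volatility)) .*
               combined.correlation .* scaling
# manual_covar should match the covar matrix computed above.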
################################################################################
################################################################################
#                                  Section 8                                   #
################################################################################
################################################################################
using Pkg
Pkg.activate(@__DIR__)
Pkg.instantiate()
Pkg.add("HighFrequencyCovariance")

# Loading packages and the current path.
using CSV, DataFrames, Dates, Distributions, KernelDensity, Statistics
using HighFrequencyCovariance
path = joinpath(@__DIR__, "Data")
data = CSV.read(joinpath(path, "data.csv"), DataFrame)

# As we loaded from a CSV, we do some conversions of data types.
data[!,:ticker] = Symbol.(data[:,:ticker])
assets = sort(unique(data[:,:ticker]))
dateformat = DateFormat("yyyy-mm-dd HH:MM:SS.sss")
data[!,:stamp] = DateTime.(data[:,:stamp], Ref(dateformat))
data[!,:Times] = Time.(data[:,:stamp])
data[!,:Dates] = Date.(data[:,:stamp])
################################################################################
data[!,:Hours] = parse.(Int, Dates.format.(data[:,:Times], "H"))
data = transform(groupby(data, :ticker), nrow => :all_ticks)
data = transform(groupby(data, [:ticker, :Hours]), nrow => :hour_ticks)
plotdata = combine(groupby(data, [:ticker, :Hours, :all_ticks]), nrow => :hour_ticks)
plotdata[!,:HourDensity] = plotdata[:,:hour_ticks] ./ plotdata[:,:all_ticks]

using Gadfly, Cairo, Fontconfig
plt = Gadfly.plot(plotdata, x=:Hours, y=:HourDensity, color=:ticker, Geom.line,
                  Guide.xlabel("Hours in the day (UTC)"),
                  Guide.ylabel("Density of ticks"),
                  style(key_position = :right))
img = PDF(joinpath(path, "Figures", "kernel_density_of_ticks_over_the_day.pdf"),
          30cm, 15cm)
draw(img, plt)
################################################################################
min_time = minimum(data[:,:stamp])
data[!,:SecsFromBase] = map(x -> x.value, (data[:,:stamp] .- min_time)) ./ 1000
timeperiod = Second(1)
################################################################################
data[!,:logmid] = log.(data[:,:mid])
################################################################################
ts = SortedDataFrame(data, :SecsFromBase, :ticker, :logmid, timeperiod)
################################################################################
preav_estimate = estimate_covariance(ts, assets, :preaveraged_covariance;
                                     regularisation = :default)
twoscales_estimate = estimate_covariance(ts, assets, :two_scales_covariance;
                                         regularisation = :default)
spectral_estimate = estimate_covariance(ts, assets, :spectral_covariance;
                                        regularisation = :default)
################################################################################
valid_correlation_matrix(preav_estimate)     # true
valid_correlation_matrix(twoscales_estimate) # true
valid_correlation_matrix(spectral_estimate)  # true

calculate_mean_abs_distance(preav_estimate, twoscales_estimate)
# (Correlation_error = 0.11493703, Volatility_error = 0.0)
calculate_mean_abs_distance(spectral_estimate, twoscales_estimate)
# (Correlation_error = 0.06101978, Volatility_error = 6.1e-7)
calculate_mean_abs_distance(preav_estimate, spectral_estimate)
# (Correlation_error = 0.14429151, Volatility_error = 6.1e-7)
################################################################################
covar = combine_covariance_matrices([preav_estimate, twoscales_estimate,
                                     spectral_estimate])
################################################################################
show(rearrange(covar, [:USDDKK, :EURUSD, :USDHKD, :GBPUSD, :AUDUSD]), 4, 4)
#=
Volatilities per time interval of 1 second
 :USDDKK   :EURUSD   :USDHKD   :GBPUSD   :AUDUSD
 1.463e-5  1.454e-5  8.686e-7  1.842e-5  2.228e-5

Correlations
 :___      :USDDKK  :EURUSD  :USDHKD  :GBPUSD  :AUDUSD
 :USDDKK    1.0     -0.9936  -0.0043  -0.5787  -0.6728
 :EURUSD   -0.9936   1.0      0.0123   0.5824   0.6854
 :USDHKD   -0.0043   0.0123   1.0     -0.0833  -0.0404
 :GBPUSD   -0.5787   0.5824  -0.0833   1.0      0.6518
 :AUDUSD   -0.6728   0.6854  -0.0404   0.6518   1.0
=#
################################################################################
USDDKK_correlations = get_correlation.(Ref(covar), :USDDKK,
                                       setdiff(assets, [:USDDKK, :EURUSD]))
EURUSD_correlations = get_correlation.(Ref(covar), :EURUSD,
                                       setdiff(assets, [:USDDKK, :EURUSD]))
Statistics.cor(USDDKK_correlations, EURUSD_correlations) # -0.9998844432559166
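# get_correlation presumably just looks the pair up in the stored correlation matrix by
# label. An illustrative manual equivalent under that assumption (manual_get_correlation
# is a hypothetical helper, not part of the package):
function manual_get_correlation(cm::CovarianceMatrix, a::Symbol, b::Symbol)
    i = findfirst(==(a), cm.labels)  # row position of the first asset
    j = findfirst(==(b), cm.labels)  # column position of the second asset
    return cm.correlation[i, j]
end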
################################################################################
future_returns = CSV.read(joinpath(path, "data_nextday.csv"), DataFrame)
future_returns[!, :ticker] = Symbol.(future_returns[:, :ticker])
println(first(future_returns, 4))
#=
4×4 DataFrame
 Row │ ticker  interval                    log_return  duration
     │ Symbol  String31                    Float64     String31
─────┼─────────────────────────────────────────────────────────────────────────
   1 │ AUDUSD  2021-06-22 09:00:00.000000    0.000227  0 days 00:29:59.504000
   2 │ AUDUSD  2021-06-22 09:30:00.000000   -0.000147  0 days 00:29:59.685000
   3 │ AUDUSD  2021-06-22 10:00:00.000000    0.000906  0 days 00:29:59.251000
   4 │ AUDUSD  2021-06-22 10:30:00.000000    0.000233  0 days 00:29:59.518000
=#
################################################################################
future_rets_wide = unstack(future_returns, :interval, :ticker, :log_return)
future_rets_mat = Float64.(Matrix(future_rets_wide[:, covar.labels]))
future_rets_covar = simple_covariance_given_returns(future_rets_mat)
correl, sds = cov_to_cor(future_rets_covar)
# Converting the 30 minute (= 1800 second) volatilities to per-second units.
vols = sds / sqrt(30 * 60)

# Testing volatilities.
Statistics.cor(vols, covar.volatility)             # 0.8850266961604603
Statistics.mean(vols ./ covar.volatility)          # 0.9695290869008333
# Testing correlations.
Statistics.mean(abs.(correl .- covar.correlation)) # 0.13124848487418406
################################################################################
simple_estimate = estimate_covariance(ts, assets, :simple_covariance;
                                      regularisation = :default)
# Testing volatilities.
Statistics.cor(vols, simple_estimate.volatility)    # 0.8933722898678813
Statistics.mean(vols ./ simple_estimate.volatility) # 0.9457959505163285
# Testing correlations.
Statistics.mean(abs.(correl .- simple_estimate.correlation)) # 0.1517847273323817
################################################################################
struct Portfolio{R<:Real}
    weights::Vector{R}
    labels::Vector{Symbol}
end
################################################################################
function portfolio_variance(port::Portfolio, cov::CovarianceMatrix,
                            duration::Dates.Period)
    cov2 = covariance(rearrange(cov, port.labels), duration)
    return transpose(port.weights) * cov2 * port.weights
end

our_portfolio = Portfolio([1, 1, 1], [:EURUSD, :GBPUSD, :AUDUSD])
length_of_trading_day = Hour(8)
portfolio_variance(our_portfolio, preav_estimate, length_of_trading_day)
# 6.923304662274563e-5
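# Sanity-check sketch: with unit weights, transpose(w) * cov2 * w is just the sum of all
# entries of the scaled covariance matrix, so the following should reproduce the number
# above. Illustrative only, reusing calls already made in this section.
cov_8h = covariance(rearrange(preav_estimate, our_portfolio.labels), length_of_trading_day)
sum(cov_8h) # portfolio variance with unit weights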
################################################################################
"""
Given a CovarianceMatrix, an asset of interest and returns for assets to
condition on, this function estimates the (univariate) distribution of the
asset of interest after conditioning on the returns of the other assets.
"""
function get_conditional_distribution(covar::CovarianceMatrix, asset::Symbol,
             conditioning_assets::Vector{Symbol},
             conditioning_asset_returns::Vector{<:Real},
             data_return_interval = covar.time_period_per_unit)
    covariance_labels = covar.labels
    covar_matrix = covariance(covar, data_return_interval)
    asset_index = findall(asset .== covariance_labels)
    conditioning_indices = map(x -> findfirst(==(x), covariance_labels),
                               conditioning_assets)
    # Segmenting the covariance matrix.
    sigma11 = covar_matrix[asset_index, asset_index]
    sigma12 = covar_matrix[asset_index, conditioning_indices]
    sigma21 = covar_matrix[conditioning_indices, asset_index]
    sigma22 = covar_matrix[conditioning_indices, conditioning_indices]
    mu1 = zeros(length(asset_index))
    mu2 = zeros(length(conditioning_indices))
    weights = sigma12 / sigma22
    conditional_mu = mu1 + weights * (conditioning_asset_returns - mu2)
    conditional_sigma = sigma11 - weights * sigma21
    dist = Normal(conditional_mu[1], sqrt(conditional_sigma[1,1]))
    return dist, weights
end
################################################################################
asset = :USDHUF
other_assets = [:GBPUSD, :AUDUSD, :USDJPY, :USDNOK]
covar_subsetted = rearrange(covar, vcat(asset, other_assets))
# As we only want the weights, which do not depend on the conditioning asset
# returns, we feed in zeros for that argument.
_, weights = get_conditional_distribution(covar_subsetted, asset, other_assets,
                                          zeros(length(other_assets)))
hedging_portfolio = -weights
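# Illustrative check of the hedge, reusing only calls made above: one unit of USDHUF
# held together with the hedging portfolio has a variance equal to the conditional
# variance, which can be no larger than the unhedged variance of USDHUF (sigma[1,1]).
w = vcat(1.0, vec(hedging_portfolio))
sigma = covariance(covar_subsetted, covar.time_period_per_unit)
transpose(w) * sigma * w # hedged variance of the USDHUF position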
################################################################################
################################################################################
#                                  Section 9                                   #
################################################################################
################################################################################
#=
Note that this code is not included in the paper for brevity.
The code should be run from a fresh Julia kernel and in sequence. The versions
of the packages I was using are available in the submission materials in the
Project.toml and Manifest.toml files.
=#
using Pkg
Pkg.activate(@__DIR__)
Pkg.instantiate()

# Setting folder variables.
folder = @__DIR__
fpath = joinpath(folder, "SimulationData")

using Distributed
# Setting up each core.
num_cores = 8
println("Setting each core with the project $(Base.active_project())")
addprocs(num_cores, exeflags = "--project=$(Base.active_project())")
@everywhere begin
    using Pkg
    Pkg.activate(@__DIR__)
    Pkg.instantiate()
    using HighFrequencyCovariance, StableRNGs, DataFrames, Dates, Distributions, CSV
end

# Loading the Monte Carlo function onto each core.
@everywhere function get_convergence_errors(pathnum, dimensions;
        functions = [:bnhls_covariance, :two_scales_covariance, :spectral_covariance,
                     :simple_covariance, :preaveraged_covariance],
        grd = Int.(floor.(10 .^ (2.0:(1/4):5.5))) .* dimensions,
        syncronous = false, with_noise = true)
    println("Doing pathnum = ", pathnum, " at time ", Dates.now())
    ticks = maximum(grd)
    rng = StableRNG(pathnum)
    ts, true_covar, micro_noise, update_rates =
        generate_random_path(dimensions, ticks; syncronous = syncronous, rng = rng)
    dd = DataFrame()
    detail_vars = Dict{Symbol,Any}([:dimensions, :syncronous, :with_noise, :pathnum] .=>
                                   Any[dimensions, syncronous, with_noise, pathnum])
    for gridlen in grd
        sub_ts = subset_to_tick(ts, gridlen)
        for method in functions
            covar = estimate_covariance(sub_ts, get_assets(sub_ts), method)
            detail_vars2 = deepcopy(detail_vars)
            detail_vars2[:method] = string(method)
            detail_vars2[:number_of_paths] = gridlen
            newdf = HighFrequencyCovariance.DataFrame(covar, detail_vars2)
            dd = append!(dd, newdf)
        end
    end
    newdf = HighFrequencyCovariance.DataFrame(true_covar, detail_vars;
                                              delete_duplicate_correlations = false)
    rename!(newdf, :value => :true_value)
    select!(newdf, Not([:vol_period_units, :vol_period]))
    merged = leftjoin(dd, newdf, on = [:asset1, :asset2, :variable, :syncronous,
                                       :pathnum, :dimensions, :with_noise])
    number_we_should_have = length(functions) *
        ((dimensions * (dimensions - 1)) / 2 + dimensions) * length(grd)
    if nrow(merged) != number_we_should_have
        error(string("We did not get all the rows we should have. We should have ",
                     number_we_should_have, " while we only have ", nrow(merged)))
    end
    return merged
end

# This runs the get_convergence_errors function in the 4 asset case.
dims = 4
fname = joinpath(fpath, string(dims, ".csv"))
if isfile(fname) == false
    println("Currently doing the ", fname, " Monte Carlo.")
    few_dimensions = @distributed (vcat) for path = 1:100
        get_convergence_errors(path, dims)
    end
    few_dimensions[!,:dimensions] .= string(dims, " dimensions")
    CSV.write(fname, few_dimensions)
else
    few_dimensions = CSV.read(fname, DataFrame)
end
show(few_dimensions)

# This runs the get_convergence_errors function in the 16 asset case.
dims = 16
fname = joinpath(fpath, string(dims, ".csv"))
if isfile(fname) == false
    println("Currently doing the ", fname, " Monte Carlo.")
    many_dimensions = @distributed (vcat) for path = 1:100
        get_convergence_errors(path, dims)
    end
    many_dimensions[!,:dimensions] .= string(dims, " dimensions")
    CSV.write(fname, many_dimensions)
else
    many_dimensions = CSV.read(fname, DataFrame)
end
show(many_dimensions)

# Assembling all data together.
data = vcat(few_dimensions, many_dimensions)

# Doing some calculations and then plotting the results.
function median_ex_nans(x)
    vals = x[findall(isnan.(x) .== false)]
    return length(vals) > 0 ? median(vals) : NaN
end
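# For example, median_ex_nans([1.0, NaN, 3.0]) returns 2.0, whereas Statistics.median
# would propagate the NaN. Some estimators presumably produce NaN entries when given
# very few ticks, so the medians below are taken over the paths that yielded estimates.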
data[!, :numdimensions] .= 4
data[findall(data[!, :dimensions] .== "16 dimensions"), :numdimensions] .= 16
data[findall(data[!, :dimensions] .== "4 dimensions"), :dimensions] .= "4 assets"
data[findall(data[!, :dimensions] .== "16 dimensions"), :dimensions] .= "16 assets"
data[!, :abserror] = abs.(data[:,:value] .- data[:,:true_value])
data[!, :ticks_per_asset] = data[!, :number_of_paths] ./ data[!, :numdimensions]
bb = combine(groupby(data, [:ticks_per_asset, :dimensions, :method, :with_noise, :variable]),
             :abserror => median_ex_nans)
bb[!,:variable] .= Symbol.(bb[:,:variable])

using Gadfly, Cairo, Fontconfig
yvar = :abserror_median_ex_nans
# Dropping the spectral covariance results for very small tick counts in the 16 asset
# case before plotting. Note the filter must use the renamed "16 assets" label.
deletes = intersect(intersect(findall(bb.ticks_per_asset .< 1001),
                              findall(bb.dimensions .== "16 assets")),
                    findall(bb[:,:method] .== "spectral_covariance"))
bb = bb[setdiff(1:nrow(bb), deletes), :]
plt = Gadfly.plot(bb, ygroup=:variable, xgroup=:dimensions,
                  Geom.subplot_grid(layer(x=:ticks_per_asset, y=yvar, color=:method, Geom.point),
                                    layer(x=:ticks_per_asset, y=yvar, color=:method, Geom.line),
                                    free_y_axis = true),
                  Scale.x_log10, Scale.y_log10,
                  Guide.xlabel("Average number of updates per asset"),
                  Guide.colorkey(title="method"),
                  Guide.ylabel("Median absolute error"),
                  style(key_position = :right))
img = PDF(joinpath(folder, "Figures", "convergence.pdf"), 30cm, 15cm)
draw(img, plt)