# run using:
# julia --project=. -e "using Pkg; Pkg.instantiate(); Pkg.resolve()"
# julia -t auto --project julia_bench.jl

using CSV, DataFrames, Random

# Seed the global RNG so every run generates (and exports) the same data.
Random.seed!(1234)
n = 50_000_000  # row count of the aggregation table
k = 500_000     # `grp` values are drawn uniformly from 1:k

# Aggregation input: one random Float64 column and one random integer group column.
df = DataFrame(:x => rand(n), :grp => rand(1:k, n))

# Join inputs: the key columns are shuffled 1:(n - 1) and 2:n, so the two tables
# share the keys 2:(n - 1); each table carries its own random payload column.
df1 = DataFrame(:x => shuffle(1:(n - 1)), :y1 => randn(n - 1))
df2 = DataFrame(:x => shuffle(2:n), :y2 => randn(n - 1))

# Write all three tables to CSV files in the current working directory.
for (path, table) in ("df.csv" => df, "df1.csv" => df1, "df2.csv" => df2)
    CSV.write(path, table)
end

# Run the grouped aggregation (sum of `x` and row count per `grp`) 100 times and
# print the shortest elapsed time.
let fastest = Inf
    for _ in 1:100
        elapsed = @elapsed combine(groupby(df, :grp), :x => sum, nrow)
        fastest = min(fastest, elapsed)
    end
    println("Julia aggregation time: ", fastest)
end

# Run each join of `df1` and `df2` on the key column `x` 100 times and print the
# shortest elapsed time, in the order inner, left, right, outer.
for (label, joinfn) in (
    "Julia innerjoin time: " => innerjoin,
    "Julia leftjoin time: " => leftjoin,
    "Julia rightjoin time: " => rightjoin,
    "Julia outerjoin time: " => outerjoin,
)
    fastest = minimum(@elapsed(joinfn(df1, df2, on = :x)) for _ in 1:100)
    println(label, fastest)
end

