# Run using: Rscript arrow_bench.r

library("dplyr")
library("arrow")
library("microbenchmark")

# Aggregation benchmark ----
# Read the input with as_data_frame = FALSE so it is not converted to an R
# data frame on load; the dplyr verbs below are applied to that object and
# collect() pulls the result back into R.
df <- read_csv_arrow("df.csv", as_data_frame = FALSE)

# Time the full group-by/summarise pipeline, including collect(), 100 times.
bench_agg <- microbenchmark(
  df %>%
    group_by(grp) %>%
    summarise(sum(x), n()) %>%
    collect(),
  times = 100
)

# microbenchmark records timings in nanoseconds; report the fastest run in
# seconds.
cat("Arrow/dplyr aggregation time:", min(bench_agg$time) / 1e9, "\n")

# Join benchmarks ----
# Read both join inputs with as_data_frame = FALSE so they are not converted
# to R data frames on load.
df1 <- read_csv_arrow("df1.csv", as_data_frame = FALSE)
df2 <- read_csv_arrow("df2.csv", as_data_frame = FALSE)

# dplyr join verbs take the key column(s) through `by`; they have no `on`
# argument, so the key must be passed as by = "x" for the join to be performed
# on column x only.
# Each benchmark times the join plus collect() 100 times and reports the
# fastest run in seconds (microbenchmark records nanoseconds).
bench_innerjoin <- microbenchmark(
  collect(inner_join(df1, df2, by = "x")),
  times = 100
)
cat("Arrow/dplyr innerjoin time:", min(bench_innerjoin$time) / 10^9, "\n")

bench_leftjoin <- microbenchmark(
  collect(left_join(df1, df2, by = "x")),
  times = 100
)
cat("Arrow/dplyr leftjoin time:", min(bench_leftjoin$time) / 10^9, "\n")

bench_rightjoin <- microbenchmark(
  collect(right_join(df1, df2, by = "x")),
  times = 100
)
cat("Arrow/dplyr rightjoin time:", min(bench_rightjoin$time) / 10^9, "\n")

# The "outer" join is dplyr's full_join().
bench_outerjoin <- microbenchmark(
  collect(full_join(df1, df2, by = "x")),
  times = 100
)
cat("Arrow/dplyr outerjoin time:", min(bench_outerjoin$time) / 10^9, "\n")
