# Run using:
# python pandas_bench.py

import pandas as pd
import time
# Input table for the aggregation benchmark, parsed with the pyarrow CSV engine.
df_pandas = pd.read_csv(
    "df.csv",
    engine='pyarrow',
)

def test_agg_pandas(df):
    """Time one grouped sum/count aggregation of column ``x`` by ``grp``.

    Args:
        df: DataFrame with a ``grp`` column to group on and an ``x`` column
            to aggregate.

    Returns:
        Elapsed seconds (float) for a single ``groupby("grp").agg(...)``
        call. The aggregation result itself is discarded.
    """
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available, so the interval cannot go negative or be skewed by system
    # clock adjustments the way a time.time() difference can.
    begin = time.perf_counter()
    df.groupby("grp").agg({'x': ['sum', 'count']})
    return time.perf_counter() - begin


# Benchmark helper, not a pytest test despite the ``test_`` prefix: opt out of
# collection so pytest never tries to resolve ``df`` as a fixture.
test_agg_pandas.__test__ = False

# Report the fastest of 100 aggregation runs.
print(
    'Pandas aggregation time:',
    min(test_agg_pandas(df_pandas) for _ in range(100)),
)

# Inputs for the join benchmark, parsed with the pyarrow CSV engine.
df1_pandas = pd.read_csv(
    "df1.csv",
    engine='pyarrow',
)
df2_pandas = pd.read_csv(
    "df2.csv",
    engine='pyarrow',
)

def test_join_pandas(df1, df2):
    """Time inner, left, right and outer merges of ``df1`` with ``df2`` on ``x``.

    Args:
        df1: Left-hand DataFrame; must contain the join key column ``x``.
        df2: Right-hand DataFrame; must contain the join key column ``x``.

    Returns:
        A 4-tuple ``(inner, left, right, outer)`` of elapsed seconds
        (floats), one per ``how`` mode, each covering a single ``merge``
        call. The merged frames themselves are discarded.
    """
    timings = []
    # The order is part of the contract: callers index the result by position.
    for how in ('inner', 'left', 'right', 'outer'):
        # perf_counter() is monotonic and high-resolution, so each interval
        # is immune to system clock adjustments, unlike time.time().
        begin = time.perf_counter()
        df1.merge(df2, on='x', how=how)
        timings.append(time.perf_counter() - begin)
    return tuple(timings)


# Benchmark helper, not a pytest test despite the ``test_`` prefix: opt out of
# collection so pytest never tries to resolve ``df1``/``df2`` as fixtures.
test_join_pandas.__test__ = False

# Run the join benchmark 100 times; each entry is the tuple returned by
# test_join_pandas, ordered (inner, left, right, outer).
res_join_pd = [
    test_join_pandas(df1_pandas, df2_pandas) for _ in range(100)
]

# Report the fastest observed run for each join type.
print('Pandas innerjoin time:', min(inner for inner, _, _, _ in res_join_pd))
print('Pandas leftjoin time:', min(left for _, left, _, _ in res_join_pd))
print('Pandas rightjoin time:', min(right for _, _, right, _ in res_join_pd))
print('Pandas outerjoin time:', min(outer for _, _, _, outer in res_join_pd))
