#!/usr/bin/env python3
# coding: utf-8

## 2. Data structures
### 2.1. Trajectory

import skmob
# Seed NumPy's global random number generator so repeated runs start from
# the same state. NumPy is imported directly instead of being reached
# through the `np` alias inside skmob.core.trajectorydataframe, which is an
# internal detail of that module and may change without notice.
import numpy as np
np.random.seed(0)

# Load the sample trajectory file into a TrajDataFrame.
tdf = skmob.TrajDataFrame.from_file('geolife_sample.txt.gz')

# Show the coordinate reference system, the stored parameters and the
# first rows of the trajectory data.
print(tdf.crs)
print(tdf.parameters)
print(tdf.head())

### 2.2. Flows

import skmob
import geopandas as gpd

# Read the tessellation first: the flow file is loaded against it.
tessellation = gpd.GeoDataFrame.from_file('NY_counties_2011.geojson')

# Build the FlowDataFrame, linking flows to tiles through 'tile_id'.
fdf = skmob.FlowDataFrame.from_file(
    'NY_commuting_flows_2011.csv',
    tessellation=tessellation,
    tile_id='tile_id',
)

# Preview both tables.
print(tessellation.head())
print(fdf.head())

## 3. Trajectory preprocessing
### 3.1. Noise filtering

import skmob
from skmob import preprocessing

# Reload the raw trajectories and report how many points they contain.
tdf = skmob.TrajDataFrame.from_file('geolife_sample.txt.gz')
print('Number of points in tdf: %d\n' % len(tdf))
print(tdf.head())

# Run the noise filter with max_speed_kmh set to 10.
ftdf = preprocessing.filtering.filter(tdf, max_speed_kmh=10.)
print('Number of points in ftdf: %d' % len(ftdf))

# The difference in length is the number of points the filter removed.
n_removed = len(tdf) - len(ftdf)
print('Number of filtered points: %d\n' % n_removed)
print(ftdf.head())

### 3.2. Stop detection

from skmob.preprocessing import detection
# Detect stay locations on the unfiltered trajectories `tdf`.
stdf = detection.stay_locations(
    tdf,
    stop_radius_factor=0.5,
    minutes_for_a_stop=20.0,
    spatial_radius_km=0.2,
    leaving_time=True,
)
print(stdf.head())

### 3.3. Trajectory compression

from skmob.preprocessing import compression

# Show the filtered trajectories before compression for comparison.
print(ftdf.head())

# Compress the filtered trajectories with spatial_radius_km set to 0.2.
# This section is run once; the result is kept in `ctdf`.
ctdf = compression.compress(ftdf, spatial_radius_km=0.2)
print(ctdf.head())

## 4. Plotting
### 4.1. Visualizing Trajectories
#### Plot trajectories

import skmob
# Reload the trajectories and plot them with max_users=1, in black and
# without start/end markers.
tdf = skmob.TrajDataFrame.from_file('geolife_sample.txt.gz')
map_f = tdf.plot_trajectory(max_users=1, hex_color='#000000',
                            start_end_markers=False)

# When run as a script the map is only visible through the saved HTML file.
map_f.save("figure-2.html")

#### Plot stops

from skmob.preprocessing import detection, clustering
# Detect stay locations with default settings and cluster them.
tdf = skmob.TrajDataFrame.from_file('geolife_sample.txt.gz')
stdf = detection.stay_locations(tdf)
cstdf = clustering.cluster(stdf)

# Draw the stops on the existing trajectory map `map_f`.
map_stops = cstdf.plot_stops(max_users=1, map_f=map_f)

# When run as a script the map is only visible through the saved HTML file.
map_stops.save("figure-3.html")

#### Plot diary

# Diary for user 1, drawn with a legend.
plot_diary = cstdf.plot_diary(1, legend=True)
plot_diary.figure.savefig("figure-4.pdf", bbox_inches='tight')

# Diary for user 1 again, this time without passing `legend`.
ax = cstdf.plot_diary(1)
ax.figure.savefig("figure-5-1.pdf", bbox_inches='tight')

# Diary for user 5, drawn with a legend.
ax = cstdf.plot_diary(5, legend=True)
ax.figure.savefig("figure-5-2.pdf", bbox_inches='tight')

### 4.2. Visualizing flows
#### Plot tessellation

import geopandas as gpd
from skmob import FlowDataFrame
# Load the tessellation and the flows defined on it.
tessellation = gpd.GeoDataFrame.from_file('NY_counties_2011.geojson')
fdf = FlowDataFrame.from_file(
    'NY_commuting_flows_2011.csv',
    tessellation=tessellation,
    tile_id='tile_id',
)

# Plot the tiles in red with tile id and population as popup features.
map_tess = fdf.plot_tessellation(
    popup_features=['tile_id', 'population'],
    style_func_args={'fillColor': 'red', 'color': 'red'},
)
map_tess.save("figure-6.html")

#### Plot flows

# Plot the flows on their own, passing min_flow=50.
map_flows = fdf.plot_flows(min_flow=50)
map_flows.save("figure-7.html")

# Plot the tessellation first, then draw the same flows on that map.
map_f = fdf.plot_tessellation(
    popup_features=['tile_id', 'population'],
    style_func_args={'fillColor': 'red', 'color': 'red'},
)
map_flows = fdf.plot_flows(map_f=map_f, min_flow=50)
map_flows.save("figure-8.html")

## 5. Mobility measures

import skmob
from skmob.measures.individual import jump_lengths, radius_of_gyration

import pandas as pd

url = 'https://snap.stanford.edu/data/loc-brightkite_totalCheckins.txt.gz'

# Read the first 100000 check-ins. The file has no header line, so
# header=None is required: with header=0 and explicit `names`, pandas treats
# the first record as a header row to be replaced and silently drops it.
df = pd.read_csv(url, sep='\t', header=None, nrows=100000,
                 names=['user', 'check-in_time',
                        'latitude', 'longitude',
                        'location id'])

# Wrap the check-ins in a TrajDataFrame, mapping the raw column names to
# the roles skmob expects, and order the rows by time.
tdf = skmob.TrajDataFrame(df, latitude='latitude',
                          longitude='longitude',
                          datetime='check-in_time',
                          user_id='user').sort_values(by='datetime')
print(tdf.head())

# Individual measures: jump lengths and radius of gyration.
jl_df = jump_lengths(tdf)
rg_df = radius_of_gyration(tdf)

print(jl_df.head())
print(rg_df.head())

from skmob.measures.collective import visits_per_location

# Collective measure: visits per location.
vpl_df = visits_per_location(tdf)
print(vpl_df.head())

## 6. Individual Generative Algorithms

import skmob
import pandas as pd
import geopandas as gpd
from skmob.models.epr import DensityEPR

# Tessellation handed to the generator, and the time window to generate for.
tessellation = gpd.GeoDataFrame.from_file('NY_counties_2011.geojson')
start_time = pd.to_datetime('2019/01/01 08:00:00')
end_time = pd.to_datetime('2019/01/14 08:00:00')

# Generate trajectories for 1000 agents with a fixed random_state, using
# the 'population' column as relevance.
depr = DensityEPR()
tdf = depr.generate(
    start_time,
    end_time,
    tessellation,
    n_agents=1000,
    relevance_column='population',
    show_progress=True,
    random_state=42,
)
print(tdf.head())

## 7. Collective Generative Algorithms

import skmob
import geopandas as gpd
# Load and preview the tessellation.
tessellation = gpd.GeoDataFrame.from_file('NY_counties_2011.geojson')
print(tessellation.head())

# Load and preview the flows defined on that tessellation.
fdf = skmob.FlowDataFrame.from_file(
    'NY_commuting_flows_2011.csv',
    tessellation=tessellation,
    tile_id='tile_id',
)
print(fdf.head())

from skmob.models.gravity import Gravity
# Create a singly constrained gravity model and print it before fitting.
gravity = Gravity(gravity_type='singly constrained')
print(gravity)

# Fit on the loaded flows with 'population' as relevance, then print the
# model again.
gravity.fit(fdf, relevance_column='population')
print(gravity)

# Generate flows from the fitted model, requesting probabilities as output.
fdf_fitted = gravity.generate(
    tessellation,
    relevance_column='population',
    out_format='probabilities',
    tile_id_column='tile_id',
)
print(fdf_fitted.head())

## 8. Privacy Risk Assessment

import skmob
from skmob.privacy import attacks
# Location attack configured with knowledge_length=2.
at = attacks.LocationAttack(knowledge_length=2)

tdf = skmob.TrajDataFrame.from_file(filename='privacy_toy.csv')

# Assess risk without restricting the targets.
tdf_risk = at.assess_risk(tdf)
print(tdf_risk.head())

# Assess risk for targets 1 and 2 only.
tdf_risk = at.assess_risk(tdf, targets=[1, 2])
print(tdf_risk)

# Assess risk for target 2 with force_instances enabled.
tdf_risk = at.assess_risk(tdf, targets=[2], force_instances=True)
print(tdf_risk)
