# (Appears to be page-navigation text picked up with the notebook: "Skip to article frontmatter" / "Skip to article content".)
import os
import pandas as pd
import geopandas as gpd
# List the contents of the resources directory (shown as cell output in a notebook).
os.listdir('../resources')

# Load the origin-destination bus trip table; the first CSV column becomes the index.
# NOTE(review): the stop-code columns are left to pandas' dtype inference. If they
# are parsed as numbers, codes with leading zeros would no longer match string stop
# ids in the location table -- confirm against the input file.
df = pd.read_csv('../resources/origin_destination_bus_202005.csv.xz', index_col=0)
df.head()

# Keep only weekday rows whose hour satisfies 7 <= TIME_PER_HOUR < 9,
# using one combined boolean mask instead of successive filters.
df2 = df[
    (df['DAY_TYPE'] == 'WEEKDAY')
    & (df['TIME_PER_HOUR'] >= 7)
    & (df['TIME_PER_HOUR'] < 9)
]

# Sum trips per (origin stop, destination stop) pair, then rename the columns
# to origin / dest / count.
df2 = (
    df2.groupby(['ORIGIN_PT_CODE', 'DESTINATION_PT_CODE'])[['TOTAL_TRIPS']]
    .sum()
    .reset_index()
    .rename(
        columns={
            'ORIGIN_PT_CODE': 'origin',
            'DESTINATION_PT_CODE': 'dest',
            'TOTAL_TRIPS': 'count',
        }
    )
)
df2.head()
# Read the bus-stop layer and reproject it to EPSG:4326 in one step.
# NOTE(review): the archive name is spelled 'loation' -- confirm it matches the
# file on disk before "correcting" it.
gdf = gpd.read_file('../resources/bus_stop_loation.zip').to_crs('epsg:4326')

# Quick visual / tabular inspection of the layer (notebook cell output).
gdf.plot()
gdf.head()
gdf.columns

# Expose the stop description and stop code under the column names 'name' and 'id'.
gdf = gdf.assign(name=gdf['desc'], id=gdf['PTcode'])

# Location table: one row per stop record with id, name and the layer's existing
# 'lat' / 'lon' attribute columns (they are not derived from the geometry here).
location_df = gdf[['id', 'name', 'lat', 'lon']].copy()
location_df.head()
# Export the aggregated flows; the frame's index is written as the first CSV column.
df2.to_csv('../resources/flowmap_flow_202005.csv')

# Sanity check: if the number of distinct ids is smaller than the number of rows,
# some stop id appears more than once.
len(location_df['id'].unique())
len(location_df)

# Count the non-null names recorded per id and show the ids that occur more than once.
temp = location_df.groupby('id')[['name']].count().reset_index()
temp[temp['name']>1]

# Inspect the rows recorded for stop id '52371'.
location_df[location_df['id']=='52371']

# Remove the row with index label 938.
# NOTE(review): 938 is a hard-coded label, presumably one of the '52371' rows
# inspected above. If the input layer changes, this will raise KeyError or drop an
# unrelated row -- confirm before reusing with other data.
location_df = location_df.drop(938)
len(location_df)

# Write the location table once, after the row removal, so that no intermediate
# file still containing the dropped row is left on disk.
location_df.to_csv('../resources/flowmap_location_202005.csv')