In [1]:
Copied!
import geopandas as gpd
import pandas as pd
import geopandas as gpd
import pandas as pd
In [2]:
Copied!
csv_name = '../Data/RideAustin_Weather.csv'
df = pd.read_csv(csv_name, encoding= 'unicode_escape')
df.head()
csv_name = '../Data/RideAustin_Weather.csv'
df = pd.read_csv(csv_name, encoding= 'unicode_escape')
df.head()
E:\Anaconda\envs\gpd\lib\site-packages\IPython\core\interactiveshell.py:3146: DtypeWarning: Columns (1,7) have mixed types.Specify dtype option on import or set low_memory=False. has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
Out[2]:
completed_on | distance_travelled | end_location_lat | end_location_long | started_on | driver_rating | rider_rating | start_zip_code | end_zip_code | charity_id | ... | 11/24/2016 8:35 | 9879 | 30.31 | -97.74 | 11/24/2016 8:21 | 5 | 5.1 | 78701 | 78756 | Thunder | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 11/14/2016 19:16 | 2300 | 30.26 | -97.76 | 11/14/2016 19:11 | 5.0 | 5.0 | 78701 | 78704 | NaN | ... | 4.955556 | 11/14/2016 | 0.0 | 82.0 | 56.0 | 2.0 | 8.9 | 1.0 | 1.0 | 0.0 |
1 | 11/14/2016 19:17 | 2977 | 30.29 | -97.73 | 11/14/2016 19:08 | 5.0 | 5.0 | 78701 | 78712 | NaN | ... | 4.955556 | 11/14/2016 | 0.0 | 82.0 | 56.0 | 2.0 | 8.9 | 1.0 | 1.0 | 0.0 |
2 | 11/14/2016 19:19 | 1298 | 30.27 | -97.75 | 11/14/2016 19:13 | 5.0 | 5.0 | 78701 | 78703 | 5.0 | ... | 5.000000 | 11/14/2016 | 0.0 | 82.0 | 56.0 | 2.0 | 8.9 | 1.0 | 1.0 | 0.0 |
3 | 11/14/2016 19:19 | 3442 | 30.26 | -97.76 | 11/14/2016 19:13 | 5.0 | 5.0 | 78704 | 78704 | NaN | ... | 5.000000 | 11/14/2016 | 0.0 | 82.0 | 56.0 | 2.0 | 8.9 | 1.0 | 1.0 | 0.0 |
4 | 11/14/2016 19:21 | 6635 | 30.32 | -97.71 | 11/14/2016 19:12 | 5.0 | 5.0 | 78701 | 78723 | NaN | ... | 5.000000 | 11/14/2016 | 0.0 | 82.0 | 56.0 | 2.0 | 8.9 | 1.0 | 1.0 | 0.0 |
5 rows × 29 columns
Transfer to shapefile¶
In [3]:
Copied!
%%time
from shapely.geometry import Point
import fiona
start_loc = [Point(xy) for xy in zip(df.start_location_long,df.start_location_lat)]
end_loc = [Point(xy) for xy in zip(df.end_location_long,df.end_location_lat)]
%%time
from shapely.geometry import Point
import fiona
start_loc = [Point(xy) for xy in zip(df.start_location_long,df.start_location_lat)]
end_loc = [Point(xy) for xy in zip(df.end_location_long,df.end_location_lat)]
Wall time: 5.12 s
Only deal with travel within austin¶
In [4]:
Copied!
Shapefile_path = '../Data/austin/Austin2018.shp'
austin = gpd.read_file(Shapefile_path)
Shapefile_path = '../Data/austin/Austin2018.shp'
austin = gpd.read_file(Shapefile_path)
In [5]:
Copied!
austin.plot()
austin.plot()
Out[5]:
<AxesSubplot:>
In [6]:
Copied!
%%time
remove_id = []
for index,pt in enumerate(start_loc):
if not austin.contains(pt)[0]:
remove_id.append(index)
for index,pt in enumerate(end_loc):
if not austin.contains(pt)[0]:
remove_id.append(index)
remove_id = list(set(remove_id))
len(remove_id)
%%time
remove_id = []
for index,pt in enumerate(start_loc):
if not austin.contains(pt)[0]:
remove_id.append(index)
for index,pt in enumerate(end_loc):
if not austin.contains(pt)[0]:
remove_id.append(index)
remove_id = list(set(remove_id))
len(remove_id)
Wall time: 29min 35s
Out[6]:
8739
In [9]:
Copied!
df_new = df.drop(remove_id)
df_new.to_csv('../Data/RideAustin_Weather_new.csv')
df_new = df.drop(remove_id)
df_new.to_csv('../Data/RideAustin_Weather_new.csv')