Table of Contents
Build Matrix
In [11]:
Copied!
import geopandas as gpd
import pandas as pd
import numpy as np
import os
import numpy.linalg as la
from sklearn.preprocessing import normalize
import geopandas as gpd
import pandas as pd
import numpy as np
import os
import numpy.linalg as la
from sklearn.preprocessing import normalize
In [12]:
Copied!
def pageRank(linkMatrix, d, tol=0.01, max_iter=10000):
    """Rank nodes by damped power iteration over a link matrix.

    Starting from the uniform vector whose entries sum to 100 (each entry is
    100 / n), repeatedly applies

        r <- d * (linkMatrix @ r) + (1 - d) / n * sum(r)

    which is the product of ``d * linkMatrix + (1 - d) / n * J`` (J being the
    all-ones matrix) with ``r``, computed without materialising the dense
    n x n matrix.

    Parameters
    ----------
    linkMatrix : square 2-D array
        Entry [i, j] is the share of node j's rank passed on to node i.
    d : float
        Damping factor: weight given to following links; the remaining
        ``1 - d`` is spread evenly over all nodes.
    tol : float, optional
        Iteration stops once two successive rank vectors differ by at most
        this much in Euclidean norm. Defaults to 0.01.
    max_iter : int, optional
        Upper bound on update steps after the first. Without it, inputs such
        as ``d == 1`` on a periodic link matrix never converge and would loop
        forever. Defaults to 10000.

    Returns
    -------
    numpy.ndarray
        Rank vector of length n (empty when n is 0).

    Raises
    ------
    ValueError
        If ``linkMatrix`` is not a square 2-D matrix.

    Notes
    -----
    Rank mass is only conserved when every column of ``linkMatrix`` sums to 1.
    A column of zeros keeps just the ``1 - d`` share each step, so the result
    then sums to less than 100.
    """
    import warnings

    shape = linkMatrix.shape
    if len(shape) != 2 or shape[0] != shape[1]:
        raise ValueError(f'linkMatrix must be square, got shape {shape}')
    n = shape[0]
    if n == 0:
        return np.zeros(0)

    uniform_share = (1 - d) / n

    def step(vec):
        # Same result as (d * linkMatrix + uniform_share * ones((n, n))) @ vec.
        return d * (linkMatrix @ vec) + uniform_share * vec.sum()

    lastR = 100 * np.ones(n) / n
    r = step(lastR)
    i = 0
    while la.norm(lastR - r) > tol:
        if i >= max_iter:
            warnings.warn(
                f'pageRank stopped after {max_iter} iterations without reaching tol={tol}',
                RuntimeWarning,
            )
            break
        lastR, r = r, step(r)
        i += 1
    return r
In [3]:
Copied!
# Load the fishnet grid shapefile once; every row of gdf is one grid cell.
grid_path = "../Data/Fishnet500/Net500m.shp"
gdf = gpd.read_file(grid_path)
gdf.head()
Out[3]:
Id | Shape_Leng | Shape_Area | geometry | |
---|---|---|---|---|
0 | 0 | 0.02079 | 2.701401e-05 | POLYGON ((-97.65444 30.50675, -97.65964 30.506... |
1 | 0 | 0.00750 | 9.236718e-07 | POLYGON ((-97.57298 30.34562, -97.57648 30.344... |
2 | 0 | 0.02079 | 2.701401e-05 | POLYGON ((-97.96109 30.34043, -97.96629 30.340... |
3 | 0 | 0.02079 | 2.701400e-05 | POLYGON ((-97.94030 30.37681, -97.94550 30.376... |
4 | 0 | 0.02079 | 2.701401e-05 | POLYGON ((-97.72201 30.41319, -97.72720 30.413... |
In [4]:
Copied!
# Names of the per-time-slot CSV files. os.listdir returns entries in
# arbitrary order, so sort for a reproducible processing order and keep only
# the CSV files the loop below knows how to read.
end_df_lst = sorted(name for name in os.listdir('end_pt') if name.endswith('.csv'))
end_df_lst
Out[4]:
['RideAustin_0_3.csv', 'RideAustin_12_15.csv', 'RideAustin_15_18.csv', 'RideAustin_18_21.csv', 'RideAustin_21_24.csv', 'RideAustin_3_6.csv', 'RideAustin_6_9.csv', 'RideAustin_9_12.csv']
In [28]:
Copied!
%%time
rows = gdf.shape[0]
output_as_point = True
pt_lst = [x.centroid for x in gdf.geometry]
gdf_new = gpd.GeoDataFrame(gdf, geometry = pt_lst)
for df in end_df_lst:
matrix = np.zeros((rows,rows))
start_df = pd.read_csv(os.path.join('start_pt',df))
end_df = pd.read_csv(os.path.join('end_pt',df))
tdf = start_df.merge(end_df, left_on = 'Pid',right_on = 'Pid')
print(f'Start df : {start_df.shape[0]}, End df : {end_df.shape[0]}, Total : {tdf.shape[0]}')
try:
for row in tdf.iloc:
matrix[row.Gid_y][row.Gid_x] +=1
matrix_norm = normalize(matrix, axis=0, norm='l1')
r = pageRank(matrix_norm,0.9)
if output_as_point:
gdf_new['Rank'] = r
fn = os.path.join('../Data/Output',str(df).replace('.csv','_pt.shp'))
gdf_new.to_file(fn)
else:
gdf['Rank'] = r
fn = os.path.join('../Data/Output',str(df).replace('.csv','.shp'))
gdf.to_file(fn)
print(f'Complete {fn}')
except Exception as e:
print(e)
%%time
rows = gdf.shape[0]
output_as_point = True
pt_lst = [x.centroid for x in gdf.geometry]
gdf_new = gpd.GeoDataFrame(gdf, geometry = pt_lst)
for df in end_df_lst:
matrix = np.zeros((rows,rows))
start_df = pd.read_csv(os.path.join('start_pt',df))
end_df = pd.read_csv(os.path.join('end_pt',df))
tdf = start_df.merge(end_df, left_on = 'Pid',right_on = 'Pid')
print(f'Start df : {start_df.shape[0]}, End df : {end_df.shape[0]}, Total : {tdf.shape[0]}')
try:
for row in tdf.iloc:
matrix[row.Gid_y][row.Gid_x] +=1
matrix_norm = normalize(matrix, axis=0, norm='l1')
r = pageRank(matrix_norm,0.9)
if output_as_point:
gdf_new['Rank'] = r
fn = os.path.join('../Data/Output',str(df).replace('.csv','_pt.shp'))
gdf_new.to_file(fn)
else:
gdf['Rank'] = r
fn = os.path.join('../Data/Output',str(df).replace('.csv','.shp'))
gdf.to_file(fn)
print(f'Complete {fn}')
except Exception as e:
print(e)
Start df : 95296, End df : 95296, Total : 95296 Complete ../Data/Output\RideAustin_0_3_pt.shp Start df : 25794, End df : 25794, Total : 25794 Complete ../Data/Output\RideAustin_12_15_pt.shp Start df : 45121, End df : 35139, Total : 35138 only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boolean arrays are valid indices Start df : 49425, End df : 49425, Total : 49425 Complete ../Data/Output\RideAustin_18_21_pt.shp Start df : 60570, End df : 60570, Total : 60570 Complete ../Data/Output\RideAustin_21_24_pt.shp Start df : 100151, End df : 100151, Total : 100151 Complete ../Data/Output\RideAustin_3_6_pt.shp Start df : 71477, End df : 77979, Total : 71477 Complete ../Data/Output\RideAustin_6_9_pt.shp Start df : 18060, End df : 18060, Total : 18060 Complete ../Data/Output\RideAustin_9_12_pt.shp Wall time: 41 s
In [6]:
Copied!
# Preview the merged start/end trips. `tdf` is whatever the last iteration of
# the per-file loop left behind, so this depends on that cell having run.
tdf.head()
Out[6]:
Pid | Gid_x | Gid_y | |
---|---|---|---|
0 | 7941 | 5541 | 5541 |
1 | 7942 | 2238 | 4559 |
2 | 7943 | 1238 | 4947 |
3 | 7944 | 4350 | 2479 |
4 | 7945 | 1238 | 4940 |
In [13]:
Copied!
# Column-normalise the trip counts (l1 norm over axis 0). `matrix` is the one
# left behind by the last iteration of the per-file loop.
matrix_norm = normalize(matrix, axis=0, norm='l1')
matrix_norm
Out[13]:
array([[0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], ..., [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.], [0., 0., 0., ..., 0., 0., 0.]])
In [14]:
Copied!
# Rank the grid cells from the normalised matrix with damping factor 0.9.
r = pageRank(matrix_norm, 0.9)
r
Out[14]:
array([1.15521465e-05, 1.15521465e-05, 1.15521465e-05, ..., 1.15521465e-05, 1.15521465e-05, 1.15521465e-05])
In [ ]:
Copied!
# Attach the rank vector to the grid. This mutates gdf in place, so earlier
# previews of gdf no longer show its current columns.
gdf['Rank'] = r
gdf.head()
In [ ]:
Copied!
# Write the ranked grid to disk once.
# NOTE(review): "6_8" matches none of the time-slot CSV names listed above, and
# `r` comes from whichever matrix the loop processed last -- confirm the file
# name describes the data being written.
gdf.to_file('../Data/Output/RideAustin_6_8.shp')
In [ ]:
Copied!