POI and roads data source: Overture Maps, url: https://
data download with:
for poi:
overturemaps download --bbox=102.1401,1.716,103.5922,2.9272 -f geoparquet --type=place -o segamat.parquetfor roads:
overturemaps download --bbox=102.1401,1.716,103.5922,2.9272 -f geoparquet --type=segment -o segamat_road.parquet
Earthquake data source: http://
Download the util script file here: utils.py
## basic utilities
import os
import random
## data processing and calculations
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point # for generating Point object to create geodataframe
from sklearn.neighbors import KernelDensity # for calculating KDE
## plot related
import matplotlib.pyplot as plt
from matplotlib import cm # for color bar (in 3D)
import seaborn as sns
from matplotlib_scalebar.scalebar import ScaleBar # https://pypi.org/project/matplotlib-scalebar/
## our own scripts
import utils as utilA demo on how KDE can be done in Python¶
x = [random.random() for _ in range(20)]
y = [random.random() for _ in range(20)]
xy = np.array(list(zip(x, y)))
xygrid = util.gen_grid(0., 0., 1., 1., ncol=50, nrow=50, force_square=True, crs=None)
cent = grid.centroid
grid['x'] = cent.x
grid['y'] = cent.y
print(grid.head())
grid.plot(fc='none', ec='k')
grid_data = grid[['x', 'y']]
grid_data = np.array(grid_data)
grid_data, grid_data.shapekde_demo = KernelDensity(kernel='exponential', bandwidth=.125).fit(xy)
kde_demoZ_demo = np.exp(kde_demo.score_samples(grid_data))
grid['demo'] = Z_demo
fig, ax = plt.subplots(figsize=(5,5))
grid.plot('demo', cmap='Blues', ax=ax)
ax.scatter(x, y, c='r', marker='.', alpha=.2)
#ax.set_xlim([-0.05, 1.05])
#ax.set_ylim([-0.05, 1.05])
ax.set_xlim([0., 1.])
ax.set_ylim([0., 1.])
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()Let’s look at the Johor’s Earthquakes data¶
data preparation¶
os.listdir('../resources/segamat/')df = pd.read_csv('../resources/segamat/my_gempa_20250917.csv')
df.head()plt.scatter(df['longitude'], df['latitude'])minx, miny, maxx, maxy = 102.1401, 1.716, 103.5922, 2.9272 # lat long
df2 = df[df['latitude']>=miny]
df2 = df2[df2['latitude']<=maxy]
df2 = df2[df2['longitude']>=minx]
df2 = df2[df2['longitude']<=maxx]
df2#crs = 'epsg:4390' https://epsg.io/4390
# epsg:4390 = +proj=cass +lat_0=2.04258333333333 +lon_0=103.562758333333 +x_0=0 +y_0=0 +ellps=evrst48 +towgs84=-11,851,5,0,0,0,0 +units=m +no_defs +type=crs
crs = '+proj=cass +lat_0=2.04258333333333 +lon_0=103.562758333333 +x_0=160000 +y_0=40000 +ellps=evrst48 +towgs84=-11,851,5,0,0,0,0 +units=m +no_defs +type=crs'
# note the changed x_0 and y_0, which are false easting and false northing## create a geodataframe with the Johor's earthquake
gdf = gpd.GeoDataFrame(df2, geometry=[Point(x,y) for x, y in zip(df2['longitude'], df2['latitude'])], crs='epsg:4326') # wgs84 lat lng degree
gdf = gdf.to_crs('epsg:4390')
gdf.plot()## create a geodataframe with the Johor's earthquake
gdf = gpd.GeoDataFrame(df2, geometry=[Point(x,y) for x, y in zip(df2['longitude'], df2['latitude'])], crs='epsg:4326') # wgs84 lat lng degree
gdf = gdf.to_crs(crs)
gdf.plot()gdf['x'] = gdf.geometry.x
gdf['y'] = gdf.geometry.ypoi = gpd.read_parquet('../resources/segamat/segamat_poi.parquet')
poi = poi.to_crs(crs)
poi['x'] = poi.geometry.x
poi['y'] = poi.geometry.y
print(poi.head())
poi.plot()fig, ax = plt.subplots()
poi.plot(ax=ax, c='k', marker='.')
gdf.plot(ax=ax, c='r', marker='x')
plt.tight_layout()choosing the range of study area¶
fig, ax = plt.subplots()
poi.plot(ax=ax, c='k', marker='.')
gdf.plot(ax=ax, c='r', marker='x')
ax.axvline(x=57000, c='b')
ax.axvline(x=97000, c='b')
ax.axhline(y=75000, c='b')
ax.axhline(y=115000, c='b')
xlim = [57000, 97000]
ylim = [75000, 115000]
plt.tight_layout()fig, ax = plt.subplots()
poi.plot(ax=ax, c='k', marker='.')
gdf.plot(ax=ax, c='r', marker='x')
"""
ax.axvline(x=57000, c='b')
ax.axvline(x=97000, c='b')
ax.axhline(y=75000, c='b')
ax.axhline(y=115000, c='b')
"""
xlim = [57000, 97000]
ylim = [75000, 115000]
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()## filter the poi data
xmin, xmax = xlim
ymin, ymax = ylim
#poi2 = poi.intersection(Polygon([(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), (xmin, ymin)]))
poi2 = poi[poi['x']>=xmin]
poi2 = poi2[poi2['x']<=xmax]
poi2 = poi2[poi2['y']>=ymin]
poi2 = poi2[poi2['y']<=ymax]
poi2.plot(marker='.', c='k')create grid for the area¶
size = 500
len_x = xmax - xmin
len_y = ymax - ymin
print(len_x, len_y)
ncol = int(np.ceil(len_x/size))
nrow = int(np.ceil(len_y/size))
ncol, nrow, ncol*nrowgrid = util.gen_grid(xmin, ymin, xmax, ymax, ncol=ncol, nrow=nrow, force_square=True, crs=crs)
grid.plot(fc='none', ec='k')cent = grid.centroid
grid['x'] = cent.x
grid['y'] = cent.y
grid.head()grid_data = grid[['x', 'y']]
grid_data = np.array(grid_data)
grid_data, grid_data.shapecheck POI categories
poi2.groupby('category')[['names']].count()fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='health'].plot(marker='+', c='r', ax=ax)
#roads2.plot(ec='grey', ax=ax)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='food_bev'].plot(marker='+', c='r', ax=ax)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='religious'].plot(marker='+', c='r', ax=ax)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='education'].plot(marker='+', c='r', ax=ax)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='retail'].plot(marker='+', c='r', ax=ax)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='manufacturing'].plot(marker='+', c='r', ax=ax)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()slicing data into different geodataframe/table¶
pt_food = poi2[poi2['category']=='food_bev']
pt_reli = poi2[poi2['category']=='religious']
pt_educ = poi2[poi2['category']=='education']
pt_reta = poi2[poi2['category']=='retail']
pt_heal = poi2[poi2['category']=='health']
pt_manu = poi2[poi2['category']=='manufacturing']food_pts = pt_food[['x', 'y']]
reli_pts = pt_reli[['x', 'y']]
educ_pts = pt_educ[['x', 'y']]
heal_pts = pt_heal[['x', 'y']]
reta_pts = pt_reta[['x', 'y']]
manu_pts = pt_manu[['x', 'y']]kde_food = KernelDensity(kernel='exponential', bandwidth=1000).fit(food_pts)
Z_food = np.exp(kde_food.score_samples(grid_data))
grid['food_dens'] = Z_food
kde_food
fig, ax = plt.subplots(figsize=(5,5))
grid.plot('food_dens', cmap='Blues', ax=ax)
# Create scale bar
scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()roads = gpd.read_parquet('../resources/segamat/segamat_roads3.parquet')
roads = roads.to_crs(crs)
roads.plot()
fig, ax = plt.subplots(figsize=(5,5))
grid.plot('food_dens', cmap='Blues', ax=ax)
roads.plot(ec='k', ax=ax, alpha=.125)
#poi2.plot(marker='.', c='k', ax=ax, alpha=.03)
#poi2[poi2['category']=='food_bev'].plot(marker='+', c='r', ax=ax, alpha=.3)
scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()h = 1000
kde_reli = KernelDensity(kernel='exponential', bandwidth=h).fit(reli_pts)
kde_reta = KernelDensity(kernel='exponential', bandwidth=h).fit(reta_pts)
kde_educ = KernelDensity(kernel='exponential', bandwidth=h).fit(educ_pts)
kde_heal = KernelDensity(kernel='exponential', bandwidth=h).fit(heal_pts)
kde_manu = KernelDensity(kernel='exponential', bandwidth=h).fit(manu_pts)
grid['reli_dens'] = np.exp(kde_reli.score_samples(grid_data))
grid['reta_dens'] = np.exp(kde_reta.score_samples(grid_data))
grid['educ_dens'] = np.exp(kde_educ.score_samples(grid_data))
grid['heal_dens'] = np.exp(kde_heal.score_samples(grid_data))
grid['manu_dens'] = np.exp(kde_manu.score_samples(grid_data))fig, axg = plt.subplots(3, 2, figsize=(14, 21))
axs = axg.flatten()
density_columns = ['food_dens', 'reli_dens', 'reta_dens', 'educ_dens', 'heal_dens', 'manu_dens']
for i, col in enumerate(density_columns):
ax = axs[i]
grid.plot(col, ax=ax, cmap='Blues')
roads.plot(fc='none', ec='k', ax=ax, alpha=.125)
scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
ax.set_title(col)
#break
plt.tight_layout()vmax = grid[density_columns].max().max()
vmin = grid[density_columns].min().min()
vmax, vminfig, axg = plt.subplots(3, 2, figsize=(14, 21))
axs = axg.flatten()
for i, col in enumerate(density_columns):
ax = axs[i]
grid.plot(col, ax=ax, cmap='Blues', vmin=0., vmax=vmax)
#roads.plot(fc='none', ec='k', ax=ax, alpha=.06125)
gdf.plot(ax=ax, c='r', marker='x')
scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
ax.set_title(col)
#break
plt.tight_layout()make 3D plots¶
z_array = grid.pivot(index='y', columns='x', values='reli_dens').iloc[::-1]
z_arrayx_array = grid.pivot(index='y', columns='x', values='x').iloc[::-1]
y_array = grid.pivot(index='y', columns='x', values='y').iloc[::-1]
y_array
fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(10, 10))
surf = ax.plot_surface(x_array, y_array, z_array, cmap=cm.Blues,
linewidth=0, antialiased=False, vmin=vmin, vmax=vmax)
ax.scatter(gdf['x'] , gdf['y'] , [0]*len(gdf), color='r', marker='x')
fig.colorbar(surf, shrink=0.5, aspect=5)
ax.set_zlim([vmin, vmax])
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.tight_layout()
fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(10, 10))
ax.plot_wireframe(x_array, y_array, z_array, rstride=2, cstride=2)
ax.set_zlim([vmin, vmax])
ax.set_box_aspect(None, zoom=0.9)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.tight_layout()
fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(10, 10))
surf = ax.plot_surface(x_array, y_array, z_array, cmap=cm.Blues,
linewidth=0, antialiased=False, vmin=vmin, vmax=vmax)
ax.plot_wireframe(x_array, y_array, z_array, rstride=5, cstride=5, alpha=.4)
fig.colorbar(surf, shrink=0.5, aspect=5)
ax.set_zlim([vmin, vmax])
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.tight_layout()fig, axg = plt.subplots(3, 2, figsize=(14, 21), subplot_kw={"projection": "3d"})
axs = axg.flatten()
for i, col in enumerate(density_columns):
ax = axs[i]
z_array = grid.pivot(index='y', columns='x', values=col).iloc[::-1]
surf = ax.plot_surface(x_array, y_array, z_array, cmap=cm.Blues,
linewidth=0, antialiased=False, vmin=vmin, vmax=vmax)
ax.plot_wireframe(x_array, y_array, z_array, rstride=2, cstride=2, alpha=.4)
ax.scatter(gdf['x'] , gdf['y'] , [0]*len(gdf), color='r', marker='X')
ax.set_box_aspect(None, zoom=0.9)
ax.set_zlabel(col)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_zlim([vmin, vmax])
ax.set_xticks([])
ax.set_yticks([])
ax.set_title(col)
#break
plt.tight_layout()Comparison between two density values¶
fig, ax = plt.subplots(figsize=(5,5))
grid.plot('educ_dens', cmap='Blues', ax=ax)
pt_educ.plot(ax=ax, c='g', marker='^', alpha=.25)
scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
fig, ax = plt.subplots(figsize=(5,5))
grid.plot('food_dens', cmap='Blues', ax=ax)
pt_food.plot(ax=ax, c='k', marker='+', alpha=.125)
scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()grid['temp'] = np.log2(grid['educ_dens'] / grid['food_dens'])
## ratio dividing one value by the other
## log ratio get the magnitude (how many times) of which one is larger than the other; 0 means the are same (ratio=1, log ratio=0)
grid['temp'].min(), grid['temp'].max()grid.plot('temp', cmap='coolwarm', vmin=-9, vmax=9)
fig, ax = plt.subplots(figsize=(5,5))
grid.plot('temp', cmap='coolwarm', vmin=-9, vmax=9, ax=ax)
roads.plot(ec='k', ax=ax, alpha=.125)
gdf.plot(c='r', marker='x', ax=ax)
scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
fig, ax = plt.subplots(figsize=(5,5))
grid.plot('temp', cmap='coolwarm', vmin=-9, vmax=9, ax=ax)
#roads.plot(ec='k', ax=ax, alpha=.125)
pt_food.plot(ax=ax, c='k', marker='+', alpha=.25, label='F&B')
pt_educ.plot(ax=ax, c='g', marker='^', alpha=.25, label='Education')
gdf.plot(c='r', marker='x', ax=ax, label='Earthquake')
scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)
ax.legend(loc='lower left')
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()