Skip to article frontmatterSkip to article content

POI and roads data source: Overture Maps, url: https://overturemaps.org/

data download with:

  • for poi: overturemaps download --bbox=102.1401,1.716,103.5922,2.9272 -f geoparquet --type=place -o segamat.parquet

  • for roads: overturemaps download --bbox=102.1401,1.716,103.5922,2.9272 -f geoparquet --type=segment -o segamat_road.parquet

Earthquake data source: http://mygempa.met.gov.my/docroot/archive/index.php

Download the util script file here: utils.py

## basic utilities
import os
import random

## data processing and calculations
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point  # for generating Point object to create geodataframe
from sklearn.neighbors import KernelDensity  # for calculating KDE

## plot related
import matplotlib.pyplot as plt
from matplotlib import cm  # for color bar (in 3D)
import seaborn as sns
from matplotlib_scalebar.scalebar import ScaleBar  # https://pypi.org/project/matplotlib-scalebar/

## our own scripts
import utils as util

A demo on how KDE can be done in Python

x = [random.random() for _ in range(20)]
y = [random.random() for _ in range(20)]

xy = np.array(list(zip(x, y)))
xy
grid = util.gen_grid(0., 0., 1., 1., ncol=50, nrow=50, force_square=True, crs=None)

cent = grid.centroid
grid['x'] = cent.x
grid['y'] = cent.y
print(grid.head())

grid.plot(fc='none', ec='k')

grid_data = grid[['x', 'y']]
grid_data = np.array(grid_data)
grid_data, grid_data.shape
kde_demo = KernelDensity(kernel='exponential', bandwidth=.125).fit(xy)
kde_demo
Z_demo = np.exp(kde_demo.score_samples(grid_data))
grid['demo'] = Z_demo

fig, ax = plt.subplots(figsize=(5,5))

grid.plot('demo', cmap='Blues', ax=ax)

ax.scatter(x, y, c='r', marker='.', alpha=.2)

#ax.set_xlim([-0.05, 1.05])
#ax.set_ylim([-0.05, 1.05])
ax.set_xlim([0., 1.])
ax.set_ylim([0., 1.])
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()

Let’s look at the Johor’s Earthquakes data

data preparation

os.listdir('../resources/segamat/')
df = pd.read_csv('../resources/segamat/my_gempa_20250917.csv')
df.head()
plt.scatter(df['longitude'], df['latitude'])
minx, miny, maxx, maxy = 102.1401, 1.716, 103.5922, 2.9272  # lat long

df2 = df[df['latitude']>=miny]
df2 = df2[df2['latitude']<=maxy]
df2 = df2[df2['longitude']>=minx]
df2 = df2[df2['longitude']<=maxx]
df2
#crs = 'epsg:4390' https://epsg.io/4390
# epsg:4390  =  +proj=cass +lat_0=2.04258333333333 +lon_0=103.562758333333 +x_0=0 +y_0=0 +ellps=evrst48 +towgs84=-11,851,5,0,0,0,0 +units=m +no_defs +type=crs

crs = '+proj=cass +lat_0=2.04258333333333 +lon_0=103.562758333333 +x_0=160000 +y_0=40000 +ellps=evrst48 +towgs84=-11,851,5,0,0,0,0 +units=m +no_defs +type=crs'
# note the changed x_0 and y_0, which are false easting and false northing
## create a geodataframe with the Johor's earthquake
gdf = gpd.GeoDataFrame(df2, geometry=[Point(x,y) for x, y in zip(df2['longitude'], df2['latitude'])], crs='epsg:4326')  # wgs84 lat lng degree
gdf = gdf.to_crs('epsg:4390')
gdf.plot()
## create a geodataframe with the Johor's earthquake
gdf = gpd.GeoDataFrame(df2, geometry=[Point(x,y) for x, y in zip(df2['longitude'], df2['latitude'])], crs='epsg:4326')  # wgs84 lat lng degree
gdf = gdf.to_crs(crs)
gdf.plot()
gdf['x'] = gdf.geometry.x
gdf['y'] = gdf.geometry.y
poi = gpd.read_parquet('../resources/segamat/segamat_poi.parquet')
poi = poi.to_crs(crs)
poi['x'] = poi.geometry.x
poi['y'] = poi.geometry.y

print(poi.head())
poi.plot()
fig, ax = plt.subplots()

poi.plot(ax=ax, c='k', marker='.')
gdf.plot(ax=ax, c='r', marker='x')

plt.tight_layout()

choosing the range of study area

fig, ax = plt.subplots()

poi.plot(ax=ax, c='k', marker='.')
gdf.plot(ax=ax, c='r', marker='x')

ax.axvline(x=57000, c='b')
ax.axvline(x=97000, c='b')
ax.axhline(y=75000, c='b')
ax.axhline(y=115000, c='b')

xlim = [57000, 97000]
ylim = [75000, 115000]

plt.tight_layout()
fig, ax = plt.subplots()

poi.plot(ax=ax, c='k', marker='.')
gdf.plot(ax=ax, c='r', marker='x')
"""
ax.axvline(x=57000, c='b')
ax.axvline(x=97000, c='b')
ax.axhline(y=75000, c='b')
ax.axhline(y=115000, c='b')
"""
xlim = [57000, 97000]
ylim = [75000, 115000]

ax.set_xlim(xlim)
ax.set_ylim(ylim)

ax.set_xticks([])
ax.set_yticks([])

plt.tight_layout()
## filter the poi data

xmin, xmax = xlim
ymin, ymax = ylim

#poi2 = poi.intersection(Polygon([(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin), (xmin, ymin)]))
poi2 = poi[poi['x']>=xmin]
poi2 = poi2[poi2['x']<=xmax]
poi2 = poi2[poi2['y']>=ymin]
poi2 = poi2[poi2['y']<=ymax]

poi2.plot(marker='.', c='k')

create grid for the area

size = 500
len_x = xmax - xmin
len_y = ymax - ymin

print(len_x, len_y)

ncol = int(np.ceil(len_x/size))
nrow = int(np.ceil(len_y/size))
ncol, nrow, ncol*nrow
grid = util.gen_grid(xmin, ymin, xmax, ymax, ncol=ncol, nrow=nrow, force_square=True, crs=crs)
grid.plot(fc='none', ec='k')
cent = grid.centroid
grid['x'] = cent.x
grid['y'] = cent.y
grid.head()
grid_data = grid[['x', 'y']]
grid_data = np.array(grid_data)
grid_data, grid_data.shape

check POI categories

poi2.groupby('category')[['names']].count()
fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='health'].plot(marker='+', c='r', ax=ax)
#roads2.plot(ec='grey', ax=ax)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='food_bev'].plot(marker='+', c='r', ax=ax)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='religious'].plot(marker='+', c='r', ax=ax)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='education'].plot(marker='+', c='r', ax=ax)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='retail'].plot(marker='+', c='r', ax=ax)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
fig, ax = plt.subplots(figsize=(5,5))
poi2.plot(marker='.', c='k', ax=ax)
poi2[poi2['category']=='manufacturing'].plot(marker='+', c='r', ax=ax)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()

slicing data into different geodataframe/table

pt_food = poi2[poi2['category']=='food_bev']
pt_reli = poi2[poi2['category']=='religious']
pt_educ = poi2[poi2['category']=='education']
pt_reta = poi2[poi2['category']=='retail']
pt_heal = poi2[poi2['category']=='health']
pt_manu = poi2[poi2['category']=='manufacturing']
food_pts = pt_food[['x', 'y']]
reli_pts = pt_reli[['x', 'y']]
educ_pts = pt_educ[['x', 'y']]
heal_pts = pt_heal[['x', 'y']]
reta_pts = pt_reta[['x', 'y']]
manu_pts = pt_manu[['x', 'y']]
kde_food = KernelDensity(kernel='exponential', bandwidth=1000).fit(food_pts)

Z_food = np.exp(kde_food.score_samples(grid_data))
grid['food_dens'] = Z_food

kde_food

fig, ax = plt.subplots(figsize=(5,5))

grid.plot('food_dens', cmap='Blues', ax=ax)


# Create scale bar
scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
roads = gpd.read_parquet('../resources/segamat/segamat_roads3.parquet')
roads = roads.to_crs(crs)
roads.plot()

fig, ax = plt.subplots(figsize=(5,5))

grid.plot('food_dens', cmap='Blues', ax=ax)
roads.plot(ec='k', ax=ax, alpha=.125)

#poi2.plot(marker='.', c='k', ax=ax, alpha=.03)
#poi2[poi2['category']=='food_bev'].plot(marker='+', c='r', ax=ax, alpha=.3)

scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()
h = 1000

kde_reli = KernelDensity(kernel='exponential', bandwidth=h).fit(reli_pts)
kde_reta = KernelDensity(kernel='exponential', bandwidth=h).fit(reta_pts)
kde_educ = KernelDensity(kernel='exponential', bandwidth=h).fit(educ_pts)
kde_heal = KernelDensity(kernel='exponential', bandwidth=h).fit(heal_pts)
kde_manu = KernelDensity(kernel='exponential', bandwidth=h).fit(manu_pts)

grid['reli_dens'] = np.exp(kde_reli.score_samples(grid_data))
grid['reta_dens'] = np.exp(kde_reta.score_samples(grid_data))
grid['educ_dens'] = np.exp(kde_educ.score_samples(grid_data))
grid['heal_dens'] = np.exp(kde_heal.score_samples(grid_data))
grid['manu_dens'] = np.exp(kde_manu.score_samples(grid_data))
fig, axg = plt.subplots(3, 2, figsize=(14, 21))
axs = axg.flatten()

density_columns = ['food_dens', 'reli_dens', 'reta_dens', 'educ_dens', 'heal_dens', 'manu_dens']

for i, col in enumerate(density_columns):
    ax = axs[i]
    grid.plot(col, ax=ax, cmap='Blues')
    roads.plot(fc='none', ec='k', ax=ax, alpha=.125)
    
    scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
    ax.add_artist(scalebar)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(col)
    #break

plt.tight_layout()
vmax = grid[density_columns].max().max()
vmin = grid[density_columns].min().min()

vmax, vmin
fig, axg = plt.subplots(3, 2, figsize=(14, 21))
axs = axg.flatten()

for i, col in enumerate(density_columns):
    ax = axs[i]
    grid.plot(col, ax=ax, cmap='Blues', vmin=0., vmax=vmax)
    #roads.plot(fc='none', ec='k', ax=ax, alpha=.06125)
    
    gdf.plot(ax=ax, c='r', marker='x')
    
    scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
    ax.add_artist(scalebar)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(col)
    #break

plt.tight_layout()

make 3D plots

z_array = grid.pivot(index='y', columns='x', values='reli_dens').iloc[::-1]
z_array
x_array = grid.pivot(index='y', columns='x', values='x').iloc[::-1]
y_array = grid.pivot(index='y', columns='x', values='y').iloc[::-1]
y_array

fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(10, 10))


surf = ax.plot_surface(x_array, y_array, z_array, cmap=cm.Blues,
                       linewidth=0, antialiased=False, vmin=vmin, vmax=vmax)
ax.scatter(gdf['x'] , gdf['y'] , [0]*len(gdf),  color='r', marker='x')
fig.colorbar(surf, shrink=0.5, aspect=5)
ax.set_zlim([vmin, vmax])
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.tight_layout()

fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(10, 10))

ax.plot_wireframe(x_array, y_array, z_array, rstride=2, cstride=2)

ax.set_zlim([vmin, vmax])
ax.set_box_aspect(None, zoom=0.9)

ax.set_xlabel('x')
ax.set_ylabel('y')
plt.tight_layout()

fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(10, 10))

surf = ax.plot_surface(x_array, y_array, z_array, cmap=cm.Blues,
                       linewidth=0, antialiased=False, vmin=vmin, vmax=vmax)

ax.plot_wireframe(x_array, y_array, z_array, rstride=5, cstride=5, alpha=.4)

fig.colorbar(surf, shrink=0.5, aspect=5)
ax.set_zlim([vmin, vmax])
ax.set_xlabel('x')
ax.set_ylabel('y')
plt.tight_layout()
fig, axg = plt.subplots(3, 2, figsize=(14, 21), subplot_kw={"projection": "3d"})
axs = axg.flatten()

for i, col in enumerate(density_columns):
    ax = axs[i]
    z_array = grid.pivot(index='y', columns='x', values=col).iloc[::-1]

    surf = ax.plot_surface(x_array, y_array, z_array, cmap=cm.Blues,
                           linewidth=0, antialiased=False, vmin=vmin, vmax=vmax)
    
    ax.plot_wireframe(x_array, y_array, z_array, rstride=2, cstride=2, alpha=.4)
    
    ax.scatter(gdf['x'] , gdf['y'] , [0]*len(gdf),  color='r', marker='X')
    ax.set_box_aspect(None, zoom=0.9)
    ax.set_zlabel(col)
    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
    ax.set_zlim([vmin, vmax])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(col)
    #break

plt.tight_layout()

Comparison between two density values

fig, ax = plt.subplots(figsize=(5,5))

grid.plot('educ_dens', cmap='Blues', ax=ax)
pt_educ.plot(ax=ax, c='g', marker='^', alpha=.25)

scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])

plt.tight_layout()

fig, ax = plt.subplots(figsize=(5,5))

grid.plot('food_dens', cmap='Blues', ax=ax)
pt_food.plot(ax=ax, c='k', marker='+', alpha=.125)

scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])

plt.tight_layout()
grid['temp'] = np.log2(grid['educ_dens'] / grid['food_dens'])  
## ratio dividing one value by the other
## log ratio get the magnitude (how many times) of which one is larger than the other; 0 means the are same (ratio=1, log ratio=0)

grid['temp'].min(), grid['temp'].max()
grid.plot('temp', cmap='coolwarm', vmin=-9, vmax=9)

fig, ax = plt.subplots(figsize=(5,5))
grid.plot('temp', cmap='coolwarm', vmin=-9, vmax=9, ax=ax)
roads.plot(ec='k', ax=ax, alpha=.125)
gdf.plot(c='r', marker='x', ax=ax)

scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)

ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()

fig, ax = plt.subplots(figsize=(5,5))
grid.plot('temp', cmap='coolwarm', vmin=-9, vmax=9, ax=ax)
#roads.plot(ec='k', ax=ax, alpha=.125)
pt_food.plot(ax=ax, c='k', marker='+', alpha=.25, label='F&B')
pt_educ.plot(ax=ax, c='g', marker='^', alpha=.25, label='Education')
gdf.plot(c='r', marker='x', ax=ax, label='Earthquake')

scalebar = ScaleBar(1, "m", fixed_value=5, fixed_units='km')
ax.add_artist(scalebar)
ax.legend(loc='lower left')
ax.set_xlim(xlim)
ax.set_ylim(ylim)
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()