Data source and info (metadata):
Elections: https://
Guerry (the data used for demo in the lecture): https://
About Geoda: https://
To use the math symbol from Latex: https://
e.g.,
$\leq$becomes$\geq$becomes
import os
import timeit
import random
import pandas as pd
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
from shapely.geometry import Point
import numpy as np
from pysal.lib import weights
from libpysal.weights import lat2W, higher_order
from pysal.explore import esda
#from pysal.viz.splot.esda import moran_scatterplot, lisa_cluster, plot_local_autocorrelation
import splot
from splot.esda import moran_scatterplot, lisa_cluster, plot_local_autocorrelation
import mapclassify
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.lines import Line2D
from matplotlib.patches import Patch
from tqdm import tqdmos.listdir('../resources/election')gdf = gpd.read_file('../resources/election/election.shp')
gdf.crs = 'epsg:4326'
gdf = gdf.to_crs('epsg:5070')
gdf = gdf.reset_index()
gdf.plot()gdf.head()Let’s run Moran’s I and Local Moran’s I¶
w = weights.Queen.from_dataframe(gdf, ids='index')col = 'pct_gop_16'
mi = esda.Moran(gdf[col], w)
print('I:', mi.I)
print('p-sim:', mi.p_sim)
moran_scatterplot(mi);# https://pysal.org/esda/generated/esda.Moran_Local.html#esda.Moran_Local
lm = esda.Moran_Local(gdf[col], w, transformation = "r",
permutations = 999, geoda_quads=False)
lm.qgdf['lm_q'] = lm.q # 1 HH, 2 LH, 3 LL, 4 HL
gdf['pval'] = lm.p_sim
pbins = [0.001, 0.01, 0.05]
bin_pval = mapclassify.UserDefined(gdf['pval'], pbins)
#gdf['pval_bin'] = bin_pval.yb
gdf['pval_bin'] = bin_pval.find_bin(gdf['pval']) set(bin_pval.yb==bin_pval.find_bin(gdf['pval']))bin_pvalgdf.groupby(['pval_bin'])[['GEOID']].count()in math,
(and)indicates NOT inclusive of the number;[and]indicates inclusive of the number
so (0.00, 0.01] means: greater than () 0.00 but less than and equal to () 0.01
gdf.plot('pct_gop_16', scheme='naturalbreaks', legend=True, cmap='Reds')sns.color_palette('Greens', 5)pv_cmap = sns.color_palette('Greens', 5)
pv_cmap = pv_cmap[1:] # first one too bright
pv_cmap = pv_cmap[::-1] # reverse them all
pv_cmap = {i:c for i, c in enumerate(pv_cmap)}
pv_cmap_label = {0: '$\leq$ .001', 1: '$\leq$ .01', 2: '$\leq$ .05'} # 3: >0.05
#1 HH, 2 LH, 3 LL, 4 HL
q_cmap = {
1: 'xkcd:bright red',
2: 'xkcd:sky blue',
3: 'xkcd:electric blue',
4: 'xkcd:baby pink'}
q_cmap_label = {1: 'HH', 2: 'LH', 3: 'LL', 4: 'HL'}fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True, sharey=True)
for ax in axs:
gdf.plot(ax=ax, fc='xkcd:light grey')
ax = axs[0]
tmp = gdf[gdf['pval']<=0.05]
tmp.plot(ax=ax, color=[pv_cmap[a] for a in tmp['pval_bin']])
# to highlight the very significant places
tmp2 = gdf[gdf['pval']<=0.01]
tmp2 = tmp2.dissolve()
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
## manually compose legend
legend_elements = [Patch(facecolor=pv_cmap[i], edgecolor=pv_cmap[i],
label=pv_cmap_label[i]) for i in [0, 1, 2]]
legend_elements.append(Patch(facecolor='none', edgecolor='k',
label='p-value$\leq$ 0.01'))
legend_elements.append(Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
label='not significant'))
ax.legend(handles=legend_elements, loc='lower left', ncol=1)
ax = axs[1]
tmp.plot(ax=ax, color=[q_cmap[a] for a in tmp['lm_q']])
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
## manually compose legend
legend_elements = [Patch(facecolor=q_cmap[i], edgecolor=q_cmap[i],
label=q_cmap_label[i]) for i in [1, 4, 3, 2]]
ax.legend(handles=legend_elements, loc='lower left', ncol=2)
legend_elements2 = [Patch(facecolor='none', edgecolor='k',
label='p-value<=0.01'),
Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
label='not significant')]
legend_elements.extend(legend_elements2)
ax.legend(handles=legend_elements, loc='lower left', ncol=2)
titles = ['(a) pseudo p-value', '(b) Local Moran']
for i, ax in enumerate(axs):
ax.set_xticks([])
ax.set_yticks([])
ax.set_title(titles[i])
plt.tight_layout()
#fig.savefig('local_moran_election_2016_gop.png', dpi=200, bbox_inches='tight')#gdf.head()running Geary’s C¶
w2 = weights.Queen.from_dataframe(gdf, ids='index')
w2.transform = 'R'c = esda.Geary(gdf[col], w2, permutations=999)
c.C, c.p_simcol = 'pct_gop_16'
# https://pysal.org/esda/generated/esda.Geary_Local.html#esda.Geary_Local
lc = esda.Geary_Local(connectivity=w2, permutations = 999, drop_islands=False, labels=True)
lc.fit(gdf[col])
lc.labs, lc.p_simgdf['lc_q'] = lc.labs #labels: 1: 'HH Cluster', 2: 'LL Cluster', 3: 'Outlier', 4: 'Non-significant'
gdf['lc_pval'] = lc.p_sim
pbins = [0.001, 0.01, 0.05]
bin_pval = mapclassify.UserDefined(gdf['lc_pval'], pbins)
gdf['lc_pval_bin'] = bin_pval.find_bin(gdf['lc_pval'])gdf.groupby(['lc_q'])[['index']].count()pv_cmap = sns.color_palette('Greens', 5)
pv_cmap = pv_cmap[1:] # first one too light
pv_cmap = pv_cmap[::-1]
pv_cmap = {i:c for i, c in enumerate(pv_cmap)}
pv_cmap_label = {0: '$\leq$ .001', 1: '$\leq$ .01', 2: '$\leq$ .05'}
# for the new label: lc_q_cmap_label_new
lc_q_cmap = {
1: 'xkcd:bright red', # HH cluster
2: 'xkcd:electric blue', # LL cluster
3: 'xkcd:sunflower', # outliers
4: 'xkcd:light grey', # non-sig
} # other
fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True, sharey=True)
for ax in axs:
gdf.plot(ax=ax, fc='xkcd:light grey')
ax = axs[0]
tmp = gdf[gdf['lc_pval']<=0.05]
tmp2 = gdf[gdf['lc_pval']<=0.01]
tmp2 = tmp2.dissolve()
tmp.plot(ax=ax, color=[pv_cmap[a] for a in tmp['lc_pval_bin']])
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor=pv_cmap[i], edgecolor=pv_cmap[i],
label=pv_cmap_label[i]) for i in [0, 1, 2]]
legend_elements.append(Patch(facecolor='none', edgecolor='k',
label='p-value$\leq$0.01'))
legend_elements.append(Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
label='not significant'))
ax.legend(handles=legend_elements, loc='lower left', ncol=1)
ax = axs[1]
tmp.plot(ax=ax, color=[lc_q_cmap[a] for a in tmp['lc_q']])
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor=lc_q_cmap[i], edgecolor=lc_q_cmap[i],
label=lc_q_cmap_label_new[i]) for i in [1, 2, 3]]
#ax.legend(handles=legend_elements, loc='lower left', ncol=2)
legend_elements2 = [Patch(facecolor='none', edgecolor='k',
label='p-value<=0.01'),
Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
label='not significant')]
legend_elements.extend(legend_elements2)
ax.legend(handles=legend_elements, loc='lower left', ncol=1)
titles = ['(a) pseudo p-value', '(b) Local Geary']
for i, ax in enumerate(axs):
ax.set_xticks([])
ax.set_yticks([])
ax.set_title(titles[i])
plt.tight_layout()
#fig.savefig('local_geary_election_2016_gop.png', dpi=200, bbox_inches='tight')w3 = weights.Queen.from_dataframe(gdf, ids='index')
w3lg = esda.G_Local(gdf[col], w3,transform='R', star=False) # default no star
lg_star = esda.G_Local(gdf[col], w3,transform='R',star=True)gdf['lg_zg'] = lg.Zs
gdf['lg_pval'] = lg.p_sim
gdf['lgs_zg'] = lg_star.Zs
gdf['lgs_pval'] = lg_star.p_simpbins = [0.001, 0.01, 0.05]
bin_pval = mapclassify.UserDefined(gdf['lg_pval'], pbins)
#gdf['pval_bin'] = bin_pval.yb
gdf['lg_pval_bin'] = bin_pval.find_bin(gdf['lg_pval'])
bin_pval = mapclassify.UserDefined(gdf['lgs_pval'], pbins)
#gdf['pval_bin'] = bin_pval.yb
gdf['lgs_pval_bin'] = bin_pval.find_bin(gdf['lgs_pval'])fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True, sharey=True)
for ax in axs:
gdf.plot(ax=ax, fc='xkcd:light grey')
ax = axs[0]
tmp = gdf[gdf['lg_pval']<=0.05]
tmp2 = gdf[gdf['lg_pval']<=0.01]
tmp2 = tmp2.dissolve()
tmp.plot(ax=ax, color=[pv_cmap[a] for a in tmp['lg_pval_bin']])
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor=pv_cmap[i], edgecolor=pv_cmap[i],
label=pv_cmap_label[i]) for i in [0, 1, 2]]
legend_elements.append(Patch(facecolor='none', edgecolor='k',
label='p-value$\leq$0.01'))
legend_elements.append(Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
label='not significant'))
ax.legend(handles=legend_elements, loc='lower left', ncol=1)
ax = axs[1]
tmpa = tmp[tmp['lg_zg']>0]
tmpb = tmp[tmp['lg_zg']<0]
tmpa.plot(fc='xkcd:bright red', ax=ax)
tmpb.plot(fc='xkcd:electric blue', ax=ax)
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor='xkcd:bright red', edgecolor='xkcd:bright red',
label='{} ({})'.format('Hot-spot (HH)', len(tmpa))),
Patch(facecolor='xkcd:electric blue', edgecolor='xkcd:electric blue',
label='{} ({})'.format('Cold-spot (LL)', len(tmpb))), ]
legend_elements2 = [Patch(facecolor='none', edgecolor='k',
label='p-value<=0.01'),
Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
label='not significant')]
legend_elements.extend(legend_elements2)
ax.legend(handles=legend_elements, loc='lower left', ncol=1)
titles = ['(a) pseudo p-value', '(b) Getis-Ord Gi']
for i, ax in enumerate(axs):
ax.set_xticks([])
ax.set_yticks([])
ax.set_title(titles[i])
plt.tight_layout()
#fig.savefig('local_gi_election_2016_gop.png', dpi=200, bbox_inches='tight')fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True, sharey=True)
for ax in axs:
gdf.plot(ax=ax, fc='xkcd:light grey')
ax = axs[0]
tmp = gdf[gdf['lgs_pval']<=0.05]
tmp2 = gdf[gdf['lgs_pval']<=0.01]
tmp2 = tmp2.dissolve()
tmp.plot(ax=ax, color=[pv_cmap[a] for a in tmp['lg_pval_bin']])
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor=pv_cmap[i], edgecolor=pv_cmap[i],
label=pv_cmap_label[i]) for i in [0, 1, 2]]
legend_elements.append(Patch(facecolor='none', edgecolor='k',
label='p-value$\leq$0.01'))
legend_elements.append(Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
label='not significant'))
ax.legend(handles=legend_elements, loc='lower left', ncol=1)
ax = axs[1]
tmpa = tmp[tmp['lgs_zg']>0]
tmpb = tmp[tmp['lgs_zg']<0]
tmpa.plot(fc='xkcd:bright red', ax=ax)
tmpb.plot(fc='xkcd:electric blue', ax=ax)
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor='xkcd:bright red', edgecolor='xkcd:bright red',
label='{} ({})'.format('Hot-spot (HH)', len(tmpa))),
Patch(facecolor='xkcd:electric blue', edgecolor='xkcd:electric blue',
label='{} ({})'.format('Cold-spot (LL)', len(tmpb))), ]
legend_elements2 = [Patch(facecolor='none', edgecolor='k',
label='p-value<=0.01'),
Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
label='not significant')]
legend_elements.extend(legend_elements2)
ax.legend(handles=legend_elements, loc='lower left', ncol=1)
titles = ['(a) pseudo p-value', '(b) Getis-Ord Gi']
for i, ax in enumerate(axs):
ax.set_xticks([])
ax.set_yticks([])
ax.set_title(titles[i])
plt.tight_layout()
#fig.savefig('local_gi_election_2016_gop.png', dpi=200, bbox_inches='tight')identify places that is HH in all 3 methods¶
gdf.columnstmp = gdf[gdf['lm_q']==1] # HH
tmp = tmp[tmp['pval']<=.05]
tmp.plot()tmp2 = tmp[tmp['lc_q']==1]
tmp2 = tmp2[tmp2['lc_pval']<=.05]
tmp2.plot()tmp3 = tmp2[tmp2['lgs_pval']<=.05]
tmp3 = tmp3[tmp3['lgs_zg']>0]
tmp3.plot()similarly for LL¶
tmpb = gdf[gdf['lm_q']==3] # LL
tmpb = tmpb[tmpb['pval']<=.05]
tmpb.plot()tmpb2 = tmpb[tmpb['lc_q']==2]
tmpb2 = tmpb2[tmpb2['lc_pval']<=.05]
tmpb2.plot()tmpb3 = tmpb2[tmpb2['lgs_pval']<=.05]
tmpb3 = tmpb3[tmpb3['lgs_zg']<0]
tmpb3.plot()fig, ax = plt.subplots(1, 1, figsize=(10, 5), sharex=True, sharey=True)
gdf.plot(ax=ax, fc='xkcd:light grey')
tmp3.plot(ax=ax, fc='xkcd:bright red')
tmpb3.plot(ax=ax, fc='xkcd:electric blue')
gdf.plot(ax=ax, fc='none', ec='w', lw=.1)
legend_elements = [Patch(facecolor='xkcd:bright red', edgecolor='xkcd:bright red',
label='{} ({})'.format('Hot-spot (HH)', len(tmp3))),
Patch(facecolor='xkcd:electric blue', edgecolor='xkcd:electric blue',
label='{} ({})'.format('Cold-spot (LL)', len(tmpb3))),
Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
label='other')]
ax.legend(handles=legend_elements, loc='lower left', ncol=1)
ax.set_title('Combine the HH and LL of three methods (p$\leq$.05)')
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()