Lab 9: LISA Analysis - Geospatial Statistics and Visualisation

Data source and info (metadata):
Elections: https://geodacenter.github.io/data-and-lab/county_election_2012_2016-variables/

Guerry (the data used for demo in the lecture): https://geodacenter.github.io/data-and-lab/Guerry/

About Geoda: https://geodacenter.github.io/

To use the math symbol from Latex: https://oeis.org/wiki/List_of_LaTeX_mathematical_symbols

e.g.,

$\leq$ becomes $\leq$
$\geq$ becomes $\geq$

import os
import timeit
import random
import pandas as pd
os.environ['USE_PYGEOS'] = '0'
import geopandas as gpd
from shapely.geometry import Point

import numpy as np
from pysal.lib import weights
from libpysal.weights import lat2W, higher_order
from pysal.explore import esda
#from pysal.viz.splot.esda import moran_scatterplot, lisa_cluster, plot_local_autocorrelation
import splot
from splot.esda import moran_scatterplot, lisa_cluster, plot_local_autocorrelation
import mapclassify

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.lines import Line2D
from matplotlib.patches import Patch


from tqdm import tqdm

os.listdir('../resources/election')

gdf = gpd.read_file('../resources/election/election.shp')
gdf.crs = 'epsg:4326'
gdf = gdf.to_crs('epsg:5070')
gdf = gdf.reset_index()
gdf.plot()

gdf.head()

Let’s run Moran’s I and Local Moran’s I¶

w = weights.Queen.from_dataframe(gdf, ids='index')

col = 'pct_gop_16'

mi = esda.Moran(gdf[col], w)
print('I:', mi.I)
print('p-sim:', mi.p_sim) 
moran_scatterplot(mi);

# https://pysal.org/esda/generated/esda.Moran_Local.html#esda.Moran_Local
lm = esda.Moran_Local(gdf[col], w, transformation = "r", 
    permutations = 999, geoda_quads=False)

lm.q

gdf['lm_q'] = lm.q  # 1 HH, 2 LH, 3 LL, 4 HL
gdf['pval'] = lm.p_sim


pbins = [0.001, 0.01, 0.05]
bin_pval = mapclassify.UserDefined(gdf['pval'], pbins)
#gdf['pval_bin'] = bin_pval.yb
gdf['pval_bin'] = bin_pval.find_bin(gdf['pval'])

set(bin_pval.yb==bin_pval.find_bin(gdf['pval']))

bin_pval

gdf.groupby(['pval_bin'])[['GEOID']].count()

in math,

( and ) indicates NOT inclusive of the number;
[ and ] indicates inclusive of the number

so (0.00, 0.01] means: greater than ( $>$ ) 0.00 but less than and equal to ( $\leq$ ) 0.01

gdf.plot('pct_gop_16', scheme='naturalbreaks', legend=True, cmap='Reds')

sns.color_palette('Greens', 5)

pv_cmap = sns.color_palette('Greens', 5)
pv_cmap = pv_cmap[1:]  # first one too bright
pv_cmap = pv_cmap[::-1]  # reverse them all
pv_cmap = {i:c for i, c in enumerate(pv_cmap)}
pv_cmap_label = {0: '$\leq$ .001', 1: '$\leq$ .01', 2: '$\leq$ .05'}  # 3: >0.05

#1 HH, 2 LH, 3 LL, 4 HL
q_cmap = {
    1: 'xkcd:bright red', 
    2: 'xkcd:sky blue', 
    3: 'xkcd:electric blue', 
    4: 'xkcd:baby pink'}
q_cmap_label = {1: 'HH', 2: 'LH', 3: 'LL', 4: 'HL'}

fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True, sharey=True)

for ax in axs:
    gdf.plot(ax=ax, fc='xkcd:light grey')

ax = axs[0]
tmp = gdf[gdf['pval']<=0.05]
tmp.plot(ax=ax, color=[pv_cmap[a] for a in tmp['pval_bin']])
# to highlight the very significant places
tmp2 = gdf[gdf['pval']<=0.01]
tmp2 = tmp2.dissolve()
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
## manually compose legend
legend_elements = [Patch(facecolor=pv_cmap[i], edgecolor=pv_cmap[i],
                         label=pv_cmap_label[i]) for i in [0, 1, 2]]
legend_elements.append(Patch(facecolor='none', edgecolor='k',
                         label='p-value$\leq$ 0.01'))
legend_elements.append(Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
                         label='not significant'))
ax.legend(handles=legend_elements, loc='lower left', ncol=1)


ax = axs[1]
tmp.plot(ax=ax, color=[q_cmap[a] for a in tmp['lm_q']])
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
## manually compose legend
legend_elements = [Patch(facecolor=q_cmap[i], edgecolor=q_cmap[i],
                         label=q_cmap_label[i]) for i in [1, 4, 3, 2]]
ax.legend(handles=legend_elements, loc='lower left', ncol=2)
legend_elements2 = [Patch(facecolor='none', edgecolor='k',
                         label='p-value<=0.01'), 
                    Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
                         label='not significant')]
legend_elements.extend(legend_elements2)
ax.legend(handles=legend_elements, loc='lower left', ncol=2)

titles = ['(a) pseudo p-value', '(b) Local Moran']
for i, ax in enumerate(axs):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(titles[i])

plt.tight_layout()
#fig.savefig('local_moran_election_2016_gop.png', dpi=200, bbox_inches='tight')

#gdf.head()

running Geary’s C¶

w2 = weights.Queen.from_dataframe(gdf, ids='index')
w2.transform = 'R'

c = esda.Geary(gdf[col], w2, permutations=999)
c.C, c.p_sim

col = 'pct_gop_16'
# https://pysal.org/esda/generated/esda.Geary_Local.html#esda.Geary_Local
lc = esda.Geary_Local(connectivity=w2, permutations = 999, drop_islands=False, labels=True)
lc.fit(gdf[col])
lc.labs, lc.p_sim

gdf['lc_q'] = lc.labs  #labels: 1: 'HH Cluster', 2: 'LL Cluster', 3: 'Outlier', 4: 'Non-significant'
gdf['lc_pval'] = lc.p_sim
pbins = [0.001, 0.01, 0.05]
bin_pval = mapclassify.UserDefined(gdf['lc_pval'], pbins)
gdf['lc_pval_bin'] = bin_pval.find_bin(gdf['lc_pval'])

gdf.groupby(['lc_q'])[['index']].count()

pv_cmap = sns.color_palette('Greens', 5)
pv_cmap = pv_cmap[1:]  # first one too light
pv_cmap = pv_cmap[::-1]
pv_cmap = {i:c for i, c in enumerate(pv_cmap)}
pv_cmap_label = {0: '$\leq$ .001', 1: '$\leq$ .01', 2: '$\leq$ .05'}

# for the new label: lc_q_cmap_label_new
lc_q_cmap = {
    1: 'xkcd:bright red',  # HH cluster
    2: 'xkcd:electric blue',  # LL cluster
    3: 'xkcd:sunflower', # outliers
    4: 'xkcd:light grey',  # non-sig
    }  # other

fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True, sharey=True)

for ax in axs:
    gdf.plot(ax=ax, fc='xkcd:light grey')

ax = axs[0]
tmp = gdf[gdf['lc_pval']<=0.05]
tmp2 = gdf[gdf['lc_pval']<=0.01]
tmp2 = tmp2.dissolve()
tmp.plot(ax=ax, color=[pv_cmap[a] for a in tmp['lc_pval_bin']])
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor=pv_cmap[i], edgecolor=pv_cmap[i],
                         label=pv_cmap_label[i]) for i in [0, 1, 2]]
legend_elements.append(Patch(facecolor='none', edgecolor='k',
                         label='p-value$\leq$0.01'))
legend_elements.append(Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
                         label='not significant'))
ax.legend(handles=legend_elements, loc='lower left', ncol=1)


ax = axs[1]
tmp.plot(ax=ax, color=[lc_q_cmap[a] for a in tmp['lc_q']])

tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor=lc_q_cmap[i], edgecolor=lc_q_cmap[i],
                         label=lc_q_cmap_label_new[i]) for i in [1, 2, 3]]
#ax.legend(handles=legend_elements, loc='lower left', ncol=2)
legend_elements2 = [Patch(facecolor='none', edgecolor='k',
                         label='p-value<=0.01'), 
                    Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
                         label='not significant')]
legend_elements.extend(legend_elements2)
ax.legend(handles=legend_elements, loc='lower left', ncol=1)

titles = ['(a) pseudo p-value', '(b) Local Geary']
for i, ax in enumerate(axs):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(titles[i])

plt.tight_layout()
#fig.savefig('local_geary_election_2016_gop.png', dpi=200, bbox_inches='tight')

w3 = weights.Queen.from_dataframe(gdf, ids='index')
w3

lg = esda.G_Local(gdf[col], w3,transform='R', star=False)  # default no star
lg_star = esda.G_Local(gdf[col], w3,transform='R',star=True)

gdf['lg_zg'] = lg.Zs
gdf['lg_pval'] = lg.p_sim

gdf['lgs_zg'] = lg_star.Zs
gdf['lgs_pval'] = lg_star.p_sim

pbins = [0.001, 0.01, 0.05]

bin_pval = mapclassify.UserDefined(gdf['lg_pval'], pbins)
#gdf['pval_bin'] = bin_pval.yb
gdf['lg_pval_bin'] = bin_pval.find_bin(gdf['lg_pval'])


bin_pval = mapclassify.UserDefined(gdf['lgs_pval'], pbins)
#gdf['pval_bin'] = bin_pval.yb
gdf['lgs_pval_bin'] = bin_pval.find_bin(gdf['lgs_pval'])

fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True, sharey=True)

for ax in axs:
    gdf.plot(ax=ax, fc='xkcd:light grey')

ax = axs[0]
tmp = gdf[gdf['lg_pval']<=0.05]
tmp2 = gdf[gdf['lg_pval']<=0.01]
tmp2 = tmp2.dissolve()
tmp.plot(ax=ax, color=[pv_cmap[a] for a in tmp['lg_pval_bin']])
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor=pv_cmap[i], edgecolor=pv_cmap[i],
                         label=pv_cmap_label[i]) for i in [0, 1, 2]]
legend_elements.append(Patch(facecolor='none', edgecolor='k',
                         label='p-value$\leq$0.01'))
legend_elements.append(Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
                         label='not significant'))
ax.legend(handles=legend_elements, loc='lower left', ncol=1)


ax = axs[1]
tmpa = tmp[tmp['lg_zg']>0]
tmpb = tmp[tmp['lg_zg']<0]
tmpa.plot(fc='xkcd:bright red', ax=ax)
tmpb.plot(fc='xkcd:electric blue', ax=ax)
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor='xkcd:bright red', edgecolor='xkcd:bright red',
                        label='{} ({})'.format('Hot-spot (HH)', len(tmpa))), 
                   Patch(facecolor='xkcd:electric blue', edgecolor='xkcd:electric blue',
                        label='{} ({})'.format('Cold-spot (LL)', len(tmpb))), ]
legend_elements2 = [Patch(facecolor='none', edgecolor='k',
                         label='p-value<=0.01'), 
                    Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
                         label='not significant')]
legend_elements.extend(legend_elements2)
ax.legend(handles=legend_elements, loc='lower left', ncol=1)

titles = ['(a) pseudo p-value', '(b) Getis-Ord Gi']
for i, ax in enumerate(axs):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(titles[i])

plt.tight_layout()
#fig.savefig('local_gi_election_2016_gop.png', dpi=200, bbox_inches='tight')

fig, axs = plt.subplots(2, 1, figsize=(10, 10), sharex=True, sharey=True)

for ax in axs:
    gdf.plot(ax=ax, fc='xkcd:light grey')

ax = axs[0]
tmp = gdf[gdf['lgs_pval']<=0.05]
tmp2 = gdf[gdf['lgs_pval']<=0.01]
tmp2 = tmp2.dissolve()
tmp.plot(ax=ax, color=[pv_cmap[a] for a in tmp['lg_pval_bin']])
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor=pv_cmap[i], edgecolor=pv_cmap[i],
                         label=pv_cmap_label[i]) for i in [0, 1, 2]]
legend_elements.append(Patch(facecolor='none', edgecolor='k',
                         label='p-value$\leq$0.01'))
legend_elements.append(Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
                         label='not significant'))
ax.legend(handles=legend_elements, loc='lower left', ncol=1)


ax = axs[1]
tmpa = tmp[tmp['lgs_zg']>0]
tmpb = tmp[tmp['lgs_zg']<0]
tmpa.plot(fc='xkcd:bright red', ax=ax)
tmpb.plot(fc='xkcd:electric blue', ax=ax)
tmp2.plot(ax=ax, fc='none', ec='k', lw=1)
legend_elements = [Patch(facecolor='xkcd:bright red', edgecolor='xkcd:bright red',
                        label='{} ({})'.format('Hot-spot (HH)', len(tmpa))), 
                   Patch(facecolor='xkcd:electric blue', edgecolor='xkcd:electric blue',
                        label='{} ({})'.format('Cold-spot (LL)', len(tmpb))), ]
legend_elements2 = [Patch(facecolor='none', edgecolor='k',
                         label='p-value<=0.01'), 
                    Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
                         label='not significant')]
legend_elements.extend(legend_elements2)
ax.legend(handles=legend_elements, loc='lower left', ncol=1)

titles = ['(a) pseudo p-value', '(b) Getis-Ord Gi']
for i, ax in enumerate(axs):
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title(titles[i])

plt.tight_layout()
#fig.savefig('local_gi_election_2016_gop.png', dpi=200, bbox_inches='tight')

identify places that is HH in all 3 methods¶

gdf.columns

tmp = gdf[gdf['lm_q']==1]  # HH
tmp = tmp[tmp['pval']<=.05]
tmp.plot()

tmp2 = tmp[tmp['lc_q']==1]
tmp2 = tmp2[tmp2['lc_pval']<=.05]
tmp2.plot()

tmp3 = tmp2[tmp2['lgs_pval']<=.05]
tmp3 = tmp3[tmp3['lgs_zg']>0]
tmp3.plot()

similarly for LL¶

tmpb = gdf[gdf['lm_q']==3]  # LL
tmpb = tmpb[tmpb['pval']<=.05]
tmpb.plot()

tmpb2 = tmpb[tmpb['lc_q']==2]
tmpb2 = tmpb2[tmpb2['lc_pval']<=.05]
tmpb2.plot()

tmpb3 = tmpb2[tmpb2['lgs_pval']<=.05]
tmpb3 = tmpb3[tmpb3['lgs_zg']<0]
tmpb3.plot()

fig, ax = plt.subplots(1, 1, figsize=(10, 5), sharex=True, sharey=True)

gdf.plot(ax=ax, fc='xkcd:light grey')

tmp3.plot(ax=ax, fc='xkcd:bright red')
tmpb3.plot(ax=ax, fc='xkcd:electric blue')

gdf.plot(ax=ax, fc='none', ec='w', lw=.1)

legend_elements = [Patch(facecolor='xkcd:bright red', edgecolor='xkcd:bright red',
                        label='{} ({})'.format('Hot-spot (HH)', len(tmp3))), 
                   Patch(facecolor='xkcd:electric blue', edgecolor='xkcd:electric blue',
                        label='{} ({})'.format('Cold-spot (LL)', len(tmpb3))),
                   Patch(facecolor='xkcd:light grey', edgecolor='xkcd:light grey',
                        label='other')]
ax.legend(handles=legend_elements, loc='lower left', ncol=1)
ax.set_title('Combine the HH and LL of three methods (p$\leq$.05)')
ax.set_xticks([])
ax.set_yticks([])
plt.tight_layout()

Geospatial Statistics and Visualisation

Lab 8: Spatial Autocorrelation Analysis

Geospatial Statistics and Visualisation

Lab 10: Preparing files for flowmap.gl