Zip codes

import osmnx as ox
import pandas as pd
import geopandas as gpd
# Read the point features stored in sd_banks.json into a GeoDataFrame and
# preview the first rows.
gdf = gpd.read_file('sd_banks.json')
gdf.head()
address index_right bbox_west bbox_south bbox_east bbox_north place_id osm_type osm_id lat lon class type place_rank importance addresstype name display_name geometry
0 Softwave of Solana Beach, 665, San Rodolfo Dri... 0 -117.610536 32.529524 -116.081094 33.505242 402480966 relation 396482 32.963784 -116.770628 boundary administrative 12 0.626383 county San Diego County San Diego County, California, United States POINT (-117.25935 32.99318)
1 U.S. Bank, 3201, University Avenue, North Park... 0 -117.610536 32.529524 -116.081094 33.505242 402480966 relation 396482 32.963784 -116.770628 boundary administrative 12 0.626383 county San Diego County San Diego County, California, United States POINT (-117.12476 32.7483)
2 U.S. Bank, 5197, Waring Road, allied gardens (... 0 -117.610536 32.529524 -116.081094 33.505242 402480966 relation 396482 32.963784 -116.770628 boundary administrative 12 0.626383 county San Diego County San Diego County, California, United States POINT (-117.08057 32.79229)
3 Wells Fargo, 4649, Carmel Mountain Road, San D... 0 -117.610536 32.529524 -116.081094 33.505242 402480966 relation 396482 32.963784 -116.770628 boundary administrative 12 0.626383 county San Diego County San Diego County, California, United States POINT (-117.21444 32.92126)
4 San Diego County Credit Union, 286, Town Cente... 0 -117.610536 32.529524 -116.081094 33.505242 402480966 relation 396482 32.963784 -116.770628 boundary administrative 12 0.626383 county San Diego County San Diego County, California, United States POINT (-116.98424 32.84421)
# Preview the second-to-last comma-separated field of each address, with
# surrounding whitespace stripped (the ZIP code in the rows displayed).
gdf.address.str.split(",").str[-2].str.strip()
0      92075
1      92104
2      92120
3      92130
4      92071
       ...  
189    92106
190    92004
191    91932
192    92024
193    91932
Name: address, Length: 194, dtype: object
# Store the second-to-last comma-separated address field (whitespace stripped)
# as a 'zipcode' column.
gdf['zipcode'] = gdf.address.str.split(",").str[-2].str.strip()
# Tally rows per ZIP code.
# NOTE(review): count() reports the non-null count of every column, so each
# column shows the same number in the table; groupby(...).size() would return a
# single count per ZIP code.
gdf.groupby(by='zipcode').count()
address index_right bbox_west bbox_south bbox_east bbox_north place_id osm_type osm_id lat lon class type place_rank importance addresstype name display_name geometry
zipcode
91901 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
91910 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
91911 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
91915 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
91932 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
91941 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
91942 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
91950 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
91977 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
92004 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92008 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
92009 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
92010 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92011 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
92014 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
92019 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
92020 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11 11
92021 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
92024 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
92025 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
92026 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92027 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92028 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92037 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
92040 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92057 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92059 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92064 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
92065 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
92069 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92071 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
92075 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
92078 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
92081 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
92083 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
92101 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
92102 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92103 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
92104 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
92105 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
92106 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
92108 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
92109 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
92110 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
92111 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5
92118 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92119 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
92120 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
92121 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92123 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
92126 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
92128 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
92129 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
92130 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
92131 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
92154 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
92173 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6

Let’s use census tracts instead. We want to create a variable that records the number of banks located in each census tract.

from geosnap import DataStore
# Create a geosnap DataStore pointed at /srv/data/geosnap and list the
# attributes it exposes.
datasets = DataStore("/srv/data/geosnap")
dir(datasets)
['acs',
 'bea_regions',
 'blocks_2000',
 'blocks_2010',
 'blocks_2020',
 'codebook',
 'counties',
 'ejscreen',
 'lodes_codebook',
 'ltdb',
 'msa_definitions',
 'msas',
 'ncdb',
 'nces',
 'seda',
 'show_data_dir',
 'states',
 'tracts_1990',
 'tracts_2000',
 'tracts_2010',
 'tracts_2020']
from geosnap import io as gio
from geosnap.io import get_acs

# Fetch tract-level ACS data for 2016, restricted to state FIPS code 06
# (California).
ca = get_acs(datasets, state_fips=['06'], level='tract', years=[2016])
/home/serge/miniforge3/envs/workshop-pysal/lib/python3.10/site-packages/geosnap/_data.py:16: UserWarning: Streaming data from S3. Use `geosnap.io.store_acs()` to store the data locally for better performance
  warn(warning_msg)
# Keep only the tracts whose geoid begins with 06073 (state 06 + county 073,
# presumably San Diego County -- confirm), then draw them as a quick check.
sd = ca[ca.geoid.str.startswith('06073')]
sd.plot()

# Inspect the coordinate reference system of the tract polygons.
sd.crs
<Geographic 2D CRS: EPSG:4269>
Name: NAD83
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: North America - onshore and offshore: Canada - Alberta; British Columbia; Manitoba; New Brunswick; Newfoundland and Labrador; Northwest Territories; Nova Scotia; Nunavut; Ontario; Prince Edward Island; Quebec; Saskatchewan; Yukon. Puerto Rico. United States (USA) - Alabama; Alaska; Arizona; Arkansas; California; Colorado; Connecticut; Delaware; Florida; Georgia; Hawaii; Idaho; Illinois; Indiana; Iowa; Kansas; Kentucky; Louisiana; Maine; Maryland; Massachusetts; Michigan; Minnesota; Mississippi; Missouri; Montana; Nebraska; Nevada; New Hampshire; New Jersey; New Mexico; New York; North Carolina; North Dakota; Ohio; Oklahoma; Oregon; Pennsylvania; Rhode Island; South Carolina; South Dakota; Tennessee; Texas; Utah; Vermont; Virginia; Washington; West Virginia; Wisconsin; Wyoming. US Virgin Islands.  British Virgin Islands.
- bounds: (167.65, 14.92, -47.74, 86.46)
Datum: North American Datum 1983
- Ellipsoid: GRS 1980
- Prime Meridian: Greenwich
# Inspect the coordinate reference system of the point layer for comparison.
gdf.crs
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich
# Reproject the tracts into the point layer's CRS so both layers share one
# coordinate system, then confirm that the two CRSs now match.
sd = sd.to_crs(gdf.crs)
sd.crs == gdf.crs
True
# Name the two layers for the spatial join.
tracts = sd
# Keep only the geometry, address and name columns of the points.
# NOTE(review): this also leaves out gdf's existing 'index_right' column, which
# would presumably clash with the 'index_right' column sjoin adds -- confirm.
points = gdf[['geometry', 'address', 'name']]
points.columns
Index(['geometry', 'address', 'name'], dtype='object')
# Spatially join each point to the tract polygon it lies within, carrying the
# tract's attributes onto the point row. The inner join keeps only points that
# match a tract.
points_with_tracts = gpd.sjoin(
    points,
    tracts,
    how="inner",
    predicate="within",
    lsuffix="left",
    rsuffix="right",
)
points_with_tracts.head()
geometry address name index_right geoid n_mexican_pop n_cuban_pop n_puerto_rican_pop n_russian_pop n_italian_pop ... p_veterans p_poverty_rate p_poverty_rate_over_65 p_poverty_rate_children p_poverty_rate_white p_poverty_rate_black p_poverty_rate_hispanic p_poverty_rate_native p_poverty_rate_asian year
0 POINT (-117.25935 32.99318) Softwave of Solana Beach, 665, San Rodolfo Dri... San Diego County 6117 06073017304 1269.0 0.0 0.0 0.0 255.0 ... 6.386393 10.637271 3.540393 0.305761 8.271645 0.225298 1.110396 0.305761 0.0 2016
1 POINT (-117.12476 32.7483) U.S. Bank, 3201, University Avenue, North Park... San Diego County 5673 06073001500 1314.0 0.0 5.0 25.0 66.0 ... 9.861325 9.553159 0.462250 1.155624 7.370313 1.335388 3.646636 0.000000 0.0 2016
2 POINT (-117.08057 32.79229) U.S. Bank, 5197, Waring Road, allied gardens (... San Diego County 5887 06073009703 475.0 8.0 38.0 13.0 102.0 ... 12.722063 6.905444 0.544413 0.687679 6.561605 0.028653 1.318052 0.000000 0.0 2016
3 POINT (-117.21444 32.92126) Wells Fargo, 4649, Carmel Mountain Road, San D... San Diego County 5813 06073008333 836.0 0.0 0.0 351.0 297.0 ... 1.775034 7.135031 0.000000 1.205285 4.047696 0.000000 0.412504 0.000000 0.0 2016
4 POINT (-116.98424 32.84421) San Diego County Credit Union, 286, Town Cente... San Diego County 6057 06073016614 488.0 22.0 21.0 10.0 25.0 ... 8.778437 4.043662 0.248077 0.669809 3.795584 0.248077 0.595386 0.000000 0.0 2016

5 rows × 161 columns

# Check how many rows and columns the spatial join produced.
points_with_tracts.shape
(194, 161)
# Preview the tract table and its columns.
tracts.head()
geoid n_mexican_pop n_cuban_pop n_puerto_rican_pop n_russian_pop n_italian_pop n_german_pop n_irish_pop n_scandaniavian_pop n_foreign_born_pop ... p_poverty_rate p_poverty_rate_over_65 p_poverty_rate_children p_poverty_rate_white p_poverty_rate_black p_poverty_rate_hispanic p_poverty_rate_native p_poverty_rate_asian geometry year
5658 06073000100 191.0 18.0 14.0 0.0 84.0 155.0 41.0 35.0 345.0 ... 4.579877 1.009737 0.288496 3.678327 0.0 0.937613 0.000000 0.000000 MULTIPOLYGON (((-117.1949 32.75278, -117.19471... 2016
5659 06073000201 280.0 0.0 67.0 29.0 54.0 69.0 68.0 0.0 319.0 ... 7.228916 2.548656 0.417053 6.858202 0.0 0.000000 0.370714 0.000000 MULTIPOLYGON (((-117.17887 32.75765, -117.1779... 2016
5660 06073000202 525.0 54.0 46.0 129.0 244.0 102.0 68.0 0.0 743.0 ... 5.766408 1.740407 0.461313 4.172783 0.0 1.887188 0.000000 0.000000 MULTIPOLYGON (((-117.18404 32.74571, -117.1838... 2016
5661 06073000300 777.0 0.0 23.0 43.0 98.0 200.0 152.0 0.0 713.0 ... 12.922756 0.730689 0.981211 9.603340 0.0 4.133612 0.000000 0.041754 MULTIPOLYGON (((-117.16864 32.74897, -117.1684... 2016
5662 06073000400 484.0 41.0 12.0 0.0 203.0 99.0 26.0 0.0 698.0 ... 11.187655 1.515569 0.799118 9.258749 0.0 0.909341 0.000000 0.000000 MULTIPOLYGON (((-117.17087 32.75865, -117.1701... 2016

5 rows × 158 columns

# Count the joined points per tract, grouping on the tracts' 'geoid' column.
tract_counts = points_with_tracts.groupby('geoid').size().reset_index(name='point_count')
# Left-merge the counts onto the tracts so tracts without any point are kept;
# their missing counts are filled with 0 and the column is cast to int.
tracts = tracts.merge(tract_counts, on='geoid', how='left')
tracts['point_count'] = tracts['point_count'].fillna(0).astype(int)
# Map the tracts colored by their point count, with a legend.
tracts.plot('point_count', legend=True)

# List the ten tracts with the highest point counts.
tracts[['geoid', 'point_count']].sort_values(by='point_count', ascending=False).head(10)
geoid point_count
195 06073008511 10
581 06073020214 8
383 06073016202 8
620 06073021400 7
332 06073013606 6
478 06073017900 6
152 06073008329 5
313 06073013409 5
437 06073017043 3
399 06073016614 3
# Tabulate how many tracts have each point count.
tracts[['geoid', 'point_count']].groupby(by='point_count').count()
geoid
point_count
0 516
1 73
2 27
3 4
5 2
6 2
7 1
8 2
10 1