Code
import osmnx as ox
import pandas as pd
Code
import pandas as pd
Code
# Load the source table; the geocoding step further down reads its `address`
# column.
df = pd.read_csv('~/data/385/financial_ao.csv')
Code
# Preview the first rows to check what was read.
df.head()
Code
import geopandas
Code
%%time
# Geocode every value in df.address with the Nominatim provider; the cell magic
# on the first line reports how long the lookups take.
# NOTE(review): presumably this hits the public, rate-limited Nominatim
# service — confirm that is acceptable for the number of rows in `df`.
results = geopandas.tools.geocode(df.address, provider='nominatim', user_agent='geog385')
Code
# Compare the size of the geocoding output with the size of the input table.
results.shape
Code
df.shape
Code
# Show the container types of the input table and of the geocoding output.
type(df), type(results)
Code
# Interactive map of the geocoded locations, e.g. to spot misplaced points.
results.explore()
Code
results.head()
Code
# From here on the geocoded result is referred to as `gdf`.
gdf = results

Drop the obviously erroneous geocoding results

  • use a point-in-polygon test against the county boundary
Code
import osmnx as ox
# Look up the boundary for "San Diego County, California, USA" through OSMnx.
county_boundary = ox.geocode_to_gdf("San Diego County, California, USA")
Code
# Quick visual check of the boundary that came back.
county_boundary.plot()
Code
# Inner spatial join with predicate="within": only geocoded points that fall
# inside the boundary are kept, and the boundary's attribute columns are
# appended to each kept point.
# NOTE(review): assumes `gdf` and `county_boundary` share a CRS — confirm.
points_in_county = geopandas.sjoin(gdf, county_boundary, how="inner", predicate="within")
Code
# Map the remaining points to check the result of the filter.
points_in_county.explore()
Code
# Continue with only the points that passed the county filter.
gdf = points_in_county
Code
gdf.head()
Code
# Persist the filtered points. The driver is named explicitly so the output is
# GeoJSON no matter how the installed GeoPandas infers a format from the
# ".json" extension.
gdf.to_file('sd_banks.json', driver='GeoJSON')
Code
# Preview the second-to-last comma-separated piece of each address string.
# NOTE(review): presumably that piece is the postal code in the geocoder's
# address format — confirm against a few rows.
gdf['address'].str.rsplit(",", n=2).str[-2].str.strip()
Code
# Store that same piece, whitespace-trimmed, as a new `zipcode` column.
gdf['zipcode'] = gdf['address'].str.rsplit(",", n=2).str[-2].str.strip()
Code
# Per-zipcode count of non-null values in each remaining column.
gdf.groupby('zipcode').count()
Code
from geosnap import DataStore
Code
# Create a geosnap DataStore pointed at /srv/data/geosnap.
datasets = DataStore("/srv/data/geosnap")
Code
# List the attributes the store object exposes.
dir(datasets)
Code
from geosnap import io as gio
Code
from geosnap.io import get_acs

# Pull tract-level ACS data for state FIPS 06 and year 2016 from the store.
ca = get_acs(datasets, state_fips=['06'], level='tract', years=[2016])
Code
# Keep only the tracts whose geoid starts with 06073.
# NOTE(review): presumably 06073 is the San Diego County FIPS prefix — confirm.
in_sd = ca['geoid'].str.startswith('06073')
sd = ca.loc[in_sd]
Code
# Quick visual check of the selected tracts.
sd.plot()
Code
# Compare the coordinate reference systems of the tracts and of the points.
sd.crs
Code
gdf.crs
Code
# Reproject the tracts into the points' CRS so the spatial join further down
# compares geometries in the same coordinate system.
sd = sd.to_crs(gdf.crs)
Code
# Verify that both layers now report the same CRS.
sd.crs == gdf.crs
Code
# `tracts` is the reprojected tract layer; `points` keeps only three columns of
# the point layer for the join.
tracts = sd
# NOTE(review): `name` presumably comes from the boundary columns appended by
# the earlier county spatial join rather than from `df` — confirm it is the
# intended field.
points = gdf[['geometry', 'address', 'name']]
Code
points.columns
Code
# Inner spatial join with predicate="within": each point is matched to the
# tract it lies inside and receives that tract's columns; points inside no
# tract are dropped. lsuffix/rsuffix name the suffixes used for column names
# that occur on both sides.
points_with_tracts = geopandas.sjoin(points, tracts, how="inner", predicate="within", lsuffix="left", rsuffix="right")
Code
points_with_tracts.head()
Code
# Number of point/tract matches produced by the join.
points_with_tracts.shape
Code
tracts.head()
Code
# Count the joined points per tract, keyed by the tracts' `geoid` column.
tract_counts = points_with_tracts.groupby('geoid').size().reset_index(name='point_count')
Code
# Attach the counts to every tract. A `point_count` column left over from a
# previous run of this cell is dropped first; otherwise re-running would
# produce `point_count_x` / `point_count_y` and a KeyError on the next line.
tracts = tracts.drop(columns='point_count', errors='ignore').merge(tract_counts, on='geoid', how='left')
# Tracts without any point have no row in tract_counts, so the left merge
# leaves NaN there; turn those into an integer count of 0.
tracts['point_count'] = tracts['point_count'].fillna(0).astype(int)
Code
# Map the tracts colored by their point_count, with a legend.
tracts.plot('point_count', legend=True)
Code
# The ten tracts with the highest point_count.
tracts[['geoid', 'point_count']].sort_values(by='point_count', ascending=False).head(10)
Code
# How many tracts share each point_count value.
tracts[['geoid', 'point_count']].groupby(by='point_count').count()