Create an address-only CSV

import osmnx as ox
import pandas as pd
# Download every OSM feature in the county tagged as a financial shop or a bank.
place = "San Diego County, California, USA"
tags = {'shop': 'financial', 'amenity': 'bank'}
gdf = ox.features_from_place(place, tags)
gdf.head()

# Keep only the name, the address components and the geometry.
address_columns = ['name', 'addr:housenumber', 'addr:street', 'addr:city', 'addr:postcode', 'geometry']
addresses = gdf[address_columns]

# A record is only usable if it has a name, house number, street and postcode.
addresses_clean = addresses.dropna(subset=['addr:street', 'addr:postcode', 'addr:housenumber', 'name'])
addresses_clean.to_csv("financial.csv", index=False)

ao = addresses_clean.copy()
# 'addr:city' is NOT part of the dropna subset above, so it may still be NaN.
# Concatenating a NaN into a string column makes the whole address NaN, so
# the city is treated as optional: rows with a city keep the original
# "<number> <street>, <city> <postcode>" format, rows without one fall back
# to "<number> <street>, <postcode>".
street_part = ao['addr:housenumber'] + " " + ao['addr:street']
locality_part = (ao['addr:city'].fillna('') + " " + ao['addr:postcode']).str.strip()
ao['address'] = street_part + ", " + locality_part
ao.address

# Write the two-column (name, address) table consumed by the geocoding step.
ao = ao[['name', 'address']]
ao.head()
ao.to_csv('financial_ao.csv', index=False)

Geocoding addresses

import pandas as pd
import geopandas

# Load the (name, address) table written by the previous step.
df = pd.read_csv('financial_ao.csv')
# An empty address cell is read back as NaN; sending that to the geocoder
# wastes a request and cannot yield a meaningful location, so drop such rows.
# The original index labels are kept so results can be matched back to df.
df = df.dropna(subset=['address'])
df.head()

# Geocode each address string with Nominatim (network access required).
results = geopandas.tools.geocode(df.address, provider='nominatim', user_agent='geog385f24')

# Compare the sizes of the geocoder output and the input table.
results.shape
df.shape
results.head()

Create a GeoDataFrame

# Wrap the geocoding output as a GeoDataFrame in WGS84 (lon/lat) and show it
# on an interactive map to eyeball the result.
gdf = geopandas.GeoDataFrame(
    results,
    geometry='geometry',
    crs="EPSG:4326",
)
gdf.explore()

Drop the obvious error

  • use a point-in-polygon test against the county boundary
import osmnx as ox

# Fetch the county polygon used to discard points geocoded outside the county.
county_boundary = ox.geocode_to_gdf("San Diego County, California, USA")
county_boundary.plot()

# Join against the boundary *geometry only*. The boundary frame carries its
# own attribute columns, and joining all of them would attach the county's
# attributes (presumably including its `name` - verify against the osmnx
# output) to every point, where they would be mistaken for attributes of the
# financial institution later on.
points_in_county = geopandas.sjoin(
    gdf, county_boundary[['geometry']], how="inner", predicate="within"
)
# `index_right` is join bookkeeping and would get in the way of later joins.
points_in_county = points_in_county.drop(columns='index_right', errors='ignore')

# The geocoder output holds only the matched address and geometry, so restore
# each institution's name from the input table. Assignment aligns on the index
# labels, which the geocoder output and the inner join carry over from `df`.
points_in_county['name'] = df['name']
points_in_county.explore()

gdf = points_in_county
gdf.head()

# The second-to-last comma-separated token of the geocoded address is taken
# as the ZIP code (assumes a "..., <zip>, <country>" layout).
gdf.address.str.split(",").str[-2].str.strip()
gdf['zipcode'] = gdf.address.str.split(",").str[-2].str.strip()
gdf.groupby(by='zipcode').count()
from geosnap import DataStore
from geosnap import io as gio
from geosnap.io import get_acs

# Open the geosnap data store at the given path and list what it exposes.
datasets = DataStore("/srv/data/geosnap")
dir(datasets)

# Tract-level 2016 ACS data for state FIPS 06.
ca = get_acs(
    datasets,
    state_fips=['06'],
    level='tract',
    years=[2016],
)

# Keep only the tracts whose geoid begins with the 06073 prefix.
in_county = ca["geoid"].str.startswith('06073')
sd = ca[in_county]
sd.plot()

# Inspect both coordinate reference systems, then reproject the tracts onto
# the CRS of the points so the two layers can be spatially joined.
sd.crs
gdf.crs
sd = sd.to_crs(gdf.crs)
sd.crs == gdf.crs
tracts = sd
# Reduce the points to the columns needed for the tract join.
points = gdf[['geometry', 'address', 'name']]
points.columns

# Attach to each point the attributes of the tract it falls within.
points_with_tracts = geopandas.sjoin(
    left_df=points,
    right_df=tracts,
    how="inner",
    predicate="within",
    lsuffix="left",
    rsuffix="right",
)
points_with_tracts.head()
points_with_tracts.shape
tracts.head()

# Number of points per tract, keyed by the tracts' `geoid` column.
per_tract_sizes = points_with_tracts.groupby('geoid').size()
tract_counts = per_tract_sizes.rename('point_count').reset_index()

# Left-merge so tracts without any point are kept, then turn their missing
# counts into an integer 0.
tracts = tracts.merge(tract_counts, on='geoid', how='left')
filled_counts = tracts['point_count'].fillna(0)
tracts['point_count'] = filled_counts.astype(int)

# Choropleth of the counts, the ten tracts with the most points, and how many
# tracts share each count value.
tracts.plot(column='point_count', legend=True)
count_table = tracts[['geoid', 'point_count']]
count_table.sort_values(by='point_count', ascending=False).head(10)
count_table.groupby(by='point_count').count()