Create an address-only CSV

import osmnx as ox
import pandas as pd

# Area of interest, passed to OSMnx as a free-text place query.
place = "San Diego County, California, USA"

# OSM tag filter. OSMnx returns the union of the tags, so a feature matching
# either shop=financial or amenity=bank is included.
tags = {
    'shop': 'financial',
    'amenity': 'bank',
}

# Download every matching feature inside the place boundary.
gdf = ox.features_from_place(query=place, tags=tags)

# Notebook preview of the downloaded features.
gdf.head()

# Columns kept for the address table: feature name, OSM address tags, geometry.
address_columns = [
    'name',
    'addr:housenumber',
    'addr:street',
    'addr:city',
    'addr:postcode',
    'geometry',
]
addresses = gdf[address_columns]

# Rows missing any of these fields are discarded. 'addr:city' is not in the
# list, so rows without a city are kept.
required_columns = ['addr:street', 'addr:postcode', 'addr:housenumber', 'name']
addresses_clean = addresses.dropna(subset=required_columns)

# index=False keeps the frame's index out of the CSV.
addresses_clean.to_csv("financial.csv", index=False)

# Work on a copy so the derived column is not added to addresses_clean.
ao = addresses_clean.copy()

# House number, street and postcode are guaranteed present by the dropna that
# produced addresses_clean.
street_line = ao['addr:housenumber'] + " " + ao['addr:street']

# 'addr:city' was NOT part of that dropna subset, so it can be missing.
# Concatenating NaN would turn the whole address into NaN; fall back to an
# empty prefix so the row still gets "<number> <street>, <postcode>".
city_prefix = (ao['addr:city'] + " ").fillna("")

# Rows that do have a city produce exactly "<number> <street>, <city> <postcode>".
ao['address'] = street_line + ", " + city_prefix + ao['addr:postcode']

ao.address

# Keep only the two columns needed for geocoding.
ao = ao[['name', 'address']]
ao.head()

ao.to_csv('financial_ao.csv', index=False)

Geocoding addresses

import pandas as pd

# Load the name/address table written earlier. Rows whose address is blank in
# the CSV come back as NaN; drop them so a non-string value is never handed to
# the geocoder. The original row index is kept (no reset) so geocoding results
# can still be aligned with this frame.
df = pd.read_csv('financial_ao.csv').dropna(subset=['address'])

df.head()

import geopandas

# Forward-geocode every address with Nominatim. Extra keyword arguments are
# forwarded to the geopy geocoder constructor:
#   user_agent - identifies this application to the Nominatim service
#   timeout    - seconds to wait per request; geopy's 1 s default often
#                expires on the public server, and the resulting
#                GeocoderTimedOut is not caught by geopandas, so a single slow
#                reply would abort the whole batch.
results = geopandas.tools.geocode(
    df.address,
    provider='nominatim',
    user_agent='geog385f24',
    timeout=10,
)

# One result row is returned per input address; compare the shapes to confirm.
results.shape

df.shape

results.head()

Create a geodataframe

# Wrap the geocoding results as a GeoDataFrame whose active geometry column is
# 'geometry', tagged as EPSG:4326.
gdf = geopandas.GeoDataFrame(
    data=results,
    geometry='geometry',
    crs="EPSG:4326",
)

# Interactive map of the geocoded points (notebook output).
gdf.explore()

Drop the obvious error

Use a point-in-polygon test to keep only the points that fall inside the county boundary

import osmnx as ox
# Boundary of the study area as a GeoDataFrame.
county_boundary = ox.geocode_to_gdf("San Diego County, California, USA")

county_boundary.plot()

# Inner spatial join: only points lying within the boundary survive, which
# removes results that were geocoded outside the county.
points_in_county = geopandas.sjoin(
    gdf,
    county_boundary,
    how="inner",
    predicate="within",
)

points_in_county.explore()

# From here on gdf refers to the clipped points, which also carry the boundary's
# attribute columns from the join.
gdf = points_in_county

gdf.head()

# The geocoder's address is a comma-separated string; the original analysis
# takes the second-to-last field as the ZIP code.
zip_candidate = gdf.address.str.split(",").str[-2].str.strip()
zip_candidate

# When a result carries no postcode, the second-to-last field is something else
# (e.g. the state name). Keep only values that look like a ZIP or ZIP+4 and
# leave the rest missing, so they do not show up as bogus groups below.
is_zip = zip_candidate.str.fullmatch(r"\d{5}(-\d{4})?", na=False)
gdf['zipcode'] = zip_candidate.where(is_zip)

# Rows per ZIP code (groupby skips missing keys).
gdf.groupby(by='zipcode').count()

from geosnap import DataStore

# Open a geosnap DataStore on /srv/data/geosnap (presumably the directory that
# holds the locally cached geosnap data - confirm for your environment).
datasets = DataStore("/srv/data/geosnap")

# Notebook inspection: list the attributes the store object exposes.
dir(datasets)

from geosnap import io as gio

from geosnap.io import get_acs

# Tract-level ACS data for state FIPS 06, requested for 2016.
ca = get_acs(datasets, state_fips=['06'], level='tract', years=[2016])

# Keep the tracts whose GEOID starts with 06073 (the San Diego subset).
is_san_diego = ca['geoid'].str.startswith('06073')
sd = ca[is_san_diego]

sd.plot()

# Inspect both coordinate reference systems before aligning them.
sd.crs

gdf.crs

# Reproject the tracts into the CRS of the geocoded points so the spatial join
# compares like with like.
sd = sd.to_crs(crs=gdf.crs)

# Should display True after the reprojection.
sd.crs == gdf.crs

tracts = sd

# geocode() returns only 'geometry' and 'address', so any 'name' column present
# in gdf at this point was contributed by the county-boundary join and holds
# the boundary's name, not the business name. Take the name from df instead;
# assign() aligns on the row index, which geocoding and the inner spatial join
# both carry over from df.
points = gdf[['geometry', 'address']].assign(name=df['name'])

points.columns

# Attach to every point the attributes of the tract that contains it; points
# outside every tract are dropped by the inner join.
points_with_tracts = geopandas.sjoin(
    points,
    tracts,
    how="inner",
    predicate="within",
    lsuffix="left",
    rsuffix="right",
)

points_with_tracts.head()

points_with_tracts.shape

tracts.head()

# Number of joined points per tract, as a two-column frame: 'geoid', 'point_count'.
tract_counts = (
    points_with_tracts
    .groupby('geoid')
    .size()
    .rename('point_count')
    .reset_index()
)

# Left merge keeps every tract; tracts without any point get NaN, which is then
# turned into an integer 0.
tracts = tracts.merge(right=tract_counts, how='left', on='geoid')
filled_counts = tracts['point_count'].fillna(0)
tracts['point_count'] = filled_counts.astype(int)

# Choropleth of points per tract.
tracts.plot(column='point_count', legend=True)

counts_by_tract = tracts[['geoid', 'point_count']]

# Ten tracts with the most points.
counts_by_tract.sort_values(by='point_count', ascending=False).head(10)

# How many tracts have 0, 1, 2, ... points.
counts_by_tract.groupby(by='point_count').count()

---
title: Create address only csv
execute:
  eval: false
  echo: true
format:
  html:
    code-fold: false
---

```{python}
import osmnx as ox
import pandas as pd
```

```{python}
place = "San Diego County, California, USA"
```

```{python}
tags = {'shop': 'financial', 'amenity': 'bank'}
```

```{python}
gdf = ox.features_from_place(place, tags)
```

```{python}
gdf.head()
```

```{python}
addresses = gdf[['name','addr:housenumber', 'addr:street', 'addr:city', 'addr:postcode','geometry']]
```

```{python}
addresses = gdf[['name','addr:housenumber', 'addr:street', 'addr:city', 'addr:postcode','geometry']]
```

```{python}
addresses_clean = addresses.dropna(subset=['addr:street','addr:postcode', 'addr:housenumber', 'name'])
```

```{python}
addresses_clean.to_csv("financial.csv", index=False)
```

```{python}
ao = addresses_clean.copy()
```

```{python}
ao['address'] = ao['addr:housenumber'] + " "+ ao['addr:street'] + ", " + ao['addr:city'] + " " + ao['addr:postcode']
```

```{python}
ao.address
```

```{python}
ao = ao[['name', 'address']]
ao.head()
```

```{python}
ao.to_csv('financial_ao.csv', index=False)
```

## Geocoding addresses

```{python}
import pandas as pd
```

```{python}
df = pd.read_csv('financial_ao.csv')
```

```{python}
df.head()
```

```{python}
import geopandas
```

```{python}
results = geopandas.tools.geocode(df.address, provider='nominatim', user_agent='geog385f24')
```

```{python}
results.shape
```

```{python}
df.shape
```

```{python}
results.head()
```

### Create a geodataframe

```{python}
gdf = geopandas.GeoDataFrame(results, geometry='geometry', crs="EPSG:4326")
```

```{python}
gdf.explore()
```

### Drop the obvious error

- use a point in polygon

```{python}
import osmnx as ox
county_boundary = ox.geocode_to_gdf("San Diego County, California, USA")
```

```{python}
county_boundary.plot()
```

```{python}
points_in_county = geopandas.sjoin(gdf, county_boundary, how="inner", predicate="within")
```

```{python}
points_in_county.explore()
```

```{python}
gdf = points_in_county
```

```{python}
gdf.head()
```

```{python}
gdf.address.str.split(",").str[-2].str.strip()
```

```{python}
gdf['zipcode'] = gdf.address.str.split(",").str[-2].str.strip()
```

```{python}
gdf.groupby(by='zipcode').count()
```

```{python}
from geosnap import DataStore
```

```{python}
datasets = DataStore("/srv/data/geosnap")
```

```{python}
dir(datasets)
```

```{python}
from geosnap import io as gio
```

```{python}
from geosnap.io import get_acs
ca = get_acs(datasets, state_fips=['06'], level='tract', years=[2016])
```

```{python}
sd = ca[ca.geoid.str.startswith('06073')]
```

```{python}
sd.plot()
```

```{python}
sd.crs
```

```{python}
gdf.crs
```

```{python}
sd = sd.to_crs(gdf.crs)
```

```{python}
sd.crs == gdf.crs
```

```{python}
tracts = sd
points = gdf[['geometry', 'address', 'name']]
```

```{python}
points.columns
```

```{python}
points_with_tracts = geopandas.sjoin(points, tracts, how="inner", predicate="within", lsuffix="left", rsuffix="right")
```

```{python}
points_with_tracts.head()
```

```{python}
points_with_tracts.shape
```

```{python}
tracts.head()
```

```{python}
# Replace 'GEOID' with the appropriate identifier column in your tracts GeoDataFrame
tract_counts = points_with_tracts.groupby('geoid').size().reset_index(name='point_count')
```

```{python}
tracts = tracts.merge(tract_counts, on='geoid', how='left')
tracts['point_count'] = tracts['point_count'].fillna(0).astype(int)
```

```{python}
tracts.plot('point_count', legend=True)
```

```{python}
tracts[['geoid', 'point_count']].sort_values(by='point_count', ascending=False).head(10)
```

```{python}
tracts[['geoid', 'point_count']].groupby(by='point_count').count()
```