---
title: Create address only csv
execute:
  eval: false
  echo: true
format:
  html:
    code-fold: false
---
```{python}
import osmnx as ox
import pandas as pd
```
```{python}
# Place-name query used for the OpenStreetMap feature download below.
place = 'San Diego County, California, USA'
```
```{python}
# OSM tag filter passed to the feature query: shop=financial and amenity=bank.
tags = dict(shop='financial', amenity='bank')
```
```{python}
# Query OpenStreetMap for features inside `place` that match `tags`.
gdf = ox.features_from_place(query=place, tags=tags)
```
```{python}
# Preview the first five downloaded features.
gdf.head(n=5)
```
```{python}
# Keep only the name, the OSM address components, and the geometry.
addresses = gdf.loc[
    :,
    [
        'name',
        'addr:housenumber',
        'addr:street',
        'addr:city',
        'addr:postcode',
        'geometry',
    ],
]
```
```{python}
# Same column selection as the preceding cell, run again (result is identical).
addresses = gdf.loc[
    :,
    [
        'name',
        'addr:housenumber',
        'addr:street',
        'addr:city',
        'addr:postcode',
        'geometry',
    ],
]
```
```{python}
# Drop rows missing a street, postcode, house number, or name.
# addr:city is not part of the required set, so rows without a city are kept.
addresses_clean = addresses.dropna(
    axis=0,
    how='any',
    subset=['addr:street', 'addr:postcode', 'addr:housenumber', 'name'],
)
```
```{python}
# Save the cleaned records (without the index) to financial.csv.
addresses_clean.to_csv(path_or_buf="financial.csv", index=False)
```
```{python}
# Work on an independent copy so adding columns does not touch addresses_clean.
ao = addresses_clean.copy(deep=True)
```
```{python}
# Build one address string per row: "<number> <street>, <city> <postcode>".
# The earlier dropna requires house number, street and postcode, but not
# addr:city, and NaN propagates through string "+". Without the fallback a
# missing city would turn the whole address into NaN. When the city is
# missing, emit "<number> <street>, <postcode>" instead.
ao['address'] = (
    ao['addr:housenumber'] + " " + ao['addr:street'] + ", "
    + (ao['addr:city'] + " ").fillna("")
    + ao['addr:postcode']
)
```
```{python}
# Display the assembled address strings.
ao['address']
```
```{python}
# Reduce to the two columns needed for the address-only export, then preview.
ao = ao.loc[:, ['name', 'address']]
ao.head(n=5)
```
```{python}
# Save the name/address table (without the index) to financial_ao.csv.
ao.to_csv(path_or_buf='financial_ao.csv', index=False)
```
## Geocoding addresses
```{python}
import pandas as pd
```
```{python}
# Load the name/address table written by the export step above.
df = pd.read_csv(filepath_or_buffer='financial_ao.csv')
```
```{python}
# Preview the first five name/address rows.
df.head(n=5)
```
```{python}
import geopandas
```
```{python}
# Forward-geocode every address string with the Nominatim provider.
# user_agent is passed through to the geocoder as an extra keyword argument.
results = geopandas.tools.geocode(
    df['address'],
    provider='nominatim',
    user_agent='geog385f24',
)
```
```{python}
# (rows, columns) of the geocoding output, to compare against df.shape.
results.shape
```
```{python}
# (rows, columns) of the geocoder input table.
df.shape
```
```{python}
# Preview the first five geocoding results.
results.head(n=5)
```
### Create a geodataframe
```{python}
# Wrap the geocoding results as a GeoDataFrame in WGS84 (EPSG:4326).
gdf = geopandas.GeoDataFrame(
    data=results,
    crs="EPSG:4326",
    geometry='geometry',
)
```
```{python}
# Interactive map of the geocoded points, used to spot misplaced results.
gdf.explore()
```
### Drop the obvious error
- Use a point-in-polygon test to keep only the points that fall inside the county boundary.
```{python}
import osmnx as ox
# Geocode the county name to a GeoDataFrame holding its boundary.
county_boundary = ox.geocode_to_gdf(query="San Diego County, California, USA")
```
```{python}
# Quick static plot to confirm the boundary geometry looks right.
county_boundary.plot()
```
```{python}
# Inner spatial join: keep only points that lie within the county boundary.
# The boundary's attribute columns are appended to each retained point.
points_in_county = gdf.sjoin(
    county_boundary,
    how="inner",
    predicate="within",
)
```
```{python}
# Interactive map of the points retained by the county filter.
points_in_county.explore()
```
```{python}
# Rebind gdf to the county-filtered points (same object, not a copy).
gdf = points_in_county
```
```{python}
# Preview the first five county-filtered rows.
gdf.head(n=5)
```
```{python}
# Second-to-last comma-separated field of each geocoded address, trimmed.
# Presumably the postal code in the geocoder's formatted address; verify on the output.
gdf['address'].str.split(",").str.get(-2).str.strip()
```
```{python}
# Store the second-to-last comma-separated address field (trimmed) as zipcode.
# Presumably the postal code in the geocoder's formatted address; verify on the output.
address_fields = gdf['address'].str.split(",")
gdf['zipcode'] = address_fields.str.get(-2).str.strip()
del address_fields
```
```{python}
# Non-null count of every column per zipcode value.
gdf.groupby('zipcode').count()
```
```{python}
from geosnap import DataStore
```
```{python}
# Open the geosnap data store at a fixed absolute path.
# NOTE(review): this path is machine-specific; confirm it exists where the notebook runs.
datasets = DataStore("/srv/data/geosnap" )
```
```{python}
# List the attributes available on the data store object.
dir(datasets)
```
```{python}
from geosnap import io as gio
```
```{python}
from geosnap.io import get_acs
# Load tract-level ACS data for state FIPS 06 and year 2016 from the store.
ca = get_acs(
    datasets,
    level='tract',
    state_fips=['06'],
    years=[2016],
)
```
```{python}
# Keep tracts whose geoid starts with 06073 (state 06 + county 073).
sd = ca.loc[ca['geoid'].str.startswith('06073')]
```
```{python}
# Quick static plot of the selected tracts.
sd.plot()
```
```{python}
# Coordinate reference system of the tract layer.
sd.crs
```
```{python}
# Coordinate reference system of the point layer, to compare with the tracts.
gdf.crs
```
```{python}
# Reproject the tracts into the point layer's CRS before the spatial join.
sd = sd.to_crs(crs=gdf.crs)
```
```{python}
# Sanity check: both layers should now report the same CRS.
sd.crs == gdf.crs
```
```{python}
tracts = sd
# Keep each point's geometry and geocoded address, and attach the business
# name from the geocoder input table `df`.
# The geocode results only carry geometry and address, so the `name` column
# in gdf after the county spatial join comes from the county boundary
# record. It is not the business name and must not be used here.
# The geocode output and the spatial join both preserve df's row index,
# so an index join restores the matching name for every point.
points = gdf[['geometry', 'address']].join(df['name'])
```
```{python}
# Confirm which columns the point layer carries into the tract join.
points.columns
```
```{python}
# Inner spatial join: attach to each point the attributes of the tract it
# lies within; overlapping column names get the "left"/"right" suffixes.
points_with_tracts = points.sjoin(
    tracts,
    how="inner",
    predicate="within",
    lsuffix="left",
    rsuffix="right",
)
```
```{python}
# Preview the first five point-to-tract matches.
points_with_tracts.head(n=5)
```
```{python}
# (rows, columns) of the joined table; rows = points matched to a tract.
points_with_tracts.shape
```
```{python}
# Preview the first five tract rows.
tracts.head(n=5)
```
```{python}
# Count matched points per tract, keyed by the tract identifier column `geoid`.
tract_counts = (
    points_with_tracts
    .groupby('geoid')
    .size()
    .rename('point_count')
    .reset_index()
)
```
```{python}
# Left-merge the counts so every tract is kept. Tracts with no matched
# points come back as NaN and are converted to an integer 0.
tracts = tracts.merge(right=tract_counts, how='left', on='geoid')
tracts = tracts.assign(
    point_count=tracts['point_count'].fillna(0).astype(int)
)
```
```{python}
# Choropleth of the per-tract point counts, with a legend.
tracts.plot(column='point_count', legend=True)
```
```{python}
# The ten tracts with the highest point counts.
(
    tracts
    .loc[:, ['geoid', 'point_count']]
    .sort_values(by='point_count', ascending=False)
    .head(n=10)
)
```
```{python}
# Distribution of counts: how many tracts have each point_count value.
(
    tracts
    .loc[:, ['geoid', 'point_count']]
    .groupby('point_count')
    .count()
)
```