import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import folium
from shapely.geometry import Point
from IPython.display import IFrame

plt.rcParams['figure.figsize'] = (10,5)

What is choropleth?

Finding counts from a spatial join

You will be using a dataset of the building permits issued in Nashville during 2017.

permits = gpd.read_file('./dataset/building_permits_2017.csv')
council_districts = gpd.read_file('./dataset/council_districts.geojson')
permits['geometry'] = permits.apply(lambda x: Point((float(x.lng), float(x.lat))), axis=1)

# Build a GeoDataFrame: permits_geo
permits_geo = gpd.GeoDataFrame(permits, crs = council_districts.crs, geometry=permits.geometry)

# Spatial join of permits_geo and council_districts
permits_by_district = gpd.sjoin(permits_geo, council_districts, op='within')
print(permits_by_district.head(2))

# Create permit_counts
permit_counts = permits_by_district.groupby(['district']).size()
print(permit_counts)
     permit_id      issued      cost        lat         lng  \
0   2017032777  2017-05-24  226201.0  36.198241  -86.742235   
68  2017053890  2017-09-05       0.0  36.185442  -86.768239   

                      geometry  index_right first_name  \
0   POINT (-86.74223 36.19824)            5      Scott   
68  POINT (-86.76824 36.18544)            5      Scott   

                        email     res_phone     bus_phone last_name  \
0   scott.davis@nashville.gov  615-554-9730  615-862-6780     Davis   
68  scott.davis@nashville.gov  615-554-9730  615-862-6780     Davis   

          position district  
0   Council Member        5  
68  Council Member        5  
district
1     146
10    119
11    239
12    163
13    139
14    261
15    322
16    303
17    786
18    287
19    969
2     399
20    799
21    569
22    291
23    206
24    458
25    435
26    179
27    105
28    119
29    154
3     215
30     79
31    134
32    225
33    355
34    218
35    192
4     139
5     452
6     455
7     468
8     209
9     186
dtype: int64

Now you have a count of building permits issued for each council district. Next you'll get the area of each council_district.

Council district areas and permit counts

In order to create a normalized value for the building permits issued in each council district, you will need to find the area of each council district. Remember that you can leverage the area attribute of a GeoSeries to do this. You will need to convert permit_counts to a DataFrame so you can merge it with the council_districts data.

council_districts['area'] = council_districts.geometry.area
print(council_districts.head(2))

# Convert permit_counts to a DataFrame
permits_df = permit_counts.to_frame()
print(permits_df.head(2))

# Reset index and column names
permits_df.reset_index(inplace=True)
permits_df.columns = ['district', 'bldg_permits']
print(permits_df.head(2))

# Merge council_districts and permits_df
districts_and_permits = pd.merge(council_districts, permits_df, on='district')
print(districts_and_permits.head(2))
  first_name                           email     res_phone     bus_phone  \
0       Nick     nick.leonardo@nashville.gov  615-509-6334  615-862-6780   
1    DeCosta  decosta.hastings@nashville.gov  615-779-1565  615-862-6780   

  last_name        position district  \
0  Leonardo  Council Member        1   
1  Hastings  Council Member        2   

                                            geometry      area  
0  MULTIPOLYGON (((-86.90738 36.39052, -86.90725 ...  0.022786  
1  MULTIPOLYGON (((-86.75902 36.23091, -86.75909 ...  0.002927  
            0
district     
1         146
10        119
  district  bldg_permits
0        1           146
1       10           119
  first_name                           email     res_phone     bus_phone  \
0       Nick     nick.leonardo@nashville.gov  615-509-6334  615-862-6780   
1    DeCosta  decosta.hastings@nashville.gov  615-779-1565  615-862-6780   

  last_name        position district  \
0  Leonardo  Council Member        1   
1  Hastings  Council Member        2   

                                            geometry      area  bldg_permits  
0  MULTIPOLYGON (((-86.90738 36.39052, -86.90725 ...  0.022786           146  
1  MULTIPOLYGON (((-86.75902 36.23091, -86.75909 ...  0.002927           399  
C:\Users\kcsgo\anaconda3\lib\site-packages\ipykernel_launcher.py:2: UserWarning: Geometry is in a geographic CRS. Results from 'area' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.

  

You have created a column with the area in the council_districts Geo DataFrame and built a DataFrame from the permit_counts. You have merged all the information into a single GeoDataFrame. Next you will calculate the permits by area for each council district.

Calculating a normalized metric

Now you are ready to divide the number of building permits issued for projects in each council district by the area of that district to get a normalized value for the permits issued.

print(type(districts_and_permits))

# Create permit_density column in districts_and_permits
districts_and_permits['permit_density'] = districts_and_permits.apply(lambda row: row.bldg_permits/row.area, axis=1)

# Print the head of districts_and_permits
print(districts_and_permits.head())
<class 'geopandas.geodataframe.GeoDataFrame'>
  first_name                           email     res_phone     bus_phone  \
0       Nick     nick.leonardo@nashville.gov  615-509-6334  615-862-6780   
1    DeCosta  decosta.hastings@nashville.gov  615-779-1565  615-862-6780   
2      Nancy    nancy.vanreece@nashville.gov  615-576-0488  615-862-6780   
3       Bill    bill.pridemore@nashville.gov  615-915-1419  615-862-6780   
4     Robert      robert.swope@nashville.gov  615-308-0577  615-862-6780   

   last_name        position district  \
0   Leonardo  Council Member        1   
1   Hastings  Council Member        2   
2   VanReece  Council Member        8   
3  Pridemore  Council Member        9   
4      Swope  Council Member        4   

                                            geometry      area  bldg_permits  \
0  MULTIPOLYGON (((-86.90738 36.39052, -86.90725 ...  0.022786           146   
1  MULTIPOLYGON (((-86.75902 36.23091, -86.75909 ...  0.002927           399   
2  MULTIPOLYGON (((-86.72850 36.28328, -86.72791 ...  0.002517           209   
3  MULTIPOLYGON (((-86.68681 36.28671, -86.68706 ...  0.002883           186   
4  MULTIPOLYGON (((-86.74489 36.05316, -86.74491 ...  0.002052           139   

   permit_density  
0     6407.401089  
1   136305.586710  
2    83049.371383  
3    64514.836204  
4    67741.788057  

Choropleths with geopandas

Geopandas choropleths

First you will plot a choropleth of the building permit density for each council district using the default colormap. Then you will polish it by changing the colormap and adding labels and a title.

districts_and_permits.plot(column='permit_density', legend=True);
districts_and_permits.plot(column = 'permit_density', cmap = 'BuGn', edgecolor = 'black', legend = True)
plt.xlabel('longitude')
plt.ylabel('latitude')
plt.xticks(rotation = 'vertical')
plt.title('2017 Building Project Density by Council District')
Text(0.5, 1, '2017 Building Project Density by Council District')

You can see that most development has happened close to the city center! The scale for permit density is hard to interpret. We'll fix that next.

Area in km squared, geometry in decimal degrees

In this exercise, you'll start again with the council_districts GeoDataFrame and the permits DataFrame. You will change the council_districts to use the EPSG 3857 coordinate reference system before creating a column for area. Once the area column has been created, you will change the CRS back to EPSG 4326 so that the geometry is in decimal degrees.

council_districts = council_districts.to_crs(epsg=3857)
print(council_districts.crs)
print(council_districts.head())

# Create area in square km
sqm_to_sqkm = 10 ** 6
council_districts['area'] = council_districts.geometry.area / sqm_to_sqkm

# Change council_districts crs back to epsg 4326
council_districts = council_districts.to_crs(epsg=4326)
print(council_districts.crs)
print(council_districts.head())
epsg:3857
  first_name                           email     res_phone     bus_phone  \
0       Nick     nick.leonardo@nashville.gov  615-509-6334  615-862-6780   
1    DeCosta  decosta.hastings@nashville.gov  615-779-1565  615-862-6780   
2      Nancy    nancy.vanreece@nashville.gov  615-576-0488  615-862-6780   
3       Bill    bill.pridemore@nashville.gov  615-915-1419  615-862-6780   
4     Robert      robert.swope@nashville.gov  615-308-0577  615-862-6780   

   last_name        position district  \
0   Leonardo  Council Member        1   
1   Hastings  Council Member        2   
2   VanReece  Council Member        8   
3  Pridemore  Council Member        9   
4      Swope  Council Member        4   

                                            geometry      area  
0  MULTIPOLYGON (((-9674485.565 4354489.556, -967...  0.022786  
1  MULTIPOLYGON (((-9657970.373 4332440.650, -965...  0.002927  
2  MULTIPOLYGON (((-9654572.680 4339671.152, -965...  0.002517  
3  MULTIPOLYGON (((-9649930.991 4340143.590, -964...  0.002883  
4  MULTIPOLYGON (((-9656396.833 4307939.015, -965...  0.002052  
epsg:4326
  first_name                           email     res_phone     bus_phone  \
0       Nick     nick.leonardo@nashville.gov  615-509-6334  615-862-6780   
1    DeCosta  decosta.hastings@nashville.gov  615-779-1565  615-862-6780   
2      Nancy    nancy.vanreece@nashville.gov  615-576-0488  615-862-6780   
3       Bill    bill.pridemore@nashville.gov  615-915-1419  615-862-6780   
4     Robert      robert.swope@nashville.gov  615-308-0577  615-862-6780   

   last_name        position district  \
0   Leonardo  Council Member        1   
1   Hastings  Council Member        2   
2   VanReece  Council Member        8   
3  Pridemore  Council Member        9   
4      Swope  Council Member        4   

                                            geometry        area  
0  MULTIPOLYGON (((-86.90738 36.39052, -86.90725 ...  350.194851  
1  MULTIPOLYGON (((-86.75902 36.23091, -86.75909 ...   44.956987  
2  MULTIPOLYGON (((-86.72850 36.28328, -86.72791 ...   38.667932  
3  MULTIPOLYGON (((-86.68681 36.28671, -86.68706 ...   44.295293  
4  MULTIPOLYGON (((-86.74489 36.05316, -86.74491 ...   31.441618  

Spatially joining and getting counts

You will continue preparing your dataset for plotting a geopandas choropleth by creating a GeoDataFrame of the building permits spatially joined to the council districts. After that, you will be able to get counts of the building permits issued in each council district.

permits_geo = gpd.GeoDataFrame(permits, crs=council_districts.crs, geometry=permits.geometry)

# Spatially join permits_geo and council_districts
permits_by_district = gpd.sjoin(permits_geo, council_districts, op='within')

# Count permits in each district
permits_counts = permits_by_district.groupby(['district']).size()

# Convert permit_counts to a df with 2 columns: district and bldg_permits
counts_df = permits_counts.to_frame()
counts_df.reset_index(inplace=True)
counts_df.columns = ['district', 'bldg_permits']
print(counts_df.head(2))
  district  bldg_permits
0        1           146
1       10           119

You have done all the work to get the area units in km squared, latitude and longitude in decimal degrees, and a count of building permits issued for each council district all in the same dataset. All that's left is to calculate a normalized value and plot the choropleth!

Building a polished Geopandas choropleth

After merging the counts_df with permits_by_district, you will create a column with normalized permit_density by dividing the count of permits in each council district by the area of that council district. Then you will plot your final geopandas choropleth of the building projects in each council district.

districts_and_permits = pd.merge(permits_by_district, counts_df, on='district')

# Create permit_density column
districts_and_permits['permit_density'] = districts_and_permits.apply(lambda row: row.bldg_permits / row.area, axis=1)
print(districts_and_permits.head(2))

# Create choropleth plot
districts_and_permits.plot(column='permit_density', cmap='OrRd', edgecolor='black', legend=True);

# Add axis labels and title
plt.xlabel('longitude');
plt.ylabel('latitude');
plt.title('2017 Building Project Density by Council District');
    permit_id      issued      cost        lat         lng  \
0  2017032777  2017-05-24  226201.0  36.198241  -86.742235   
1  2017053890  2017-09-05       0.0  36.185442  -86.768239   

                     geometry  index_right first_name  \
0  POINT (-86.74223 36.19824)            5      Scott   
1  POINT (-86.76824 36.18544)            5      Scott   

                       email     res_phone     bus_phone last_name  \
0  scott.davis@nashville.gov  615-554-9730  615-862-6780     Davis   
1  scott.davis@nashville.gov  615-554-9730  615-862-6780     Davis   

         position district       area  bldg_permits  permit_density  
0  Council Member        5  19.030612           452       23.751207  
1  Council Member        5  19.030612           452       23.751207  

Choropleths with folium

Folium choropleth

In this exercise, you will construct a folium choropleth to show the density of permitted construction projects in different Nashville council districts. You will be using a single data source, the districts_and_permits GeoDataFrame, which is in your workspace.

nashville = [36.1636,-86.7823]

# Create map
m = folium.Map(location=nashville, zoom_start=10)

# Build choropleth
folium.Choropleth(
    geo_data=r'./dataset/council_districts.geojson',
    name='geometry',
    data=districts_and_permits,
    columns=['district', 'permit_density'],
    key_on='feature.properties.district',
    fill_color='Reds',
    fill_opacity=0.5,
    line_opacity=1.0,
    legend_name='2017 Permitted Building Projects per km squared',
    
).add_to(m)

folium.LayerControl().add_to(m)

m.save('./html/permits_choropleth.html')

Folium choropleth with markers and popups

Now you will add a marker to the center of each council district that shows the district number along with the count of building permits issued in 2017 for that district.

districts_and_permits['center'] = districts_and_permits.geometry.centroid

distinct = []
# Build markers and popups
for row in districts_and_permits.iterrows():
    row_values = row[1] 
    if row_values['district'] not in distinct:
        center_point = row_values['center']
        location = [center_point.y, center_point.x]
        popup = ('Council District: ' + str(row_values['district']) + 
             ';  ' + 'permits issued: ' + str(row_values['bldg_permits']))
        marker = folium.Marker(location = location, popup = popup)
        marker.add_to(m)
        distinct.append(row_values['district'])
    
# Display the map
m.save('./html/permits_choropleth2.html')
C:\Users\kcsgo\anaconda3\lib\site-packages\ipykernel_launcher.py:2: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation.