In this project, I explore the NYC Airbnb listings and aim to find listings that match my upcoming trip.
#Import packages and libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
!pip install geopandas
import geopandas as gpd
#Load data
from google.colab import drive
# Mount Google Drive and build the CSV path from the mount point, so the read
# does not depend on the notebook's current working directory.
DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)
data = pd.read_csv(f'{DRIVE_MOUNT_POINT}/My Drive/dataset/AB_NYC_2019.csv')
# Quick look at the first rows, then at the column types and names.
data.head(5)
data.dtypes, data.columns
# Rename the 'neighbourhood_group' column to 'boroname'
borough_column_names = {'neighbourhood_group': 'boroname'}
data = data.rename(columns=borough_column_names)
# Scatter every listing by its coordinates, coloured by borough name
_, scatter_ax = plt.subplots(figsize=(10, 10))
sns.scatterplot(
    data=data,
    x='longitude',
    y='latitude',
    hue='boroname',
    s=30,
    ax=scatter_ax,
)
# Per-borough count of non-null values in every column; 'boroname' is turned
# back into a regular column so it can be used for plotting and merging.
listings_by_borough = data.groupby('boroname')
borough_count = listings_by_borough.count().reset_index()
borough_count
# Bar chart of the number of listings (count of 'id') for each borough
fig, ax1 = plt.subplots(figsize=(6, 6))
sns.barplot(
    data=borough_count,
    x='boroname',
    y='id',
    palette="plasma",
    ax=ax1,
)
# Axis labels first, then the title and tick styling.
ax1.set_xlabel('Borough', fontsize=12)
ax1.set_ylabel('Count', fontsize=12)
ax1.set_title('Number of Listings by Borough', fontsize=15)
ax1.tick_params(axis='both', labelsize=10)
Manhattan has more listings than the other boroughs.
# Load the NYC borough boundaries bundled with geopandas as the base layer
# NOTE(review): `gpd.datasets` is deprecated in recent geopandas releases --
# confirm the installed version still ships the bundled 'nybb' dataset.
nybb_path = gpd.datasets.get_path('nybb')
nyc = gpd.read_file(nybb_path)
nyc.head()
# Align the borough column name with the listings data ('BoroName' -> 'boroname')
# so the two tables can be joined on it.
nyc = nyc.rename(columns={'BoroName': 'boroname'})
# Attach the per-borough counts to the borough geometries.
bc_geo = nyc.merge(borough_count, on='boroname')
bc_geo
#Plot the listing count per borough on the map
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
bc_geo.plot(column='id', cmap='viridis_r', alpha=.5, ax=ax, legend=True)
# Write each borough's name at the centroid of its geometry. The label is
# passed positionally because Matplotlib renamed the `s` keyword of
# `annotate` to `text`; the positional form works with both old and new
# releases.
for boro_label, boro_shape in zip(bc_geo.boroname, bc_geo.geometry):
    ax.annotate(
        boro_label,
        xy=boro_shape.centroid.coords[0],
        color='black',
        ha='center',
    )
plt.title('Number of Airbnb Listings by NYC Borough')
plt.axis('off')
We can see that most listings are located in Brooklyn and Manhattan. In contrast, Airbnb is not as popular in the Bronx and Staten Island.
#import shapely
from shapely import wkt
Since we don't have the geometries of the neighborhoods, we could use a CSV file from the NYC Open Data site, which stores WKT in a geometry column.
# The listing coordinates are longitude/latitude, i.e. EPSG:4326. The authority
# string replaces the deprecated {'init': 'epsg:4326'} dict form.
crs = 'EPSG:4326'
# One point geometry per listing, built from its longitude and latitude.
geometry = gpd.points_from_xy(data.longitude, data.latitude)
geo_data = gpd.GeoDataFrame(data, crs=crs, geometry=geometry)
fig, ax = plt.subplots(figsize=(15, 15))
# Reproject the borough layer into the listings' CRS before drawing so both
# layers share the same coordinates (no change if they already match).
nyc.to_crs(geo_data.crs).plot(ax=ax, alpha=0.4, edgecolor='black')
geo_data.plot(column='id', ax=ax, legend=True, cmap='plasma', markersize=4)
plt.title("Number of Airbnb Listings")
plt.axis('off')
For the upcoming trip, I would like to find an 'Entire home/apt' listing, preferably located in Brooklyn. So let's plot a map to see the locations of each room type.
# Map every listing, coloured by its room type, on top of the borough outlines.
fig, ax = plt.subplots(figsize=(15, 15))
# Draw the borough layer in the listings' CRS so the outlines and the points
# share the same coordinates (no change if they already match).
nyc.to_crs(geo_data.crs).plot(ax=ax, alpha=0.4, edgecolor='black')
geo_data.plot(column='room_type', ax=ax, legend=True, cmap='plasma', markersize=4)
plt.title('Locations of room type')
plt.axis('off')
Let's also look at the different room types by borough.
# Number of listings per room type, split by borough.
plt.figure(figsize=(10, 10))
# Columns are named explicitly: in current seaborn the first positional
# parameter of countplot is `data`, so passing a Series positionally no longer
# selects the x variable.
ax = sns.countplot(x='room_type', hue='boroname', data=data, palette='plasma')
Manhattan offers more entire homes/apartments than the other boroughs. However, I am a student who loves travelling on a budget. Therefore, I will focus on listings in Manhattan that are priced under 65.
# Keep only the Manhattan listings. This intermediate frame must be named
# `data_manha` because the price filter below reads from it.
data_manha = data[data.boroname == 'Manhattan']
data_manha.head()
# Listings priced under 65; copy so that adding a column does not write into a
# slice of `data`.
data_manha_65 = data_manha[data_manha.price < 65].copy()
# Popup label for the map: a (name, 'price:<price>') tuple per listing.
data_manha_65['label'] = list(
    zip(data_manha_65['name'], 'price:' + data_manha_65['price'].astype(str))
)
data_manha_65.head()
In the map below, you can see not only the location of each listing but also its details (name and price) in a popup.
#install folium to create a map
!pip install git+https://github.com/python-visualization/folium
import folium
from folium import plugins
# Initial centre (latitude/longitude) of the interactive map.
Long = -73.92
Lat = 40.86
manha_map = folium.Map([Lat, Long], zoom_start=12)
# add_to() registers the cluster on the map, so no separate add_child() call
# is needed afterwards.
manha_rooms_map = plugins.MarkerCluster().add_to(manha_map)
# One marker per budget listing, with its label shown as the popup.
for lat, lon, label in zip(data_manha_65.latitude,
                           data_manha_65.longitude,
                           data_manha_65.label):
    folium.Marker(location=[lat, lon], icon=None, popup=label).add_to(manha_rooms_map)
# Display the populated map. It must not be re-created here, otherwise the
# markers added above are lost.
manha_map