#installig python libraries for this project #pip install zipfile #pip install kaggle #importing libraries #import pandas as pd #import kaggle #import zipfile #downloaded the dataset for the project using Kaggle API #kaggle datasets download -d hmavrodiev/london-bike-sharing-dataset #extracting files from the downloaded kaggle file zipfile_name= 'london-bike-sharing-dataset.zip' with zipfile.ZipFiles(zipfile_name, 'r') as file: file.extractall() #read in the csv file as a pandas dataframe bikes=pd.read_csv("london_merged.csv") #exploring the data bikes.info() #count the unique values in the weather code column bikes.weather_code.value_counts() #counting the unique values in the season column bikes.season.value_counts() #specifying column names used in the project new_cols_dict ={'timestamp: toms', 'cnt:' 'count', 't1:' 'tempt_real)_C', 't2:' 'temp_feels_like_C', 'hum:' 'humifity_percent', 'wind_speed:''wind_speed_kph', 'weather_code:''weather', 'is_holiday:''is_holiday', 'is_weekend:''is_weekend', 'season:''season'} # Renaming the columns to the specified column names bikes.rename(new_cols_dict, axis=1, inplace=True) # changing the humidity values to percentage (i.e. a value between 0 and 1) bikes.humidity_percent = bikes.humidity_percent / 100 # creating a season dictionary so that we can map the integers 0-3 to the actual written values season_dict = { '0.0':'spring', '1.0':'summer', '2.0':'autumn', '3.0':'winter' } # creating a weather dictionary so that we can map the integers to the actual written values weather_dict = { '1.0':'Clear', '2.0':'Scattered clouds', '3.0':'Broken clouds', '4.0':'Cloudy', '7.0':'Rain', '10.0':'Rain with thunderstorm', '26.0':'Snowfall' } # changing the seasons column data type to string bikes.season = bikes.season.astype('str') # mapping the values 0-3 to the actual written seasons bikes.season = bikes.season.map(season_dict) # changing the weather column data type to string bikes.weather = bikes.weather.astype('str') # mapping the values to the actual written weathers bikes.weather = bikes.weather.map(weather_dict) # checking the dataframe to see if the mappings have worked bikes.head() # writing the final dataframe to an excel file to use in Tableau visualisations. # The file will be the 'london_bikes_final.xlsx' file and the sheet name is 'Data' bikes.to_excel('london_bikes_final.xlsx', sheet_name='Data')