diff --git a/task3/sql_insert_readings.sql b/task3/sql_insert_readings.sql
new file mode 100644
index 0000000000000000000000000000000000000000..91bcd86087d44fe708b807576540b0232196521f
--- /dev/null
+++ b/task3/sql_insert_readings.sql
@@ -0,0 +1,3 @@
+INSERT INTO readings
+VALUES
+    ('452', '2013-08-23 07:00:00+00:00', '51.54044', '30.50055', '13.72186', '27.8', '23.2', '4.6', '16.4', '19.454', '2.9', NULL, '20.40603', NULL, NULL, NULL, NULL, True);
\ No newline at end of file
diff --git a/task3/sql_insert_sites.sql b/task3/sql_insert_sites.sql
index 2decda6340b38d1cb73bc35bc71c0c57d8494a0b..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 100644
--- a/task3/sql_insert_sites.sql
+++ b/task3/sql_insert_sites.sql
@@ -1,55 +0,0 @@
-INSERT INTO sites
-VALUES
-    ('452', 'AURN St Pauls', '51.4628294172', '-2.58454081635', '2006-06-15', NULL, 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('203', 'Brislington Depot', '51.4417471802', '-2.55995583224', '2001-01-01', NULL, 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('206', 'Rupert Street', '51.4554331987', '-2.59626237324', '2003-01-01', '2015-12-31', 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('270', 'Wells Road', '51.4278638883', '-2.56374153315', '2003-05-23', NULL, 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('215', 'Parson Street School', '51.432675707', '-2.60495665673', '2002-02-01', NULL, 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('375', 'Newfoundland Road Police Station', '51.4606738207', '-2.58225341824', '2005-01-01', '2015-12-31', 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('463', 'Fishponds Road', '51.4780449714', '-2.53523027459', '2009-03-13', NULL, 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('500', 'Temple Way', '51.4579497129', '-2.58398909033', '2017-08-01', NULL, 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('501', 'Colston Avenue', '51.4552693825', '-2.59664882861', '2018-11-30', NULL, 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('213', 'Old Market', '51.4560189999', '-2.58348949026', NULL, NULL, 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('459', 'Cheltenham Road \ Station Road', '51.4689385901', '-2.5927241667', '2008-06-25', '2011-12-31', 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('447', 'Bath Road', '51.4425372726', '-2.57137536073', '2005-10-29', '2013-01-04', 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('395', 'Shiner''s Garage', '51.4577930324', '-2.56271419977', '2004-06-24', '2013-01-04', 'Continuous (Reference)');
-
-INSERT INTO sites
-VALUES
-    ('481', 'CREATE Centre Roof', '51.447213417', '-2.62247405516', NULL, NULL, 'Continuous (Reference)');
\ No newline at end of file
diff --git a/task3/task3av2.py b/task3/task3av2.py
index eb2acf45d8349bd6f4d79af1c2c112ec5d1cb57b..4471326350a37024b3b52ca16c6f20ab7245d96e 100644
--- a/task3/task3av2.py
+++ b/task3/task3av2.py
@@ -14,31 +14,24 @@ path = "data\\bristol-air-quality-data-siteclean.csv"
 df = pd.read_csv(path, sep = ";", keep_default_na = False)
 #print(df.head())
 
+# Convert Date Time to datetime format, then to string format
+df['Date Time'] = pd.to_datetime(df['Date Time'])
+
 # Convert DateStart to date format
 df['DateStart'] = pd.to_datetime(df['DateStart'], format = "%Y-%m-%d").dt.date
 
-# Convert DateEnd to date format, then to string format, replacing 'NaT' values with None
+# Convert DateEnd to date format
 df['DateEnd'] = pd.to_datetime(df['DateEnd'], format = "%Y-%m-%d").dt.date
 
-"""
-# Add additional "'" to Locations for SQL compatibility, where required
-for i in range(len(df)):
-    df.loc[i,'Location'] = df.loc[i,'Location'].replace("'", "''")
-"""
-
 # Create dataframe of unique site values
 df_site = df[['SiteID', 'Location', 'geo_point_2d', 'DateStart', 'DateEnd', 'Instrument Type']].drop_duplicates(ignore_index = True)
 
-print(df_site)
-
 #############################################################################################################
 # Create insert statement - sites table
 #############################################################################################################
 
 # Open write file
 write_file = "task3\\sql_insert_sites.sql"
-# Open read file
-#read_file = "data\\bristol-air-quality-data-siteclean.csv"
 
 # Set table and headers
 sql_db = "'bristol-air-quality'"
@@ -81,12 +74,9 @@ with open(write_file, 'w') as write_file:
 
         # Add sql_db and sql_table
         sql_insert += sql_table
-
-
 
         # Add first line of values
         sql_insert += "\n" + "VALUES "
-
 
         # Insert attribute values
         sql_insert += "\n" + "\t" + "(" + \
@@ -99,7 +89,151 @@ instrument_type + ");"
 
         # Write sql_insert to file
-        write_file.write(sql_insert)
+        #write_file.write(sql_insert)
 
         # Increment line counter
         line_counter += 1
 
+
+
+#############################################################################################################
+# Create insert statement - readings table
+#############################################################################################################
+
+# Open write file
+write_file = "task3\\sql_insert_readings.sql"
+
+# Set table and headers
+sql_db = "'bristol-air-quality'"
+sql_table = "readings"
+sql_headers = ['site_id', 'date_time', 'nox', 'no2', 'no', 'pm10', 'nvpm10', 'vpm10', \
+               'nvpm2_5', 'pm2_5', 'vpm2_5', 'co', 'o3', 'so2', 'temperature', 'rh', 'air_pressure', 'current']
+
+# Initialise line counter
+line_counter = 0
+
+# Create sql file to write to
+with open(write_file, 'w') as write_file:
+
+
+    # Loop through rows of dataframe
+    for i in range(len(df)):
+
+        # Initialise empty list of sql values
+        sql_vals = []
+
+        # Generate attribute values for INSERT INTO statement, append into list of sql_vals
+        # Composite key values - cannot be NULL
+        sql_vals.append(f"'{df['SiteID'][i]}'")
+        sql_vals.append(f"'{df['Date Time'][i]}'")
+
+        # Replace other values with NULL if empty strings
+        # TODO - Replace with loop based on specified list of columns
+        if df['NOx'][i] != '':
+            sql_vals.append(f"'{df['NOx'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['NO2'][i] != '':
+            sql_vals.append(f"'{df['NO2'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['NO'][i] != '':
+            sql_vals.append(f"'{df['NO'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['PM10'][i] != '':
+            sql_vals.append(f"'{df['PM10'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['NVPM10'][i] != '':
+            sql_vals.append(f"'{df['NVPM10'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['VPM10'][i] != '':
+            sql_vals.append(f"'{df['VPM10'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['NVPM2.5'][i] != '':
+            sql_vals.append(f"'{df['NVPM2.5'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['PM2.5'][i] != '':
+            sql_vals.append(f"'{df['PM2.5'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['VPM2.5'][i] != '':
+            sql_vals.append(f"'{df['VPM2.5'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['CO'][i] != '':
+            sql_vals.append(f"'{df['CO'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['O3'][i] != '':
+            sql_vals.append(f"'{df['O3'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['SO2'][i] != '':
+            sql_vals.append(f"'{df['SO2'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['Temperature'][i] != '':
+            sql_vals.append(f"'{df['Temperature'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['RH'][i] != '':
+            sql_vals.append(f"'{df['RH'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['Air Pressure'][i] != '':
+            sql_vals.append(f"'{df['Air Pressure'][i]}'")
+        else:
+            sql_vals.append(f"NULL")
+
+        if df['Current'][i] != '':
+            sql_vals.append(f"{df['Current'][i]}")
+        else:
+            sql_vals.append(f"NULL")
+
+
+        # Initialise insert statement
+        if line_counter == 0:
+            sql_insert = "INSERT INTO "
+        else:
+            sql_insert = "\n" + "\n" + "INSERT INTO "
+
+        # Add sql_db and sql_table
+        sql_insert += sql_table
+
+        # Add first line of values
+        sql_insert += "\n" + "VALUES "
+
+        # Insert attribute values
+        sql_insert += "\n" + "\t" + "("
+
+        for j in range(len(sql_vals)):
+            if j < len(sql_vals) - 1:
+                sql_insert += sql_vals[j] + ", "
+            else:
+                sql_insert += sql_vals[j] + ");"
+
+        line_counter += 1
+
+        # Write sql_insert to file
+        write_file.write(sql_insert)
+
+        break
+
diff --git a/task3/workings.py b/task3/workings.py
index ea08254ffe6f3d0dac3d3d661f56bc450b4e0f3a..3e14bfff91303a2d85553a9ac6b9063fe66d2429 100644
--- a/task3/workings.py
+++ b/task3/workings.py
@@ -12,6 +12,8 @@ df['DateStart'] = pd.to_datetime(df['DateStart'], format = "%Y-%m-%d").dt.date
 # Convert DateEnd to date format, then to string format, replacing 'NaT' values with None
 df['DateEnd'] = pd.to_datetime(df['DateEnd'], format = "%Y-%m-%d").dt.date
 
+# Convert Date Time to datetime format, then to string format
+df['Date Time'] = pd.to_datetime(df['Date Time'])
 #df['DateEnd'] = ['NULL' if str(date) == 'NaT' else date for date in df['DateEnd']]
 
 # Create dataframe of unique site values
@@ -27,8 +29,7 @@ for location in df_site['Location']:
 #    print(location[:position] + "'" + location[position:])
 """
 
-for i in range(len(df_site)):
-    print(df_site['Location'][i].replace("'","''"))
+print(df['Date Time'][0:10])
 
 
 # Add additional "'" to Locations for SQL compatibility, where required
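
Note on the TODO in task3av2.py ("Replace with loop based on specified list of columns"): a minimal sketch of that refactor, assuming the same dataframe and CSV column names used in the diff. The READING_COLUMNS list and the row_to_sql_vals helper are illustrative names, not part of the repository.

import pandas as pd

# Hypothetical helper sketching the TODO: build the per-row value list with a loop
# instead of one if/else block per column. Column names match the CSV headers above.
READING_COLUMNS = ['NOx', 'NO2', 'NO', 'PM10', 'NVPM10', 'VPM10', 'NVPM2.5', 'PM2.5',
                   'VPM2.5', 'CO', 'O3', 'SO2', 'Temperature', 'RH', 'Air Pressure']

def row_to_sql_vals(row: pd.Series) -> list:
    # Composite key values - cannot be NULL
    sql_vals = [f"'{row['SiteID']}'", f"'{row['Date Time']}'"]
    # Measurement columns: empty string (keep_default_na = False) becomes NULL,
    # anything else is quoted
    for col in READING_COLUMNS:
        sql_vals.append(f"'{row[col]}'" if row[col] != '' else "NULL")
    # 'Current' is a boolean flag and is written unquoted (NULL when empty)
    sql_vals.append(str(row['Current']) if row['Current'] != '' else "NULL")
    return sql_vals

Inside the existing "for i in range(len(df)):" loop this would replace the per-column if/else blocks with a single call such as sql_vals = row_to_sql_vals(df.iloc[i]).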