nikethanreddy dsid271 committed on
Commit
3c57246
·
verified ·
1 Parent(s): 59531d0

Update app.py (#10)

Browse files

- Update app.py (8a3817e742901dcb08b021086cab560022f34308)


Co-authored-by: SIDHARTHA D <dsid271@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +371 -306
app.py CHANGED
@@ -1,29 +1,31 @@
1
- import gradio as gr
 
 
2
  import numpy as np
3
  import tensorflow as tf
4
  from tensorflow.keras.models import load_model
5
  from tensorflow.keras.layers import Input
6
- # Assuming TKAN and TKAT are available after installing the respective packages
7
- from tkan import TKAN
8
- # If TKAT is from a different library, import it similarly
9
- try:
10
- from tkat import TKAT
11
- except ImportError:
12
- print("TKAT library not found. If your model uses TKAT, make sure the library is installed.")
13
- TKAT = None
14
-
15
  from tensorflow.keras.utils import custom_object_scope
16
  import pickle
17
  import os
18
  import requests
19
  import pandas as pd
20
  from datetime import datetime, timedelta, timezone
21
- import pytz # For timezone handling
 
 
 
 
 
 
 
 
 
 
22
 
23
  # --- Your MinMaxScaler Class (Copied from Notebook) ---
24
- # (Keep the MinMaxScaler class definition here as before)
25
  class MinMaxScaler:
26
- # ... (MinMaxScaler class definition) ...
27
  def __init__(self, feature_axis=None, minmax_range=(0, 1)):
28
  self.feature_axis = feature_axis
29
  self.min_ = None
@@ -49,21 +51,20 @@ class MinMaxScaler:
49
  return self
50
 
51
  def transform(self, X):
 
 
52
  X_scaled = (X - self.min_) / self.scale_
53
  X_scaled = X_scaled * (self.minmax_range[1] - self.minmax_range[0]) + self.minmax_range[0]
54
  return X_scaled
55
 
56
- def fit_transform(self, X):
57
- return self.fit(X).transform(X)
58
-
59
  def inverse_transform(self, X_scaled):
 
 
60
  X = (X_scaled - self.minmax_range[0]) / (self.minmax_range[1] - self.minmax_range[0])
61
  X = X * self.scale_ + self.min_
62
  return X
63
 
64
-
65
- # --- AQI Breakpoints and Calculation Logic (Copied from Notebook) ---
66
- # (Keep the aqi_breakpoints and calculate_overall_aqi functions here as before)
67
  aqi_breakpoints = {
68
  'pm25': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 200, 101, 200), (201, 300, 201, 300)],
69
  'pm10': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 250, 101, 200), (251, 350, 201, 300)],
@@ -85,327 +86,391 @@ def calculate_sub_aqi(concentration, breakpoints):
85
 
86
  def calculate_overall_aqi(row, aqi_breakpoints):
87
  sub_aqis = []
 
88
  pollutant_mapping = {
89
- 'pm2_5': 'pm25',
90
  'pm10': 'pm10',
91
- 'carbon_monoxide': 'co',
 
 
92
  }
93
  for api_pollutant, internal_pollutant in pollutant_mapping.items():
94
- concentration = row.get(api_pollutant, np.nan)
95
- if not np.isnan(concentration):
96
- sub_aqi = calculate_sub_aqi(concentration, aqi_breakpoints.get(internal_pollutant, []))
97
- sub_aqis.append(sub_aqi)
 
 
 
98
  else:
99
- sub_aqis.append(np.nan)
100
- return np.nanmax(sub_aqis) if sub_aqis and not all(np.isnan(sub_aqis)) else np.nan
101
-
102
-
103
- # --- Configuration ---
104
- MODEL_PATH = "best_model_TKAN_nahead_1 (2).keras"
105
- INPUT_SCALER_PATH = "input_scaler.pkl"
106
- TARGET_SCALER_PATH = "target_scaler.pkl"
107
- SEQUENCE_LENGTH = 24 # Matches the notebook
108
- NUM_INPUT_FEATURES = 5 # ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
109
- N_AHEAD = 1 # Matches the notebook
110
-
111
- # --- Open-Meteo API Configuration ---
112
- OPENMETEO_AIR_QUALITY_API_URL = "https://air-quality-api.open-meteo.com/v1/air-quality"
113
- # You will also need the standard weather API for temperature
114
- OPENMETEO_WEATHER_API_URL = "https://api.open-meteo.com/v1/forecast" # Using forecast for recent hourly data
115
- # Replace with the actual latitude and longitude for your location
116
- LATITUDE = 17.33
117
- LONGITUDE = 78.27
118
- AIR_QUALITY_PARAMETERS = ["pm10", "pm2_5", "carbon_monoxide"]
119
- WEATHER_PARAMETERS_FOR_TEMP = ["temperature_2m"] # Parameter name for temperature
120
- TIMEZONE = "auto"
121
-
122
- # --- Ensure Required Files Exist ---
123
- # (Keep the file existence checks here as before)
124
- if not os.path.exists(MODEL_PATH):
125
- print(f"Error: Model file not found at {MODEL_PATH}")
126
- import sys
127
- sys.exit("Model file missing. Exiting.")
128
-
129
- if not os.path.exists(INPUT_SCALER_PATH):
130
- print(f"Error: Input scaler file not found at {INPUT_SCALER_PATH}")
131
- import sys
132
- sys.exit("Input scaler file missing. Exiting.")
133
-
134
- if not os.path.exists(TARGET_SCALER_PATH):
135
- print(f"Error: Target scaler file not found at {TARGET_SCALER_PATH}")
136
- import sys
137
- sys.exit("Target scaler file missing. Exiting.")
138
-
139
-
140
- # --- Load Model and Scalers ---
141
- # (Keep the loading logic here as before)
142
- custom_objects = {"TKAN": TKAN, "MinMaxScaler": MinMaxScaler}
143
- if TKAT is not None:
144
- custom_objects["TKAT"] = TKAT
145
 
146
- model = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  input_scaler = None
148
- target_scaler = None
 
149
 
150
  try:
151
- with custom_object_scope(custom_objects):
152
- model = load_model(MODEL_PATH)
153
- print("Model loaded successfully!")
154
- model.summary()
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- with open(INPUT_SCALER_PATH, 'rb') as f:
157
- input_scaler = pickle.load(f)
158
- print(f"Input scaler loaded successfully from {INPUT_SCALER_PATH}")
159
 
160
- with open(TARGET_SCALER_PATH, 'rb') as f:
161
- target_scaler = pickle.load(f)
162
- print(f"Target scaler loaded successfully from {TARGET_SCALER_PATH}")
 
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  except Exception as e:
165
- print(f"Error during loading: {e}")
166
- import traceback
167
  traceback.print_exc()
168
- import sys
169
- sys.exit("Failed to load model or scaler(s). Exiting.")
170
-
171
-
172
- # --- Data Retrieval from Open-Meteo API ---
173
-
174
- def get_latest_data_sequence(sequence_length):
175
- """
176
- Retrieves the latest sequence of air quality and temperature data from Open-Meteo
177
- for the previous `sequence_length` hours based on the current hour,
178
- calculates historical AQI, and formats it for model input.
179
-
180
- Args:
181
- sequence_length (int): The length of the historical sequence required (e.g., 24).
182
-
183
- Returns:
184
- np.ndarray: A numpy array containing the historical data sequence.
185
- Shape: (sequence_length, NUM_INPUT_FEATURES)
186
- Returns None or raises an error on failure.
187
- """
188
- print(f"Attempting to retrieve data for the last {sequence_length} hours from Open-Meteo...")
189
-
190
- # Determine the exact start and end time for the last `sequence_length` hours
191
- # The API uses YYYY-MM-DD format for dates.
192
- # We need data from the hour `sequence_length` hours ago up to the current completed hour.
193
- now_utc = datetime.now(timezone.utc)
194
- # Round down to the nearest hour
195
- current_hour_utc = now_utc.replace(minute=0, second=0, microsecond=0)
196
- # The end date for the API request is the current date
197
- end_date_api = current_hour_utc.strftime('%Y-%m-%d')
198
- # The start date is `sequence_length` hours before the *start* of the current hour.
199
- # So, `sequence_length` hours before `current_hour_utc`.
200
- start_time_utc = current_hour_utc - timedelta(hours=sequence_length)
201
- start_date_api = start_time_utc.strftime('%Y-%m-%d')
202
-
203
- # --- Fetch Air Quality Data ---
204
- aq_params = {
205
- "latitude": LATITUDE,
206
- "longitude": LONGITUDE,
207
- "hourly": ",".join(AIR_QUALITY_PARAMETERS),
208
- "timezone": TIMEZONE,
209
- "start_date": start_date_api,
210
- "end_date": end_date_api,
211
- "domains": "auto"
212
- }
213
 
214
- try:
215
- aq_response = requests.get(OPENMETEO_AIR_QUALITY_API_URL, params=aq_params)
216
- aq_response.raise_for_status()
217
- aq_data = aq_response.json()
218
- print("Air quality data retrieved.")
219
 
220
- if 'hourly' not in aq_data or 'time' not in aq_data['hourly']:
221
- print("Error: 'hourly' or 'time' not found in AQ response.")
222
- return None
223
 
224
- aq_hourly_data = aq_data['hourly']
225
- aq_timestamps = aq_hourly_data['time']
226
- aq_extracted_data = {param: aq_hourly_data.get(param, []) for param in AIR_QUALITY_PARAMETERS}
 
 
 
 
 
 
227
 
228
- df_aq = pd.DataFrame(aq_extracted_data, index=pd.to_datetime(aq_timestamps))
229
 
230
- except requests.exceptions.RequestException as e:
231
- print(f"Error fetching air quality data: {e}")
232
- return None
233
- except Exception as e:
234
- print(f"Error processing air quality data: {e}")
235
- import traceback
236
- traceback.print_exc()
237
- return None
238
-
239
- # --- Fetch Temperature Data ---
240
- temp_params = {
241
- "latitude": LATITUDE,
242
- "longitude": LONGITUDE,
243
- "hourly": ",".join(WEATHER_PARAMETERS_FOR_TEMP),
244
- "timezone": TIMEZONE,
245
- "start_date": start_date_api,
246
- "end_date": end_date_api,
247
- "models": "best_match"
248
- }
249
 
250
- try:
251
- temp_response = requests.get(OPENMETEO_WEATHER_API_URL, params=temp_params)
252
- temp_response.raise_for_status()
253
- temp_data = temp_response.json()
254
- print("Temperature data retrieved.")
255
 
256
- if 'hourly' not in temp_data or 'time' not in temp_data['hourly']:
257
- print("Error: 'hourly' or 'time' not found in temperature response.")
258
- # Decide how to handle missing temperature data - return None, fill with NaNs, etc.
259
- print("Skipping temperature data due to missing fields.")
260
- df_temp = pd.DataFrame(index=df_aq.index) # Create empty DataFrame with AQ index
261
- for param in WEATHER_PARAMETERS_FOR_TEMP:
262
- df_temp[param] = np.nan # Add NaN columns for expected temperature parameters
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  else:
264
- temp_hourly_data = temp_data['hourly']
265
- temp_timestamps = temp_hourly_data['time']
266
- temp_extracted_data = {param: temp_hourly_data.get(param, []) for param in WEATHER_PARAMETERS_FOR_TEMP}
267
-
268
- df_temp = pd.DataFrame(temp_extracted_data, index=pd.to_datetime(temp_timestamps))
269
 
270
- except requests.exceptions.RequestException as e:
271
- print(f"Error fetching temperature data: {e}")
272
- print("Skipping temperature data due to API error.")
273
- df_temp = pd.DataFrame(index=df_aq.index) # Create empty DataFrame with AQ index
274
- for param in WEATHER_PARAMETERS_FOR_TEMP:
275
- df_temp[param] = np.nan # Add NaN columns for expected temperature parameters
276
  except Exception as e:
277
- print(f"Error processing temperature data: {e}")
278
- import traceback
279
  traceback.print_exc()
280
- print("Skipping temperature data due to processing error.")
281
- df_temp = pd.DataFrame(index=df_aq.index) # Create empty DataFrame with AQ index
282
- for param in WEATHER_PARAMETERS_FOR_TEMP:
283
- df_temp[param] = np.nan # Add NaN columns for expected temperature parameters
284
-
285
-
286
- # --- Merge DataFrames ---
287
- # Merge air quality and temperature data based on timestamp
288
- df_merged = pd.merge(df_aq, df_temp, left_index=True, right_index=True, how='outer')
289
-
290
- # --- Calculate Historical AQI ---
291
- # Calculate the 'calculated_aqi' for each row using your function
292
- df_merged['calculated_aqi'] = df_merged.apply(
293
- lambda row: calculate_overall_aqi(
294
- {'pm2_5': row.get('pm2_5'), 'pm10': row.get('pm10'), 'carbon_monoxide': row.get('carbon_monoxide')},
295
- aqi_breakpoints
296
- ),
297
- axis=1
298
- )
299
-
300
- # --- Process and Filter Merged Data ---
301
- # Ensure the index is a proper datetime index and sort
302
- df_merged.index = pd.to_datetime(df_merged.index)
303
- df_merged.sort_index(inplace=True)
304
-
305
- # Resample to ensure hourly frequency and fill missing gaps
306
- # Use forward fill then backward fill for robustness
307
- df_processed = df_merged.resample('H').ffill().bfill()
308
-
309
- # Filter to the exact time range for the sequence (last SEQUENCE_LENGTH hours)
310
- # Find the timestamp corresponding to the start of the desired sequence
311
- # We want the `sequence_length` hours ending at `current_hour_utc`
312
- sequence_start_time_utc = current_hour_utc - timedelta(hours=sequence_length -1)
313
-
314
- # Filter the DataFrame to include only the timestamps within the sequence
315
- # Use loc with inclusive endpoints
316
- df_sequence = df_processed.loc[sequence_start_time_utc:current_hour_utc]
317
-
318
- # Ensure you have exactly SEQUENCE_LENGTH data points
319
- if len(df_sequence) != sequence_length:
320
- print(f"Error: Retrieved and processed data length ({len(df_sequence)}) does not match sequence length ({sequence_length}).")
321
- print(f"Expected timestamps from {sequence_start_time_utc} to {current_hour_utc}. Got {df_sequence.index.min()} to {df_sequence.index.max()}.")
322
- print("Check API request time range and data availability.")
323
- return None
324
-
325
- # Reorder columns to match your model's expected input feature order:
326
- # ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
327
- # Ensure 'temp' is the column from temperature_2m, and pollutant names are mapped.
328
-
329
- # Rename Open-Meteo columns to match your model's expected feature names
330
- # (This mapping was partly in calculate_overall_aqi, but needed for the DataFrame columns)
331
- column_rename_map = {
332
- 'temperature_2m': 'temp',
333
- 'pm2_5': 'pm25',
334
- 'pm10': 'pm10',
335
- 'carbon_monoxide': 'co',
336
- # 'calculated_aqi' is already correct after calculation
337
- }
338
- df_sequence.rename(columns=column_rename_map, inplace=True)
339
-
340
- # Ensure all expected features are present and in the correct order
341
- model_features_order = ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
342
- missing_columns = [col for col in model_features_order if col not in df_sequence.columns]
343
- if missing_columns:
344
- print(f"Error: Missing required columns in final sequence data: {missing_columns}")
345
- print("Ensure all expected features are fetched and named correctly.")
346
- return None
347
-
348
- # Select and reorder columns to match the model's expected input
349
- df_final_sequence = df_sequence[model_features_order]
350
-
351
- # Convert to numpy array
352
- data_sequence = df_final_sequence.values
353
-
354
- # Ensure the final numpy array has the correct shape (redundant but safe)
355
- if data_sequence.shape != (sequence_length, NUM_INPUT_FEATURES):
356
- print(f"Error: Final data sequence shape {data_sequence.shape} does not match expected shape ({sequence_length}, {NUM_INPUT_FEATURES}).")
357
- return None
358
-
359
- print(f"Successfully prepared data sequence with shape {data_sequence.shape}")
360
- return data_sequence
361
-
362
- # --- Define Predict Function ---
363
- # (Keep the predict function as before, it calls get_latest_data_sequence)
364
- def predict():
365
- """
366
- Retrieves the latest data sequence from Open-Meteo, preprocesses it,
367
- and makes a prediction.
368
- """
369
- if model is None or input_scaler is None or target_scaler is None:
370
- return "Model or scaler(s) not loaded. Check logs."
371
-
372
- # 1. Get the latest historical data sequence from Open-Meteo
373
- latest_data_sequence = get_latest_data_sequence(SEQUENCE_LENGTH)
374
 
375
- if latest_data_sequence is None:
376
- return "Failed to retrieve or process latest data sequence."
377
 
378
- # Ensure the retrieved data has the correct shape (redundant check, but safe)
379
- if latest_data_sequence.shape != (SEQUENCE_LENGTH, NUM_INPUT_FEATURES):
380
- return f"Error: Retrieved data has incorrect shape {latest_data_sequence.shape}. Expected ({SEQUENCE_LENGTH}, {NUM_INPUT_FEATURES})."
 
 
 
 
 
381
 
382
 
383
- # 2. Scale the data sequence using the loaded input scaler
384
- latest_data_sequence_with_batch = latest_data_sequence[np.newaxis, :, :]
385
- scaled_input_data = input_scaler.transform(latest_data_sequence_with_batch)
 
 
386
 
387
- # 3. Perform prediction (outputs scaled target)
388
- output = model.predict(scaled_input_data)
 
389
 
390
- # 4. Process the output (get the scaled predicted value)
391
- predicted_scaled_value = output[0][0]
 
 
 
392
 
393
- # 5. Inverse transform the prediction using the target scaler
394
- predicted_original_scale = target_scaler.inverse_transform(np.array([[predicted_scaled_value]]))[0][0]
 
395
 
396
- predicted_value = predicted_original_scale
 
 
397
 
398
- return float(predicted_value)
 
 
399
 
400
- # --- Gradio Interface ---
401
- # (Keep the Gradio interface as before, inputs=None)
402
- interface = gr.Interface(
403
- fn=predict,
404
- inputs=None,
405
- outputs=gr.Number(label=f"Predicted AQI (Next {N_AHEAD} Hour(s))")
406
- )
407
 
 
408
 
409
- # --- Launch Gradio Interface ---
410
- if __name__ == "__main__":
411
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py (or main.py)
2
+ from fastapi import FastAPI, HTTPException
3
+ from pydantic import BaseModel
4
  import numpy as np
5
  import tensorflow as tf
6
  from tensorflow.keras.models import load_model
7
  from tensorflow.keras.layers import Input
 
 
 
 
 
 
 
 
 
8
  from tensorflow.keras.utils import custom_object_scope
9
  import pickle
10
  import os
11
  import requests
12
  import pandas as pd
13
  from datetime import datetime, timedelta, timezone
14
+ import pytz
15
+ import json
16
+ import traceback # Import traceback to print detailed error info
17
+
18
+ # Assuming TKAN is installed and available
19
+ from tkan import TKAN
20
+ try:
21
+ from tkat import TKAT
22
+ except ImportError:
23
+ print("TKAT library not found. If your model uses TKAT, ensure the library is installed.")
24
+ TKAT = None
25
 
26
  # --- Your MinMaxScaler Class (Copied from Notebook) ---
27
+ # This class is essential for loading your scalers
28
  class MinMaxScaler:
 
29
  def __init__(self, feature_axis=None, minmax_range=(0, 1)):
30
  self.feature_axis = feature_axis
31
  self.min_ = None
 
51
  return self
52
 
53
  def transform(self, X):
54
+ if self.min_ is None or self.max_ is None or self.scale_ is None:
55
+ raise ValueError("Scaler has not been fitted.")
56
  X_scaled = (X - self.min_) / self.scale_
57
  X_scaled = X_scaled * (self.minmax_range[1] - self.minmax_range[0]) + self.minmax_range[0]
58
  return X_scaled
59
 
 
 
 
60
  def inverse_transform(self, X_scaled):
61
+ if self.min_ is None or self.max_ is None or self.scale_ is None:
62
+ raise ValueError("Scaler has not been fitted.")
63
  X = (X_scaled - self.minmax_range[0]) / (self.minmax_range[1] - self.minmax_range[0])
64
  X = X * self.scale_ + self.min_
65
  return X
66
 
67
+ # --- AQI breakpoints and calculation functions (Copied from Notebook) ---
 
 
68
  aqi_breakpoints = {
69
  'pm25': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 200, 101, 200), (201, 300, 201, 300)],
70
  'pm10': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 250, 101, 200), (251, 350, 201, 300)],
 
86
 
87
  def calculate_overall_aqi(row, aqi_breakpoints):
88
  sub_aqis = []
89
+ # Mapping API names to internal names if necessary
90
  pollutant_mapping = {
91
+ 'pm25': 'pm25',
92
  'pm10': 'pm10',
93
+ 'co': 'co',
94
+ 'pm2_5': 'pm25', # Common API name for PM2.5
95
+ 'carbon_monoxide': 'co', # Common API name for CO
96
  }
97
  for api_pollutant, internal_pollutant in pollutant_mapping.items():
98
+ if api_pollutant in row:
99
+ concentration = row[api_pollutant]
100
+ if not pd.isna(concentration): # Use pd.isna for pandas DataFrames/Series
101
+ sub_aqi = calculate_sub_aqi(concentration, aqi_breakpoints.get(internal_pollutant, []))
102
+ sub_aqis.append(sub_aqi)
103
+ else:
104
+ sub_aqis.append(np.nan)
105
  else:
106
+ sub_aqis.append(np.nan)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ # Use np.nanmax to find the maximum ignoring NaNs. Returns -inf if all are NaN.
109
+ # Check if sub_aqis list is not empty and contains at least one non-NaN value
110
+ if sub_aqis and not all(pd.isna(sub_aqis)):
111
+ return np.nanmax(sub_aqis)
112
+ else:
113
+ return np.nan # Return NaN if no valid pollutant data is available
114
+
115
+ # --- Data Retrieval Function ---
116
+ def get_latest_data_sequence(sequence_length: int, latitude: float, longitude: float):
117
+ print(f"Attempting to retrieve data for the last {sequence_length} hours from Open-Meteo for Lat: {latitude}, Lon: {longitude}")
118
+
119
+ end_time = datetime.now(pytz.utc)
120
+ # Fetch slightly more data to allow for resampling and ensure sequence_length is met
121
+ fetch_hours = sequence_length + 5
122
+ start_time = end_time - timedelta(hours=fetch_hours)
123
+
124
+ # Format timestamps for API request (ISO 8601)
125
+ start_time_str = start_time.isoformat().split('.')[0] + 'Z'
126
+ end_time_str = end_time.isoformat().split('.')[0] + 'Z'
127
+
128
+ print(f"Requesting data from {start_time_str} to {end_time_str}")
129
+
130
+ # Open-Meteo Air Quality API
131
+ air_quality_url = "https://air-quality-api.open-meteo.com/v1/air-quality"
132
+ air_quality_params = {
133
+ "latitude": latitude,
134
+ "longitude": longitude,
135
+ "hourly": ["pm2_5", "pm10", "carbon_monoxide"],
136
+ "timezone": "UTC",
137
+ "start_date": start_time.strftime('%Y-%m-%d'), # Use YYYY-MM-DD format
138
+ "end_date": end_time.strftime('%Y-%m-%d'),
139
+ "past_hours": fetch_hours
140
+ }
141
+
142
+ # Open-Meteo Historical Weather API for Temperature
143
+ weather_url = "https://archive-api.open-meteo.com/v1/archive"
144
+ weather_params = {
145
+ "latitude": latitude,
146
+ "longitude": longitude,
147
+ "hourly": ["temperature_2m"],
148
+ "timezone": "UTC",
149
+ "start_date": start_time.strftime('%Y-%m-%d'),
150
+ "end_date": end_time.strftime('%Y-%m-%d')
151
+ }
152
+
153
+ try:
154
+ # Fetch Air Quality Data
155
+ print(f"Fetching air quality data from: {air_quality_url}")
156
+ air_quality_response = requests.get(air_quality_url, params=air_quality_params)
157
+ air_quality_response.raise_for_status()
158
+ air_quality_data = air_quality_response.json()
159
+ print("Air quality data retrieved.")
160
+
161
+ # Fetch Temperature Data
162
+ print(f"Fetching temperature data from: {weather_url}")
163
+ weather_response = requests.get(weather_url, params=weather_params)
164
+ weather_response.raise_for_status()
165
+ weather_data = weather_response.json()
166
+ print("Temperature data retrieved.")
167
+
168
+ print("Data fetched successfully.")
169
+
170
+ # Process Air Quality Data
171
+ if 'hourly' not in air_quality_data or 'time' not in air_quality_data['hourly']:
172
+ print("Error: 'hourly' or 'time' key not found in air quality response.")
173
+ return None, "Error: Invalid air quality data format from API."
174
+ df_aq = pd.DataFrame(air_quality_data['hourly'])
175
+ df_aq['time'] = pd.to_datetime(df_aq['time'])
176
+ df_aq.set_index('time', inplace=True)
177
+
178
+ # Process Temperature Data
179
+ if 'hourly' not in weather_data or 'time' not in weather_data['hourly']:
180
+ print("Error: 'hourly' or 'time' key not found in weather response.")
181
+ return None, "Error: Invalid weather data format from API."
182
+ df_temp = pd.DataFrame(weather_data['hourly'])
183
+ df_temp['time'] = pd.to_datetime(df_temp['time'])
184
+ df_temp.set_index('time', inplace=True)
185
+
186
+ # Merge dataframes
187
+ df_merged = df_aq.merge(df_temp, left_index=True, right_index=True, how='outer')
188
+ print("DataFrames merged.")
189
+
190
+
191
+ # Resample to ensure consistent hourly frequency and fill missing data
192
+ # Use 'h' for hourly resampling
193
+ df_processed = df_merged.resample('h').ffill().bfill()
194
+ print(f"DataFrame resampled to hourly. Shape: {df_processed.shape}")
195
+
196
+
197
+ # Rename columns to match internal naming convention
198
+ df_processed.rename(columns={'pm2_5': 'pm25', 'carbon_monoxide': 'co', 'temperature_2m': 'temp'}, inplace=True)
199
+ print("Renamed columns.")
200
+
201
+
202
+ # Calculate AQI for the processed data
203
+ df_processed['calculated_aqi'] = df_processed.apply(lambda row: calculate_overall_aqi(row, aqi_breakpoints), axis=1)
204
+ print("Calculated AQI.")
205
+
206
+
207
+ # Select and reorder columns to match training data order
208
+ required_columns = ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
209
+ # Ensure all required columns exist before selecting
210
+ if not all(col in df_processed.columns for col in required_columns):
211
+ missing_cols = [col for col in required_columns if col not in df_processed.columns]
212
+ print(f"Error: Missing required columns after processing: {missing_cols}")
213
+ return None, f"Error: Missing required data columns: {missing_cols}"
214
+
215
+ df_processed = df_processed[required_columns].copy()
216
+ print(f"Selected and reordered columns. Final processing shape: {df_processed.shape}")
217
+
218
+
219
+ # Handle any remaining NaNs after ffill/bfill (e.g., if the very first values were NaN or API returned all NaNs)
220
+ initial_rows = len(df_processed)
221
+ df_processed.dropna(inplace=True)
222
+ if len(df_processed) < initial_rows:
223
+ print(f"Warning: Dropped {initial_rows - len(df_processed)} rows with remaining NaNs.")
224
+
225
+
226
+ # Check if enough data points are available
227
+ if len(df_processed) < sequence_length:
228
+ print(f"Error: Only retrieved and processed {len(df_processed)} data points, but {sequence_length} are required.")
229
+ return None, f"Error: Insufficient historical data ({len(df_processed)} points available, {sequence_length} required)."
230
+
231
+ # Select the last `sequence_length` rows for the input sequence
232
+ latest_data_sequence_df = df_processed.tail(sequence_length).copy() # Use .copy() to avoid SettingWithCopyWarning
233
+ print(f"Selected last {sequence_length} data points.")
234
+
235
+ # Convert to numpy array and reshape (1, sequence_length, num_features)
236
+ latest_data_sequence = latest_data_sequence_df.values.reshape(1, sequence_length, len(required_columns))
237
+
238
+ # Get the timestamps for output formatting later
239
+ timestamps = latest_data_sequence_df.index.tolist()
240
+
241
+ print(f"Prepared input sequence with shape: {latest_data_sequence.shape}")
242
+
243
+ return latest_data_sequence, timestamps # Return data and timestamps
244
+
245
+ except requests.exceptions.RequestException as e:
246
+ print(f"API Request Error: {e}")
247
+ return None, f"API Request Error: {e}"
248
+ except Exception as e:
249
+ print(f"An unexpected error occurred during data retrieval and processing: {e}")
250
+ traceback.print_exc()
251
+ return None, f"An unexpected error occurred during data processing: {e}"
252
+
253
+
254
+ # --- Define paths to your saved files ---
255
+ # Use relative paths assuming files are in the root directory of the Space
256
+ MODEL_PATH = '/content/best_model_TKAN_nahead_1.keras'
257
+ INPUT_SCALER_PATH = '/content/input_scaler.pkl'
258
+ TARGET_SCALER_PATH = '/content/target_scaler.pkl' # This should be the scaler for the ratio
259
+ # Y_SCALER_TRAIN_PATH = 'y_scaler_train.pkl' # Keep commented out unless you find a specific use for it in the inverse transform
260
+
261
+
262
+ # --- Load the scalers and model ---
263
  input_scaler = None
264
+ target_scaler = None # Scaler for the AQI/rolling_median ratio
265
+ model = None
266
 
267
  try:
268
+ with open(INPUT_SCALER_PATH, 'rb') as f:
269
+ input_scaler = pickle.load(f)
270
+ print(f"Input scaler loaded successfully from {INPUT_SCALER_PATH}")
271
+
272
+ with open(TARGET_SCALER_PATH, 'rb') as f:
273
+ target_scaler = pickle.load(f)
274
+ print(f"Target scaler (for ratio) loaded successfully from {TARGET_SCALER_PATH}")
275
+
276
+ except FileNotFoundError as e:
277
+ print(f"Error loading scaler files: {e}")
278
+ print("Please ensure input_scaler.pkl and target_scaler.pkl are in the correct directory.")
279
+ # These need to be loaded for the app to work, so we might let the startup fail or raise an error here.
280
+ # For a web app, letting it fail on startup and show in logs is better than running with None scalers.
281
+ # However, for the purpose of giving you the code structure, we'll just print and model=None below.
282
+ except Exception as e:
283
+ print(f"An unexpected error occurred during scaler loading: {e}")
284
+ traceback.print_exc()
285
 
 
 
 
286
 
287
+ # Load the trained model with custom_object_scope
288
+ custom_objects = {"TKAN": TKAN}
289
+ if TKAT is not None:
290
+ custom_objects["TKAT"] = TKAT
291
 
292
+ try:
293
+ print(f"Loading model from {MODEL_PATH}...")
294
+ # Use custom_object_scope to register custom layers during loading
295
+ with custom_object_scope(custom_objects):
296
+ # compile=False because we only need the model for inference
297
+ model = load_model(MODEL_PATH, compile=False)
298
+ print("Model loaded successfully.")
299
+ except FileNotFoundError:
300
+ print(f"Error: Model file not found at {MODEL_PATH}.")
301
+ except ValueError as e:
302
+ print(f"Error loading model (ValueError): {e}")
303
+ print("This can happen if the file is not a valid Keras file or if custom objects are not registered.")
304
+ traceback.print_exc()
305
  except Exception as e:
306
+ print(f"An unexpected error occurred during model loading: {e}")
 
307
  traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
 
 
 
 
 
 
309
 
310
+ # Initialize FastAPI app
311
+ app = FastAPI()
 
312
 
313
+ # Define the structure of the prediction request body
314
+ class PredictionRequest(BaseModel):
315
+ latitude: float
316
+ longitude: float
317
+ pm25: float = None # Make current inputs optional, rely primarily on historical fetch
318
+ pm10: float = None
319
+ co: float = None
320
+ temp: float = None
321
+ n_ahead: int = 1 # Default prediction steps
322
 
 
323
 
324
+ # Define the structure of the prediction response body
325
+ class PredictionResponse(BaseModel):
326
+ status: str # "success" or "error"
327
+ message: str # Description of the result or error
328
+ predictions: list = None # List of {"timestamp": "...", "aqi": ...} or None on error
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
 
 
 
 
 
 
330
 
331
# Define the prediction endpoint
@app.post("/predict", response_model=PredictionResponse)
async def predict_aqi_endpoint(request: PredictionRequest):
    """Forecast AQI for the next ``request.n_ahead`` hours at the given coordinates.

    Pipeline: fetch the most recent hourly feature sequence (Open-Meteo),
    optionally overwrite the latest timestep with user-supplied current
    readings, scale, run the model, then inverse-transform the scaled ratio
    predictions back to absolute AQI using a rolling-median proxy estimated
    from the tail of the input sequence.

    Returns:
        PredictionResponse: ``status="success"`` with a list of
        ``{"timestamp", "aqi"}`` dicts, or ``status="error"`` when the
        historical data could not be retrieved.

    Raises:
        HTTPException(500): if the model/scalers are unavailable, the model's
            input shape is unexpected, or scaling/prediction/inverse-transform
            fails.
    """
    # Check that the model and scalers were loaded successfully on startup.
    if model is None or input_scaler is None or target_scaler is None:
        print("API called but model or scalers are not loaded.")
        raise HTTPException(status_code=500, detail="Model or scalers not loaded. Check server logs for details.")

    # The model input is expected to be (batch, sequence_length, num_features).
    # BUGFIX: the original guard used `len(model.input_shape) < 2`, which still
    # allowed an IndexError below when reading input_shape[2]; require rank 3.
    if model.input_shape is None or len(model.input_shape) < 3:
        print(f"Error: Model has unexpected input shape: {model.input_shape}")
        raise HTTPException(status_code=500, detail=f"Model has unexpected input shape: {model.input_shape}")

    SEQUENCE_LENGTH = model.input_shape[1]
    NUM_FEATURES = model.input_shape[2]
    # Feature order must match training: AQI first, then weather/pollutants.
    required_num_features = len(['calculated_aqi', 'temp', 'pm25', 'pm10', 'co'])
    if NUM_FEATURES != required_num_features:
        print(f"Error: Model expects {NUM_FEATURES} features, but data processing provides {required_num_features}.")
        raise HTTPException(status_code=500, detail=f"Model expects {NUM_FEATURES} features, but data processing provides {required_num_features}.")

    # Fetch the historical sequence. NOTE: on success the second return value
    # is the list of sequence timestamps; on failure it is an error message.
    latest_data_sequence_unscaled, message = get_latest_data_sequence(SEQUENCE_LENGTH, request.latitude, request.longitude)

    if latest_data_sequence_unscaled is None:
        # Soft failure: report the retrieval error in the response body.
        print(f"Data retrieval failed: {message}")
        return PredictionResponse(status="error", message=f"Data retrieval failed: {message}")

    # Predictions cover n_ahead hours *after* the last timestamp in the sequence.
    prediction_timestamps = []
    if message and isinstance(message, list) and len(message) > 0:  # 'message' is actually 'timestamps' here
        last_timestamp_of_sequence = message[-1]  # Last timestamp of the input sequence
        for i in range(request.n_ahead):
            # Prediction i (0-indexed) is for hour i+1 after the last timestamp.
            prediction_timestamps.append(last_timestamp_of_sequence + timedelta(hours=i + 1))
    else:
        print("Warning: Could not get valid timestamps from data retrieval. Prediction timestamps will be approximate.")
        # Fallback: approximate timestamps relative to the current UTC time.
        now_utc = datetime.now(pytz.utc)
        for i in range(request.n_ahead):
            prediction_timestamps.append(now_utc + timedelta(hours=i + 1))

    # Optionally overwrite the last timestep with the caller's current readings,
    # but only when all four values are present and valid (not None / NaN).
    if request.pm25 is not None and not pd.isna(request.pm25) and \
       request.pm10 is not None and not pd.isna(request.pm10) and \
       request.co is not None and not pd.isna(request.co) and \
       request.temp is not None and not pd.isna(request.temp):

        current_aqi = calculate_overall_aqi({'pm25': request.pm25, 'pm10': request.pm10, 'co': request.co, 'temp': request.temp}, aqi_breakpoints)

        if not pd.isna(current_aqi):
            # Column order: 'calculated_aqi', 'temp', 'pm25', 'pm10', 'co'.
            # Update the last row (-1) of the batch's single input sequence.
            latest_data_sequence_unscaled[0, -1, 0] = current_aqi
            latest_data_sequence_unscaled[0, -1, 1] = request.temp
            latest_data_sequence_unscaled[0, -1, 2] = request.pm25
            latest_data_sequence_unscaled[0, -1, 3] = request.pm10
            latest_data_sequence_unscaled[0, -1, 4] = request.co
            print("Updated last timestep of input sequence with current user inputs.")
        else:
            print("Warning: Could not calculate AQI for current inputs. Last timestep remains historical.")

    # Scale the input sequence with the scaler fitted at training time.
    try:
        X_scaled = input_scaler.transform(latest_data_sequence_unscaled)
        print("Input data scaled successfully.")
    except Exception as e:
        print(f"Error scaling input data: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="Error processing input data for prediction (scaling).")

    # Run the model; expected output shape is (1, n_ahead).
    try:
        scaled_prediction = model.predict(X_scaled, verbose=0)
        print(f"Model prediction made. Scaled prediction shape: {scaled_prediction.shape}")
    except Exception as e:
        print(f"Error during model prediction: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="Error during model prediction.")

    # Inverse transform the prediction back to absolute AQI units.
    try:
        # The target was scaled as a ratio against a rolling median, so undoing
        # it needs the rolling median at the *future* timesteps. That is
        # unknown, so approximate it from the tail of the input sequence.
        if latest_data_sequence_unscaled.shape[1] > 0:
            # 'calculated_aqi' is the first feature of the unscaled sequence.
            calculated_aqi_sequence = latest_data_sequence_unscaled[0, :, 0]

            # Mean of the last few points stands in for the rolling median.
            # This is a simple approximation; a more robust method may be needed.
            approx_rolling_median_proxy = np.mean(calculated_aqi_sequence[-min(5, SEQUENCE_LENGTH):])
            if pd.isna(approx_rolling_median_proxy) or approx_rolling_median_proxy <= 0:
                approx_rolling_median_proxy = 1.0  # Prevent division by zero or invalid scaling

            # Broadcast the proxy across all future timesteps.
            corresponding_rolling_median_scaler = np.full((1, request.n_ahead, 1), approx_rolling_median_proxy, dtype=np.float32)
            print(f"Approximated rolling median proxy for inverse transform: {approx_rolling_median_proxy:.2f}")

            # 1. Undo the min-max scaling to recover the predicted ratio.
            y_unscaled_pred_ratio = target_scaler.inverse_transform(scaled_prediction.reshape(1, request.n_ahead, 1))
            print(f"Inverse transformed to ratio scale. Shape: {y_unscaled_pred_ratio.shape}")

            # 2. Ratio * rolling-median proxy -> absolute AQI values.
            predicted_aqi_values = y_unscaled_pred_ratio * corresponding_rolling_median_scaler
            predicted_aqi_values = predicted_aqi_values.flatten()  # Shape (n_ahead,)
        else:
            print("Error: Input sequence is empty, cannot perform inverse transform.")
            raise ValueError("Input sequence is empty.")

        print(f"Final predicted AQI values: {predicted_aqi_values}")

    except Exception as e:
        print(f"Error during inverse transformation: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="Error processing prediction results (inverse transform).")

    # Pair each predicted value with its timestamp for the response payload.
    predictions_list = []
    for i in range(request.n_ahead):
        timestamp_str = prediction_timestamps[i].strftime('%Y-%m-%d %H:%M:%S')
        predictions_list.append({
            "timestamp": timestamp_str,
            "aqi": float(predicted_aqi_values[i])  # Ensure AQI is a standard float
        })

    # Return the successful response
    return PredictionResponse(status="success", message="Prediction successful.", predictions=predictions_list)
472
+
473
# Root endpoint for health check
@app.get("/")
async def read_root():
    """Lightweight liveness probe: confirms the API process is up."""
    status_payload = {"message": "AQI Prediction API is running."}
    return status_payload