Files changed (1) hide show
  1. app.py +304 -150
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import numpy as np
3
  import tensorflow as tf
4
  from tensorflow.keras.models import load_model
5
- from tensorflow.keras.layers import Input # Explicitly import Input
6
  # Assuming TKAN and TKAT are available after installing the respective packages
7
  from tkan import TKAN
8
  # If TKAT is from a different library, import it similarly
@@ -10,72 +10,36 @@ try:
10
  from tkat import TKAT
11
  except ImportError:
12
  print("TKAT library not found. If your model uses TKAT, make sure the library is installed.")
13
- TKAT = None # Set to None if TKAT is not available
14
 
15
  from tensorflow.keras.utils import custom_object_scope
16
- import pickle # Used for saving/loading the scaler
17
- import os # For checking file existence
18
-
19
- # --- Configuration ---
20
- MODEL_PATH = "best_model_TKAN_nahead_1 (2).keras" # Your saved model file
21
- INPUT_SCALER_PATH = "input_scaler.pkl" # Your saved input scaler file
22
- SEQUENCE_LENGTH = 24 # Matches the notebook
23
- NUM_INPUT_FEATURES = 5 # ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
24
- N_AHEAD = 1 # Matches the notebook
25
-
26
- # --- Ensure Required Files Exist ---
27
- if not os.path.exists(MODEL_PATH):
28
- print(f"Error: Model file not found at {MODEL_PATH}")
29
- import sys
30
- sys.exit("Model file missing. Exiting.")
31
-
32
- if not os.path.exists(INPUT_SCALER_PATH):
33
- print(f"Error: Input scaler file not found at {INPUT_SCALER_PATH}")
34
- import sys
35
- sys.exit("Input scaler file missing. Exiting.")
36
-
37
-
38
- # --- Load Model and Scalers ---
39
- # Define custom objects dictionary
40
- custom_objects = {"TKAN": TKAN}
41
- if TKAT is not None:
42
- custom_objects["TKAT"] = TKAT
43
- # Add your custom MinMaxScaler to custom_objects if you are using one that you defined
44
- # in your own code (not from a library). If your scaler is from scikit-learn, you
45
- # generally don't need to include it in custom_objects for pickle loading, but if it's
46
- # a custom implementation, you do. Based on your notebook, you have a custom MinMaxScaler.
47
- # Include the custom MinMaxScaler class definition here as well.
48
 
49
  # --- Your MinMaxScaler Class (Copied from Notebook) ---
 
50
  class MinMaxScaler:
 
51
  def __init__(self, feature_axis=None, minmax_range=(0, 1)):
52
- """
53
- Initialize the MinMaxScaler.
54
- Args:
55
- feature_axis (int, optional): The axis that represents the feature dimension if applicable.
56
- Use only for 3D data to specify which axis is the feature axis.
57
- Default is None, automatically managed based on data dimensions.
58
- """
59
  self.feature_axis = feature_axis
60
  self.min_ = None
61
  self.max_ = None
62
  self.scale_ = None
63
- self.minmax_range = minmax_range # Default range for scaling (min, max)
64
 
65
  def fit(self, X):
66
- """
67
- Fit the scaler to the data based on its dimensionality.
68
- Args:
69
- X (np.array): The data to fit the scaler on.
70
- """
71
- if X.ndim == 3 and self.feature_axis is not None: # 3D data
72
  axis = tuple(i for i in range(X.ndim) if i != self.feature_axis)
73
  self.min_ = np.min(X, axis=axis)
74
  self.max_ = np.max(X, axis=axis)
75
- elif X.ndim == 2: # 2D data
76
  self.min_ = np.min(X, axis=0)
77
  self.max_ = np.max(X, axis=0)
78
- elif X.ndim == 1: # 1D data
79
  self.min_ = np.min(X)
80
  self.max_ = np.max(X)
81
  else:
@@ -85,173 +49,363 @@ class MinMaxScaler:
85
  return self
86
 
87
  def transform(self, X):
88
- """
89
- Transform the data using the fitted scaler.
90
- Args:
91
- X (np.array): The data to transform.
92
- Returns:
93
- np.array: The scaled data.
94
- """
95
  X_scaled = (X - self.min_) / self.scale_
96
  X_scaled = X_scaled * (self.minmax_range[1] - self.minmax_range[0]) + self.minmax_range[0]
97
  return X_scaled
98
 
99
  def fit_transform(self, X):
100
- """
101
- Fit to data, then transform it.
102
- Args:
103
- X (np.array): The data to fit and transform.
104
- Returns:
105
- np.array: The scaled data.
106
- """
107
  return self.fit(X).transform(X)
108
 
109
  def inverse_transform(self, X_scaled):
110
- """
111
- Inverse transform the scaled data to original data.
112
- Args:
113
- X_scaled (np.array): The scaled data to inverse transform.
114
- Returns:
115
- np.array: The original data scale.
116
- """
117
  X = (X_scaled - self.minmax_range[0]) / (self.minmax_range[1] - self.minmax_range[0])
118
  X = X * self.scale_ + self.min_
119
  return X
120
- # --- End of MinMaxScaler Class ---
121
 
122
- # Add your custom MinMaxScaler to custom_objects
123
- custom_objects["MinMaxScaler"] = MinMaxScaler
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  model = None
127
  input_scaler = None
128
- # target_scaler = None # Load if needed
129
 
130
  try:
131
- # Use custom_object_scope for both model and scaler loading
132
  with custom_object_scope(custom_objects):
133
  model = load_model(MODEL_PATH)
134
  print("Model loaded successfully!")
135
- model.summary() # Verify the model structure after loading
136
 
137
  with open(INPUT_SCALER_PATH, 'rb') as f:
138
  input_scaler = pickle.load(f)
139
  print(f"Input scaler loaded successfully from {INPUT_SCALER_PATH}")
140
 
141
- # If you also scaled your target variable and need to inverse transform the prediction,
142
- # load the target scaler here as well.
143
- # with custom_object_scope(custom_objects): # Need custom_object_scope if target scaler is custom
144
- # with open(TARGET_SCALER_PATH, 'rb') as f:
145
- # target_scaler = pickle.load(f)
146
- # print(f"Target scaler loaded successfully from {TARGET_SCALER_PATH}")
147
 
148
  except Exception as e:
149
  print(f"Error during loading: {e}")
150
  import traceback
151
  traceback.print_exc()
152
  import sys
153
- sys.exit("Failed to load model or scaler. Exiting.")
154
 
155
 
156
- # --- Data Preparation (get_latest_data_sequence needs implementation) ---
157
 
158
- def get_latest_data_sequence(sequence_length, num_features):
159
  """
160
- Retrieves the latest sequence of data for the required features.
161
-
162
- This function needs to be implemented based on your data source in the
163
- deployment environment. It should return a numpy array with shape
164
- (sequence_length, num_features).
165
 
166
  Args:
167
- sequence_length (int): The length of the historical sequence required.
168
- num_features (int): The number of features in each time step.
169
 
170
  Returns:
171
  np.ndarray: A numpy array containing the historical data sequence.
172
- Shape: (sequence_length, num_features)
 
173
  """
174
- print("WARNING: Using dummy data sequence. Implement get_latest_data_sequence.")
175
- # --- REPLACE THIS WITH YOUR ACTUAL DATA RETRIEVAL LOGIC ---
176
- # The data should be in the correct order (oldest to newest time step).
177
- # The columns should be in the order ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co'].
178
-
179
- # For now, returning a placeholder with the correct shape.
180
- dummy_data = np.zeros((sequence_length, num_features))
181
- # Populate dummy_data with some values for testing if you can load historical data
182
- # Example: If you saved a sample of X_test_unscaled, load it here temporarily.
183
- # You need to ensure this dummy data has the correct structure and feature order.
184
- return dummy_data
185
- # --- END OF PLACEHOLDER ---
186
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  # --- Define Predict Function ---
189
-
190
- def predict(): # Modify inputs as needed based on how you get data
191
  """
192
- Retrieves the latest data sequence, preprocesses it, and makes a prediction.
193
-
194
- The Gradio interface will need to trigger this function.
195
  """
196
- if model is None or input_scaler is None:
197
- return "Model or scaler not loaded. Check logs."
 
 
 
198
 
199
- # 1. Get the latest historical data sequence
200
- latest_data_sequence = get_latest_data_sequence(SEQUENCE_LENGTH, NUM_INPUT_FEATURES)
201
 
202
- # Ensure the retrieved data has the correct shape
203
  if latest_data_sequence.shape != (SEQUENCE_LENGTH, NUM_INPUT_FEATURES):
204
  return f"Error: Retrieved data has incorrect shape {latest_data_sequence.shape}. Expected ({SEQUENCE_LENGTH}, {NUM_INPUT_FEATURES})."
205
 
 
206
  # 2. Scale the data sequence using the loaded input scaler
207
- # Your MinMaxScaler from the notebook had feature_axis=2 for 3D data (samples, sequence, features).
208
- # So, for a single sequence (2D array), you need to add a batch dimension (1) before scaling.
209
  latest_data_sequence_with_batch = latest_data_sequence[np.newaxis, :, :]
210
  scaled_input_data = input_scaler.transform(latest_data_sequence_with_batch)
211
 
212
- # 3. Perform prediction
213
- # The model expects input shape (batch_size, sequence_length, num_features)
214
  output = model.predict(scaled_input_data)
215
 
216
- # 4. Process the output
217
- # The output shape is (batch_size, n_ahead). Since n_ahead=1, shape is (batch_size, 1).
218
- predicted_scaled_value = output[0][0] # Get the first prediction for the first sample
219
-
220
- # 5. Inverse transform the prediction if the target was scaled
221
- # If you scaled the target variable (calculated_aqi) before training,
222
- # you need to inverse transform the prediction back to the original scale.
223
- # This requires saving and loading the target_scaler as well and using it here.
224
-
225
- # Example if you need to inverse transform the target:
226
- if target_scaler is not None:
227
- # # Need to put the single predicted value into an array with the shape
228
- # # that the target_scaler's inverse_transform expects.
229
- # # Assuming y_scaler was fitted on a shape like (samples, n_ahead, 1) or (samples, 1)
230
- # # and inverse_transform works on a similar shape.
231
- # # If y_train shape was (samples, n_ahead):
232
- predicted_original_scale = target_scaler.inverse_transform(np.array([[predicted_scaled_value]]))[0][0]
233
- # # If y_train shape was (samples, n_ahead, 1):
234
- # # predicted_original_scale = target_scaler.inverse_transform(np.array([[[predicted_scaled_value]]]))[0][0][0]
235
- # pass # Implement the correct inverse transform based on how y_scaler was used
236
- else:
237
- predicted_original_scale = predicted_scaled_value
238
- predicted_value = predicted_original_scale
239
 
240
- # For now, assuming the model outputs directly in the desired scale or
241
- # you handle inverse transformation elsewhere if needed.
242
- # predicted_value = predicted_scaled_value # Adjust this if inverse transformation is needed
243
 
244
- return float(predicted_value)
245
 
 
246
 
247
  # --- Gradio Interface ---
248
- # Keep inputs=None as the predict function gets data internally.
249
  interface = gr.Interface(
250
  fn=predict,
251
- inputs=None, # `predict` function doesn't take direct inputs from Gradio
252
  outputs=gr.Number(label=f"Predicted AQI (Next {N_AHEAD} Hour(s))")
253
  )
254
 
 
255
  # --- Launch Gradio Interface ---
256
  if __name__ == "__main__":
257
  interface.launch()
 
2
  import numpy as np
3
  import tensorflow as tf
4
  from tensorflow.keras.models import load_model
5
+ from tensorflow.keras.layers import Input
6
  # Assuming TKAN and TKAT are available after installing the respective packages
7
  from tkan import TKAN
8
  # If TKAT is from a different library, import it similarly
 
10
  from tkat import TKAT
11
  except ImportError:
12
  print("TKAT library not found. If your model uses TKAT, make sure the library is installed.")
13
+ TKAT = None
14
 
15
  from tensorflow.keras.utils import custom_object_scope
16
+ import pickle
17
+ import os
18
+ import requests
19
+ import pandas as pd
20
+ from datetime import datetime, timedelta, timezone
21
+ import pytz # For timezone handling
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # --- Your MinMaxScaler Class (Copied from Notebook) ---
24
+ # (Keep the MinMaxScaler class definition here as before)
25
  class MinMaxScaler:
26
+ # ... (MinMaxScaler class definition) ...
27
  def __init__(self, feature_axis=None, minmax_range=(0, 1)):
 
 
 
 
 
 
 
28
  self.feature_axis = feature_axis
29
  self.min_ = None
30
  self.max_ = None
31
  self.scale_ = None
32
+ self.minmax_range = minmax_range
33
 
34
  def fit(self, X):
35
+ if X.ndim == 3 and self.feature_axis is not None:
 
 
 
 
 
36
  axis = tuple(i for i in range(X.ndim) if i != self.feature_axis)
37
  self.min_ = np.min(X, axis=axis)
38
  self.max_ = np.max(X, axis=axis)
39
+ elif X.ndim == 2:
40
  self.min_ = np.min(X, axis=0)
41
  self.max_ = np.max(X, axis=0)
42
+ elif X.ndim == 1:
43
  self.min_ = np.min(X)
44
  self.max_ = np.max(X)
45
  else:
 
49
  return self
50
 
51
  def transform(self, X):
 
 
 
 
 
 
 
52
  X_scaled = (X - self.min_) / self.scale_
53
  X_scaled = X_scaled * (self.minmax_range[1] - self.minmax_range[0]) + self.minmax_range[0]
54
  return X_scaled
55
 
56
  def fit_transform(self, X):
 
 
 
 
 
 
 
57
  return self.fit(X).transform(X)
58
 
59
  def inverse_transform(self, X_scaled):
 
 
 
 
 
 
 
60
  X = (X_scaled - self.minmax_range[0]) / (self.minmax_range[1] - self.minmax_range[0])
61
  X = X * self.scale_ + self.min_
62
  return X
 
63
 
 
 
64
 
65
# --- AQI Breakpoints and Calculation Logic (Copied from Notebook) ---
# (Keep the aqi_breakpoints and calculate_overall_aqi functions here as before)
# Per-pollutant AQI bands. Judging by the pm10/co rows (e.g. pm10
# (101, 250, 101, 200) and co (0, 1.0, 0, 50)), the tuples follow the
# CPCB table layout (conc_low, conc_high, index_low, index_high).
# NOTE(review): the bands leave gaps (e.g. pm25 50 < c < 51) and the CO
# thresholds (0-1.0, 1.1-2.0, ...) look like mg/m^3 while Open-Meteo
# reports carbon_monoxide in ug/m^3 — confirm units before trusting CO.
aqi_breakpoints = {
    'pm25': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 200, 101, 200), (201, 300, 201, 300)],
    'pm10': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 250, 101, 200), (251, 350, 201, 300)],
    'co': [(0, 1.0, 0, 50), (1.1, 2.0, 51, 100), (2.1, 10.0, 101, 200), (10.1, 17.0, 201, 300)]
}
72
+
73
def calculate_sub_aqi(concentration, breakpoints):
    """Linearly interpolate a pollutant concentration to its AQI sub-index.

    Args:
        concentration (float): Pollutant concentration, in the same units as
            the breakpoint table.
        breakpoints (list[tuple]): Bands as (c_low, c_high, i_low, i_high),
            the CPCB layout used by ``aqi_breakpoints``.

    Returns:
        float: The sub-index, clamped to the table's first/last index for
            out-of-range concentrations; np.nan for an empty table or a NaN
            concentration.
    """
    if not breakpoints or np.isnan(concentration):
        return np.nan
    # Below/above the covered concentration range: clamp to the end indices.
    if concentration < breakpoints[0][0]:
        return breakpoints[0][2]
    if concentration > breakpoints[-1][1]:
        return breakpoints[-1][3]
    # BUG FIX: the table stores (c_low, c_high, i_low, i_high) — see the
    # pm10/co rows of aqi_breakpoints — but was previously unpacked as
    # (i_low, i_high, c_low, c_high), yielding wrong sub-indices for every
    # asymmetric band (pm10 upper bands, all CO bands).
    for c_low, c_high, i_low, i_high in breakpoints:
        if concentration <= c_high:
            if c_high == c_low:
                return i_low
            # max() absorbs gaps between bands (e.g. 50 < c < 51), which
            # previously fell through and produced NaN.
            c = max(concentration, c_low)
            return ((i_high - i_low) / (c_high - c_low)) * (c - c_low) + i_low
    return np.nan
85
+
86
def calculate_overall_aqi(row, aqi_breakpoints):
    """Compute the overall AQI for one hourly record.

    Args:
        row (dict | pd.Series): Mapping carrying Open-Meteo pollutant keys
            ('pm2_5', 'pm10', 'carbon_monoxide'); missing keys count as NaN.
        aqi_breakpoints (dict): Per-pollutant breakpoint tables keyed by the
            internal names ('pm25', 'pm10', 'co').

    Returns:
        float: The maximum pollutant sub-index, or np.nan when every
            pollutant value is missing/NaN.
    """
    # Open-Meteo field name -> internal breakpoint-table key.
    api_to_internal = {
        'pm2_5': 'pm25',
        'pm10': 'pm10',
        'carbon_monoxide': 'co',
    }
    sub_indices = []
    for api_name, table_name in api_to_internal.items():
        value = row.get(api_name, np.nan)
        if np.isnan(value):
            sub_indices.append(np.nan)
        else:
            sub_indices.append(calculate_sub_aqi(value, aqi_breakpoints.get(table_name, [])))
    # Guard: np.nanmax warns/raises on an all-NaN input, so check first.
    if sub_indices and not all(np.isnan(sub_indices)):
        return np.nanmax(sub_indices)
    return np.nan
101
+
102
+
103
# --- Configuration ---
# Paths to the artifacts produced by the training notebook.
MODEL_PATH = "best_model_TKAN_nahead_1 (2).keras"
INPUT_SCALER_PATH = "input_scaler.pkl"
TARGET_SCALER_PATH = "target_scaler.pkl"
SEQUENCE_LENGTH = 24  # Matches the notebook: hours of history per prediction
NUM_INPUT_FEATURES = 5  # ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
N_AHEAD = 1  # Matches the notebook: forecast horizon in hours

# --- Open-Meteo API Configuration ---
OPENMETEO_AIR_QUALITY_API_URL = "https://air-quality-api.open-meteo.com/v1/air-quality"
# You will also need the standard weather API for temperature
OPENMETEO_WEATHER_API_URL = "https://api.open-meteo.com/v1/forecast"  # Using forecast for recent hourly data
# Replace with the actual latitude and longitude for your location
LATITUDE = 17.33
LONGITUDE = 78.27
# Hourly fields requested from each API endpoint.
AIR_QUALITY_PARAMETERS = ["pm10", "pm2_5", "carbon_monoxide"]
WEATHER_PARAMETERS_FOR_TEMP = ["temperature_2m"]  # Parameter name for temperature
# NOTE(review): "auto" makes Open-Meteo return timestamps in the station's
# local timezone — confirm this agrees with the UTC-based windowing in
# get_latest_data_sequence.
TIMEZONE = "auto"
121
+
122
# --- Ensure Required Files Exist ---
# Abort at startup (rather than at the first request) if any artifact is
# missing. Folded the three copy-pasted checks — each with its own local
# `import sys` — into one data-driven loop; messages and exit behavior are
# unchanged.
import sys

for _artifact_path, _artifact_label in (
    (MODEL_PATH, "Model"),
    (INPUT_SCALER_PATH, "Input scaler"),
    (TARGET_SCALER_PATH, "Target scaler"),
):
    if not os.path.exists(_artifact_path):
        print(f"Error: {_artifact_label} file not found at {_artifact_path}")
        sys.exit(f"{_artifact_label} file missing. Exiting.")
138
+
139
+
140
# --- Load Model and Scalers ---
# (Keep the loading logic here as before)
# Keras needs the custom layer classes (and the custom scaler class, for
# unpickling) registered when deserializing.
custom_objects = {"TKAN": TKAN, "MinMaxScaler": MinMaxScaler}
if TKAT is not None:
    custom_objects["TKAT"] = TKAT

model = None
input_scaler = None
target_scaler = None

try:
    # custom_object_scope makes the custom classes resolvable during both
    # model deserialization and scaler unpickling.
    with custom_object_scope(custom_objects):
        model = load_model(MODEL_PATH)
        print("Model loaded successfully!")
        model.summary()  # Verify the model structure after loading

        # NOTE(review): pickle.load executes arbitrary code from the file —
        # only load scaler files you produced yourself.
        with open(INPUT_SCALER_PATH, 'rb') as f:
            input_scaler = pickle.load(f)
        print(f"Input scaler loaded successfully from {INPUT_SCALER_PATH}")

        with open(TARGET_SCALER_PATH, 'rb') as f:
            target_scaler = pickle.load(f)
        print(f"Target scaler loaded successfully from {TARGET_SCALER_PATH}")

except Exception as e:
    # Any failure here leaves the app unusable, so log and exit.
    print(f"Error during loading: {e}")
    import traceback
    traceback.print_exc()
    import sys
    sys.exit("Failed to load model or scaler(s). Exiting.")
170
 
171
 
172
# --- Data Retrieval from Open-Meteo API ---

def get_latest_data_sequence(sequence_length):
    """
    Retrieves the latest sequence of air quality and temperature data from
    Open-Meteo for the previous `sequence_length` hours based on the current
    hour, calculates historical AQI, and formats it for model input.

    Args:
        sequence_length (int): The length of the historical sequence required (e.g., 24).

    Returns:
        np.ndarray: A numpy array containing the historical data sequence.
                    Shape: (sequence_length, NUM_INPUT_FEATURES)
                    Returns None on failure.
    """
    print(f"Attempting to retrieve data for the last {sequence_length} hours from Open-Meteo...")

    # Determine the exact start and end time for the last `sequence_length`
    # hours. The API takes YYYY-MM-DD dates, so we request whole days and
    # slice down to hours afterwards.
    now_utc = datetime.now(timezone.utc)
    # Round down to the nearest completed hour.
    current_hour_utc = now_utc.replace(minute=0, second=0, microsecond=0)
    # The end date for the API request is the current date.
    end_date_api = current_hour_utc.strftime('%Y-%m-%d')
    # The start date covers `sequence_length` hours before the current hour.
    start_time_utc = current_hour_utc - timedelta(hours=sequence_length)
    start_date_api = start_time_utc.strftime('%Y-%m-%d')

    # --- Fetch Air Quality Data ---
    aq_params = {
        "latitude": LATITUDE,
        "longitude": LONGITUDE,
        "hourly": ",".join(AIR_QUALITY_PARAMETERS),
        "timezone": TIMEZONE,
        "start_date": start_date_api,
        "end_date": end_date_api,
        "domains": "auto"
    }

    try:
        aq_response = requests.get(OPENMETEO_AIR_QUALITY_API_URL, params=aq_params)
        aq_response.raise_for_status()
        aq_data = aq_response.json()
        print("Air quality data retrieved.")

        if 'hourly' not in aq_data or 'time' not in aq_data['hourly']:
            print("Error: 'hourly' or 'time' not found in AQ response.")
            return None

        aq_hourly_data = aq_data['hourly']
        aq_timestamps = aq_hourly_data['time']
        # Missing parameters become empty lists so DataFrame creation
        # doesn't KeyError.
        aq_extracted_data = {param: aq_hourly_data.get(param, []) for param in AIR_QUALITY_PARAMETERS}

        df_aq = pd.DataFrame(aq_extracted_data, index=pd.to_datetime(aq_timestamps))

    except requests.exceptions.RequestException as e:
        # Air quality data is mandatory — without it there is no sequence.
        print(f"Error fetching air quality data: {e}")
        return None
    except Exception as e:
        print(f"Error processing air quality data: {e}")
        import traceback
        traceback.print_exc()
        return None

    # --- Fetch Temperature Data ---
    temp_params = {
        "latitude": LATITUDE,
        "longitude": LONGITUDE,
        "hourly": ",".join(WEATHER_PARAMETERS_FOR_TEMP),
        "timezone": TIMEZONE,
        "start_date": start_date_api,
        "end_date": end_date_api,
        "models": "best_match"
    }

    try:
        temp_response = requests.get(OPENMETEO_WEATHER_API_URL, params=temp_params)
        temp_response.raise_for_status()
        temp_data = temp_response.json()
        print("Temperature data retrieved.")

        if 'hourly' not in temp_data or 'time' not in temp_data['hourly']:
            print("Error: 'hourly' or 'time' not found in temperature response.")
            # Temperature is optional: fall back to NaN columns rather than
            # aborting the whole sequence.
            print("Skipping temperature data due to missing fields.")
            df_temp = pd.DataFrame(index=df_aq.index)  # Empty frame aligned to AQ index
            for param in WEATHER_PARAMETERS_FOR_TEMP:
                df_temp[param] = np.nan  # NaN columns for expected temperature parameters
        else:
            temp_hourly_data = temp_data['hourly']
            temp_timestamps = temp_hourly_data['time']
            temp_extracted_data = {param: temp_hourly_data.get(param, []) for param in WEATHER_PARAMETERS_FOR_TEMP}

            df_temp = pd.DataFrame(temp_extracted_data, index=pd.to_datetime(temp_timestamps))

    except requests.exceptions.RequestException as e:
        print(f"Error fetching temperature data: {e}")
        print("Skipping temperature data due to API error.")
        df_temp = pd.DataFrame(index=df_aq.index)  # Empty frame aligned to AQ index
        for param in WEATHER_PARAMETERS_FOR_TEMP:
            df_temp[param] = np.nan  # NaN columns for expected temperature parameters
    except Exception as e:
        print(f"Error processing temperature data: {e}")
        import traceback
        traceback.print_exc()
        print("Skipping temperature data due to processing error.")
        df_temp = pd.DataFrame(index=df_aq.index)  # Empty frame aligned to AQ index
        for param in WEATHER_PARAMETERS_FOR_TEMP:
            df_temp[param] = np.nan  # NaN columns for expected temperature parameters


    # --- Merge DataFrames ---
    # Outer join on timestamp keeps every hour either source reported.
    df_merged = pd.merge(df_aq, df_temp, left_index=True, right_index=True, how='outer')

    # --- Calculate Historical AQI ---
    # Derive 'calculated_aqi' per row from the raw pollutant columns.
    df_merged['calculated_aqi'] = df_merged.apply(
        lambda row: calculate_overall_aqi(
            {'pm2_5': row.get('pm2_5'), 'pm10': row.get('pm10'), 'carbon_monoxide': row.get('carbon_monoxide')},
            aqi_breakpoints
        ),
        axis=1
    )

    # --- Process and Filter Merged Data ---
    # Ensure the index is a proper datetime index and sort chronologically.
    df_merged.index = pd.to_datetime(df_merged.index)
    df_merged.sort_index(inplace=True)

    # Resample to a strict hourly grid; forward- then back-fill gaps.
    # NOTE(review): 'H' is a deprecated alias for 'h' in pandas >= 2.2 —
    # confirm the pinned pandas version.
    df_processed = df_merged.resample('H').ffill().bfill()

    # Window of the last `sequence_length` hours ending at the current hour.
    # NOTE(review): these bounds are naive timestamps derived from UTC,
    # while timezone="auto" makes the API return local-time stamps — confirm
    # they agree, otherwise this .loc slice can be offset or empty.
    sequence_start_time_utc = current_hour_utc - timedelta(hours=sequence_length - 1)

    # .loc slicing on a DatetimeIndex is endpoint-inclusive.
    df_sequence = df_processed.loc[sequence_start_time_utc:current_hour_utc]

    # Ensure we got exactly SEQUENCE_LENGTH rows.
    if len(df_sequence) != sequence_length:
        print(f"Error: Retrieved and processed data length ({len(df_sequence)}) does not match sequence length ({sequence_length}).")
        print(f"Expected timestamps from {sequence_start_time_utc} to {current_hour_utc}. Got {df_sequence.index.min()} to {df_sequence.index.max()}.")
        print("Check API request time range and data availability.")
        return None

    # Rename Open-Meteo columns to the model's feature names.
    # NOTE(review): df_sequence is a slice of df_processed; rename with
    # inplace=True on it may trigger SettingWithCopyWarning.
    column_rename_map = {
        'temperature_2m': 'temp',
        'pm2_5': 'pm25',
        'pm10': 'pm10',
        'carbon_monoxide': 'co',
        # 'calculated_aqi' is already correct after calculation
    }
    df_sequence.rename(columns=column_rename_map, inplace=True)

    # Ensure all expected features are present, in the model's input order.
    model_features_order = ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
    missing_columns = [col for col in model_features_order if col not in df_sequence.columns]
    if missing_columns:
        print(f"Error: Missing required columns in final sequence data: {missing_columns}")
        print("Ensure all expected features are fetched and named correctly.")
        return None

    # Select and reorder columns to match the model's expected input.
    df_final_sequence = df_sequence[model_features_order]

    # Convert to numpy array for the model/scaler.
    data_sequence = df_final_sequence.values

    # Final shape check (redundant after the length check, but safe).
    if data_sequence.shape != (sequence_length, NUM_INPUT_FEATURES):
        print(f"Error: Final data sequence shape {data_sequence.shape} does not match expected shape ({sequence_length}, {NUM_INPUT_FEATURES}).")
        return None

    print(f"Successfully prepared data sequence with shape {data_sequence.shape}")
    return data_sequence
361
 
362
# --- Define Predict Function ---
# (Keep the predict function as before, it calls get_latest_data_sequence)
def predict():
    """Fetch the latest data window, run the model, and return the forecast.

    Returns:
        float | str: Predicted AQI on the original scale, or an error
            message string when loading/retrieval failed.
    """
    # Refuse to run unless every artifact loaded successfully at startup.
    if model is None or input_scaler is None or target_scaler is None:
        return "Model or scaler(s) not loaded. Check logs."

    # 1. Pull the most recent SEQUENCE_LENGTH hours from Open-Meteo.
    latest_data_sequence = get_latest_data_sequence(SEQUENCE_LENGTH)
    if latest_data_sequence is None:
        return "Failed to retrieve or process latest data sequence."

    # 2. Defensive shape check (get_latest_data_sequence also validates).
    if latest_data_sequence.shape != (SEQUENCE_LENGTH, NUM_INPUT_FEATURES):
        return f"Error: Retrieved data has incorrect shape {latest_data_sequence.shape}. Expected ({SEQUENCE_LENGTH}, {NUM_INPUT_FEATURES})."

    # 3. The scaler was fitted on (batch, time, features) data, so prepend a
    #    batch axis before scaling; the model consumes the same 3-D tensor.
    batched_sequence = latest_data_sequence[np.newaxis, :, :]
    scaled_sequence = input_scaler.transform(batched_sequence)

    # 4. Model output is (batch, n_ahead); take the single scaled prediction.
    raw_output = model.predict(scaled_sequence)
    scaled_prediction = raw_output[0][0]

    # 5. Map the scaled prediction back onto the original AQI scale.
    restored = target_scaler.inverse_transform(np.array([[scaled_prediction]]))[0][0]
    return float(restored)
399
 
400
# --- Gradio Interface ---
# (Keep the Gradio interface as before, inputs=None)
# `predict` takes no user inputs: it pulls its own data from Open-Meteo.
interface = gr.Interface(
    fn=predict,
    inputs=None,
    outputs=gr.Number(label=f"Predicted AQI (Next {N_AHEAD} Hour(s))")
)


# --- Launch Gradio Interface ---
# Only launch the server when run as a script (not when imported).
if __name__ == "__main__":
    interface.launch()