nikethanreddy dsid271 committed on
Commit
3c57246
·
verified ·
1 Parent(s): 59531d0

Update app.py (#10)

Browse files

- Update app.py (8a3817e742901dcb08b021086cab560022f34308)


Co-authored-by: SIDHARTHA D <dsid271@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +371 -306
app.py CHANGED
@@ -1,29 +1,31 @@
1
- import gradio as gr
 
 
2
  import numpy as np
3
  import tensorflow as tf
4
  from tensorflow.keras.models import load_model
5
  from tensorflow.keras.layers import Input
6
- # Assuming TKAN and TKAT are available after installing the respective packages
7
- from tkan import TKAN
8
- # If TKAT is from a different library, import it similarly
9
- try:
10
- from tkat import TKAT
11
- except ImportError:
12
- print("TKAT library not found. If your model uses TKAT, make sure the library is installed.")
13
- TKAT = None
14
-
15
  from tensorflow.keras.utils import custom_object_scope
16
  import pickle
17
  import os
18
  import requests
19
  import pandas as pd
20
  from datetime import datetime, timedelta, timezone
21
- import pytz # For timezone handling
 
 
 
 
 
 
 
 
 
 
22
 
23
  # --- Your MinMaxScaler Class (Copied from Notebook) ---
24
- # (Keep the MinMaxScaler class definition here as before)
25
  class MinMaxScaler:
26
- # ... (MinMaxScaler class definition) ...
27
  def __init__(self, feature_axis=None, minmax_range=(0, 1)):
28
  self.feature_axis = feature_axis
29
  self.min_ = None
@@ -49,21 +51,20 @@ class MinMaxScaler:
49
  return self
50
 
51
  def transform(self, X):
 
 
52
  X_scaled = (X - self.min_) / self.scale_
53
  X_scaled = X_scaled * (self.minmax_range[1] - self.minmax_range[0]) + self.minmax_range[0]
54
  return X_scaled
55
 
56
- def fit_transform(self, X):
57
- return self.fit(X).transform(X)
58
-
59
  def inverse_transform(self, X_scaled):
 
 
60
  X = (X_scaled - self.minmax_range[0]) / (self.minmax_range[1] - self.minmax_range[0])
61
  X = X * self.scale_ + self.min_
62
  return X
63
 
64
-
65
- # --- AQI Breakpoints and Calculation Logic (Copied from Notebook) ---
66
- # (Keep the aqi_breakpoints and calculate_overall_aqi functions here as before)
67
  aqi_breakpoints = {
68
  'pm25': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 200, 101, 200), (201, 300, 201, 300)],
69
  'pm10': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 250, 101, 200), (251, 350, 201, 300)],
@@ -85,327 +86,391 @@ def calculate_sub_aqi(concentration, breakpoints):
85
 
86
  def calculate_overall_aqi(row, aqi_breakpoints):
87
  sub_aqis = []
 
88
  pollutant_mapping = {
89
- 'pm2_5': 'pm25',
90
  'pm10': 'pm10',
91
- 'carbon_monoxide': 'co',
 
 
92
  }
93
  for api_pollutant, internal_pollutant in pollutant_mapping.items():
94
- concentration = row.get(api_pollutant, np.nan)
95
- if not np.isnan(concentration):
96
- sub_aqi = calculate_sub_aqi(concentration, aqi_breakpoints.get(internal_pollutant, []))
97
- sub_aqis.append(sub_aqi)
 
 
 
98
  else:
99
- sub_aqis.append(np.nan)
100
- return np.nanmax(sub_aqis) if sub_aqis and not all(np.isnan(sub_aqis)) else np.nan
101
-
102
-
103
- # --- Configuration ---
104
- MODEL_PATH = "best_model_TKAN_nahead_1 (2).keras"
105
- INPUT_SCALER_PATH = "input_scaler.pkl"
106
- TARGET_SCALER_PATH = "target_scaler.pkl"
107
- SEQUENCE_LENGTH = 24 # Matches the notebook
108
- NUM_INPUT_FEATURES = 5 # ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
109
- N_AHEAD = 1 # Matches the notebook
110
-
111
- # --- Open-Meteo API Configuration ---
112
- OPENMETEO_AIR_QUALITY_API_URL = "https://air-quality-api.open-meteo.com/v1/air-quality"
113
- # You will also need the standard weather API for temperature
114
- OPENMETEO_WEATHER_API_URL = "https://api.open-meteo.com/v1/forecast" # Using forecast for recent hourly data
115
- # Replace with the actual latitude and longitude for your location
116
- LATITUDE = 17.33
117
- LONGITUDE = 78.27
118
- AIR_QUALITY_PARAMETERS = ["pm10", "pm2_5", "carbon_monoxide"]
119
- WEATHER_PARAMETERS_FOR_TEMP = ["temperature_2m"] # Parameter name for temperature
120
- TIMEZONE = "auto"
121
-
122
- # --- Ensure Required Files Exist ---
123
- # (Keep the file existence checks here as before)
124
- if not os.path.exists(MODEL_PATH):
125
- print(f"Error: Model file not found at {MODEL_PATH}")
126
- import sys
127
- sys.exit("Model file missing. Exiting.")
128
-
129
- if not os.path.exists(INPUT_SCALER_PATH):
130
- print(f"Error: Input scaler file not found at {INPUT_SCALER_PATH}")
131
- import sys
132
- sys.exit("Input scaler file missing. Exiting.")
133
-
134
- if not os.path.exists(TARGET_SCALER_PATH):
135
- print(f"Error: Target scaler file not found at {TARGET_SCALER_PATH}")
136
- import sys
137
- sys.exit("Target scaler file missing. Exiting.")
138
-
139
-
140
- # --- Load Model and Scalers ---
141
- # (Keep the loading logic here as before)
142
- custom_objects = {"TKAN": TKAN, "MinMaxScaler": MinMaxScaler}
143
- if TKAT is not None:
144
- custom_objects["TKAT"] = TKAT
145
 
146
- model = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  input_scaler = None
148
- target_scaler = None
 
149
 
150
  try:
151
- with custom_object_scope(custom_objects):
152
- model = load_model(MODEL_PATH)
153
- print("Model loaded successfully!")
154
- model.summary()
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- with open(INPUT_SCALER_PATH, 'rb') as f:
157
- input_scaler = pickle.load(f)
158
- print(f"Input scaler loaded successfully from {INPUT_SCALER_PATH}")
159
 
160
- with open(TARGET_SCALER_PATH, 'rb') as f:
161
- target_scaler = pickle.load(f)
162
- print(f"Target scaler loaded successfully from {TARGET_SCALER_PATH}")
 
163
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  except Exception as e:
165
- print(f"Error during loading: {e}")
166
- import traceback
167
  traceback.print_exc()
168
- import sys
169
- sys.exit("Failed to load model or scaler(s). Exiting.")
170
-
171
-
172
- # --- Data Retrieval from Open-Meteo API ---
173
-
174
- def get_latest_data_sequence(sequence_length):
175
- """
176
- Retrieves the latest sequence of air quality and temperature data from Open-Meteo
177
- for the previous `sequence_length` hours based on the current hour,
178
- calculates historical AQI, and formats it for model input.
179
-
180
- Args:
181
- sequence_length (int): The length of the historical sequence required (e.g., 24).
182
-
183
- Returns:
184
- np.ndarray: A numpy array containing the historical data sequence.
185
- Shape: (sequence_length, NUM_INPUT_FEATURES)
186
- Returns None or raises an error on failure.
187
- """
188
- print(f"Attempting to retrieve data for the last {sequence_length} hours from Open-Meteo...")
189
-
190
- # Determine the exact start and end time for the last `sequence_length` hours
191
- # The API uses YYYY-MM-DD format for dates.
192
- # We need data from the hour `sequence_length` hours ago up to the current completed hour.
193
- now_utc = datetime.now(timezone.utc)
194
- # Round down to the nearest hour
195
- current_hour_utc = now_utc.replace(minute=0, second=0, microsecond=0)
196
- # The end date for the API request is the current date
197
- end_date_api = current_hour_utc.strftime('%Y-%m-%d')
198
- # The start date is `sequence_length` hours before the *start* of the current hour.
199
- # So, `sequence_length` hours before `current_hour_utc`.
200
- start_time_utc = current_hour_utc - timedelta(hours=sequence_length)
201
- start_date_api = start_time_utc.strftime('%Y-%m-%d')
202
-
203
- # --- Fetch Air Quality Data ---
204
- aq_params = {
205
- "latitude": LATITUDE,
206
- "longitude": LONGITUDE,
207
- "hourly": ",".join(AIR_QUALITY_PARAMETERS),
208
- "timezone": TIMEZONE,
209
- "start_date": start_date_api,
210
- "end_date": end_date_api,
211
- "domains": "auto"
212
- }
213
 
214
- try:
215
- aq_response = requests.get(OPENMETEO_AIR_QUALITY_API_URL, params=aq_params)
216
- aq_response.raise_for_status()
217
- aq_data = aq_response.json()
218
- print("Air quality data retrieved.")
219
 
220
- if 'hourly' not in aq_data or 'time' not in aq_data['hourly']:
221
- print("Error: 'hourly' or 'time' not found in AQ response.")
222
- return None
223
 
224
- aq_hourly_data = aq_data['hourly']
225
- aq_timestamps = aq_hourly_data['time']
226
- aq_extracted_data = {param: aq_hourly_data.get(param, []) for param in AIR_QUALITY_PARAMETERS}
 
 
 
 
 
 
227
 
228
- df_aq = pd.DataFrame(aq_extracted_data, index=pd.to_datetime(aq_timestamps))
229
 
230
- except requests.exceptions.RequestException as e:
231
- print(f"Error fetching air quality data: {e}")
232
- return None
233
- except Exception as e:
234
- print(f"Error processing air quality data: {e}")
235
- import traceback
236
- traceback.print_exc()
237
- return None
238
-
239
- # --- Fetch Temperature Data ---
240
- temp_params = {
241
- "latitude": LATITUDE,
242
- "longitude": LONGITUDE,
243
- "hourly": ",".join(WEATHER_PARAMETERS_FOR_TEMP),
244
- "timezone": TIMEZONE,
245
- "start_date": start_date_api,
246
- "end_date": end_date_api,
247
- "models": "best_match"
248
- }
249
 
250
- try:
251
- temp_response = requests.get(OPENMETEO_WEATHER_API_URL, params=temp_params)
252
- temp_response.raise_for_status()
253
- temp_data = temp_response.json()
254
- print("Temperature data retrieved.")
255
 
256
- if 'hourly' not in temp_data or 'time' not in temp_data['hourly']:
257
- print("Error: 'hourly' or 'time' not found in temperature response.")
258
- # Decide how to handle missing temperature data - return None, fill with NaNs, etc.
259
- print("Skipping temperature data due to missing fields.")
260
- df_temp = pd.DataFrame(index=df_aq.index) # Create empty DataFrame with AQ index
261
- for param in WEATHER_PARAMETERS_FOR_TEMP:
262
- df_temp[param] = np.nan # Add NaN columns for expected temperature parameters
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
  else:
264
- temp_hourly_data = temp_data['hourly']
265
- temp_timestamps = temp_hourly_data['time']
266
- temp_extracted_data = {param: temp_hourly_data.get(param, []) for param in WEATHER_PARAMETERS_FOR_TEMP}
267
-
268
- df_temp = pd.DataFrame(temp_extracted_data, index=pd.to_datetime(temp_timestamps))
269
 
270
- except requests.exceptions.RequestException as e:
271
- print(f"Error fetching temperature data: {e}")
272
- print("Skipping temperature data due to API error.")
273
- df_temp = pd.DataFrame(index=df_aq.index) # Create empty DataFrame with AQ index
274
- for param in WEATHER_PARAMETERS_FOR_TEMP:
275
- df_temp[param] = np.nan # Add NaN columns for expected temperature parameters
276
  except Exception as e:
277
- print(f"Error processing temperature data: {e}")
278
- import traceback
279
  traceback.print_exc()
280
- print("Skipping temperature data due to processing error.")
281
- df_temp = pd.DataFrame(index=df_aq.index) # Create empty DataFrame with AQ index
282
- for param in WEATHER_PARAMETERS_FOR_TEMP:
283
- df_temp[param] = np.nan # Add NaN columns for expected temperature parameters
284
-
285
-
286
- # --- Merge DataFrames ---
287
- # Merge air quality and temperature data based on timestamp
288
- df_merged = pd.merge(df_aq, df_temp, left_index=True, right_index=True, how='outer')
289
-
290
- # --- Calculate Historical AQI ---
291
- # Calculate the 'calculated_aqi' for each row using your function
292
- df_merged['calculated_aqi'] = df_merged.apply(
293
- lambda row: calculate_overall_aqi(
294
- {'pm2_5': row.get('pm2_5'), 'pm10': row.get('pm10'), 'carbon_monoxide': row.get('carbon_monoxide')},
295
- aqi_breakpoints
296
- ),
297
- axis=1
298
- )
299
-
300
- # --- Process and Filter Merged Data ---
301
- # Ensure the index is a proper datetime index and sort
302
- df_merged.index = pd.to_datetime(df_merged.index)
303
- df_merged.sort_index(inplace=True)
304
-
305
- # Resample to ensure hourly frequency and fill missing gaps
306
- # Use forward fill then backward fill for robustness
307
- df_processed = df_merged.resample('H').ffill().bfill()
308
-
309
- # Filter to the exact time range for the sequence (last SEQUENCE_LENGTH hours)
310
- # Find the timestamp corresponding to the start of the desired sequence
311
- # We want the `sequence_length` hours ending at `current_hour_utc`
312
- sequence_start_time_utc = current_hour_utc - timedelta(hours=sequence_length -1)
313
-
314
- # Filter the DataFrame to include only the timestamps within the sequence
315
- # Use loc with inclusive endpoints
316
- df_sequence = df_processed.loc[sequence_start_time_utc:current_hour_utc]
317
-
318
- # Ensure you have exactly SEQUENCE_LENGTH data points
319
- if len(df_sequence) != sequence_length:
320
- print(f"Error: Retrieved and processed data length ({len(df_sequence)}) does not match sequence length ({sequence_length}).")
321
- print(f"Expected timestamps from {sequence_start_time_utc} to {current_hour_utc}. Got {df_sequence.index.min()} to {df_sequence.index.max()}.")
322
- print("Check API request time range and data availability.")
323
- return None
324
-
325
- # Reorder columns to match your model's expected input feature order:
326
- # ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
327
- # Ensure 'temp' is the column from temperature_2m, and pollutant names are mapped.
328
-
329
- # Rename Open-Meteo columns to match your model's expected feature names
330
- # (This mapping was partly in calculate_overall_aqi, but needed for the DataFrame columns)
331
- column_rename_map = {
332
- 'temperature_2m': 'temp',
333
- 'pm2_5': 'pm25',
334
- 'pm10': 'pm10',
335
- 'carbon_monoxide': 'co',
336
- # 'calculated_aqi' is already correct after calculation
337
- }
338
- df_sequence.rename(columns=column_rename_map, inplace=True)
339
-
340
- # Ensure all expected features are present and in the correct order
341
- model_features_order = ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
342
- missing_columns = [col for col in model_features_order if col not in df_sequence.columns]
343
- if missing_columns:
344
- print(f"Error: Missing required columns in final sequence data: {missing_columns}")
345
- print("Ensure all expected features are fetched and named correctly.")
346
- return None
347
-
348
- # Select and reorder columns to match the model's expected input
349
- df_final_sequence = df_sequence[model_features_order]
350
-
351
- # Convert to numpy array
352
- data_sequence = df_final_sequence.values
353
-
354
- # Ensure the final numpy array has the correct shape (redundant but safe)
355
- if data_sequence.shape != (sequence_length, NUM_INPUT_FEATURES):
356
- print(f"Error: Final data sequence shape {data_sequence.shape} does not match expected shape ({sequence_length}, {NUM_INPUT_FEATURES}).")
357
- return None
358
-
359
- print(f"Successfully prepared data sequence with shape {data_sequence.shape}")
360
- return data_sequence
361
-
362
- # --- Define Predict Function ---
363
- # (Keep the predict function as before, it calls get_latest_data_sequence)
364
- def predict():
365
- """
366
- Retrieves the latest data sequence from Open-Meteo, preprocesses it,
367
- and makes a prediction.
368
- """
369
- if model is None or input_scaler is None or target_scaler is None:
370
- return "Model or scaler(s) not loaded. Check logs."
371
-
372
- # 1. Get the latest historical data sequence from Open-Meteo
373
- latest_data_sequence = get_latest_data_sequence(SEQUENCE_LENGTH)
374
 
375
- if latest_data_sequence is None:
376
- return "Failed to retrieve or process latest data sequence."
377
 
378
- # Ensure the retrieved data has the correct shape (redundant check, but safe)
379
- if latest_data_sequence.shape != (SEQUENCE_LENGTH, NUM_INPUT_FEATURES):
380
- return f"Error: Retrieved data has incorrect shape {latest_data_sequence.shape}. Expected ({SEQUENCE_LENGTH}, {NUM_INPUT_FEATURES})."
 
 
 
 
 
381
 
382
 
383
- # 2. Scale the data sequence using the loaded input scaler
384
- latest_data_sequence_with_batch = latest_data_sequence[np.newaxis, :, :]
385
- scaled_input_data = input_scaler.transform(latest_data_sequence_with_batch)
 
 
386
 
387
- # 3. Perform prediction (outputs scaled target)
388
- output = model.predict(scaled_input_data)
 
389
 
390
- # 4. Process the output (get the scaled predicted value)
391
- predicted_scaled_value = output[0][0]
 
 
 
392
 
393
- # 5. Inverse transform the prediction using the target scaler
394
- predicted_original_scale = target_scaler.inverse_transform(np.array([[predicted_scaled_value]]))[0][0]
 
395
 
396
- predicted_value = predicted_original_scale
 
 
397
 
398
- return float(predicted_value)
 
 
399
 
400
- # --- Gradio Interface ---
401
- # (Keep the Gradio interface as before, inputs=None)
402
- interface = gr.Interface(
403
- fn=predict,
404
- inputs=None,
405
- outputs=gr.Number(label=f"Predicted AQI (Next {N_AHEAD} Hour(s))")
406
- )
407
 
 
408
 
409
- # --- Launch Gradio Interface ---
410
- if __name__ == "__main__":
411
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py (or main.py)
2
+ from fastapi import FastAPI, HTTPException
3
+ from pydantic import BaseModel
4
  import numpy as np
5
  import tensorflow as tf
6
  from tensorflow.keras.models import load_model
7
  from tensorflow.keras.layers import Input
 
 
 
 
 
 
 
 
 
8
  from tensorflow.keras.utils import custom_object_scope
9
  import pickle
10
  import os
11
  import requests
12
  import pandas as pd
13
  from datetime import datetime, timedelta, timezone
14
+ import pytz
15
+ import json
16
+ import traceback # Import traceback to print detailed error info
17
+
18
+ # Assuming TKAN is installed and available
19
+ from tkan import TKAN
20
+ try:
21
+ from tkat import TKAT
22
+ except ImportError:
23
+ print("TKAT library not found. If your model uses TKAT, ensure the library is installed.")
24
+ TKAT = None
25
 
26
  # --- Your MinMaxScaler Class (Copied from Notebook) ---
27
+ # This class is essential for loading your scalers
28
  class MinMaxScaler:
 
29
  def __init__(self, feature_axis=None, minmax_range=(0, 1)):
30
  self.feature_axis = feature_axis
31
  self.min_ = None
 
51
  return self
52
 
53
  def transform(self, X):
54
+ if self.min_ is None or self.max_ is None or self.scale_ is None:
55
+ raise ValueError("Scaler has not been fitted.")
56
  X_scaled = (X - self.min_) / self.scale_
57
  X_scaled = X_scaled * (self.minmax_range[1] - self.minmax_range[0]) + self.minmax_range[0]
58
  return X_scaled
59
 
 
 
 
60
  def inverse_transform(self, X_scaled):
61
+ if self.min_ is None or self.max_ is None or self.scale_ is None:
62
+ raise ValueError("Scaler has not been fitted.")
63
  X = (X_scaled - self.minmax_range[0]) / (self.minmax_range[1] - self.minmax_range[0])
64
  X = X * self.scale_ + self.min_
65
  return X
66
 
67
+ # --- AQI breakpoints and calculation functions (Copied from Notebook) ---
 
 
68
  aqi_breakpoints = {
69
  'pm25': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 200, 101, 200), (201, 300, 201, 300)],
70
  'pm10': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 250, 101, 200), (251, 350, 201, 300)],
 
86
 
87
  def calculate_overall_aqi(row, aqi_breakpoints):
88
  sub_aqis = []
89
+ # Mapping API names to internal names if necessary
90
  pollutant_mapping = {
91
+ 'pm25': 'pm25',
92
  'pm10': 'pm10',
93
+ 'co': 'co',
94
+ 'pm2_5': 'pm25', # Common API name for PM2.5
95
+ 'carbon_monoxide': 'co', # Common API name for CO
96
  }
97
  for api_pollutant, internal_pollutant in pollutant_mapping.items():
98
+ if api_pollutant in row:
99
+ concentration = row[api_pollutant]
100
+ if not pd.isna(concentration): # Use pd.isna for pandas DataFrames/Series
101
+ sub_aqi = calculate_sub_aqi(concentration, aqi_breakpoints.get(internal_pollutant, []))
102
+ sub_aqis.append(sub_aqi)
103
+ else:
104
+ sub_aqis.append(np.nan)
105
  else:
106
+ sub_aqis.append(np.nan)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
 
108
+ # Use np.nanmax to find the maximum ignoring NaNs. Returns -inf if all are NaN.
109
+ # Check if sub_aqis list is not empty and contains at least one non-NaN value
110
+ if sub_aqis and not all(pd.isna(sub_aqis)):
111
+ return np.nanmax(sub_aqis)
112
+ else:
113
+ return np.nan # Return NaN if no valid pollutant data is available
114
+
115
+ # --- Data Retrieval Function ---
116
+ def get_latest_data_sequence(sequence_length: int, latitude: float, longitude: float):
117
+ print(f"Attempting to retrieve data for the last {sequence_length} hours from Open-Meteo for Lat: {latitude}, Lon: {longitude}")
118
+
119
+ end_time = datetime.now(pytz.utc)
120
+ # Fetch slightly more data to allow for resampling and ensure sequence_length is met
121
+ fetch_hours = sequence_length + 5
122
+ start_time = end_time - timedelta(hours=fetch_hours)
123
+
124
+ # Format timestamps for API request (ISO 8601)
125
+ start_time_str = start_time.isoformat().split('.')[0] + 'Z'
126
+ end_time_str = end_time.isoformat().split('.')[0] + 'Z'
127
+
128
+ print(f"Requesting data from {start_time_str} to {end_time_str}")
129
+
130
+ # Open-Meteo Air Quality API
131
+ air_quality_url = "https://air-quality-api.open-meteo.com/v1/air-quality"
132
+ air_quality_params = {
133
+ "latitude": latitude,
134
+ "longitude": longitude,
135
+ "hourly": ["pm2_5", "pm10", "carbon_monoxide"],
136
+ "timezone": "UTC",
137
+ "start_date": start_time.strftime('%Y-%m-%d'), # Use YYYY-MM-DD format
138
+ "end_date": end_time.strftime('%Y-%m-%d'),
139
+ "past_hours": fetch_hours
140
+ }
141
+
142
+ # Open-Meteo Historical Weather API for Temperature
143
+ weather_url = "https://archive-api.open-meteo.com/v1/archive"
144
+ weather_params = {
145
+ "latitude": latitude,
146
+ "longitude": longitude,
147
+ "hourly": ["temperature_2m"],
148
+ "timezone": "UTC",
149
+ "start_date": start_time.strftime('%Y-%m-%d'),
150
+ "end_date": end_time.strftime('%Y-%m-%d')
151
+ }
152
+
153
+ try:
154
+ # Fetch Air Quality Data
155
+ print(f"Fetching air quality data from: {air_quality_url}")
156
+ air_quality_response = requests.get(air_quality_url, params=air_quality_params)
157
+ air_quality_response.raise_for_status()
158
+ air_quality_data = air_quality_response.json()
159
+ print("Air quality data retrieved.")
160
+
161
+ # Fetch Temperature Data
162
+ print(f"Fetching temperature data from: {weather_url}")
163
+ weather_response = requests.get(weather_url, params=weather_params)
164
+ weather_response.raise_for_status()
165
+ weather_data = weather_response.json()
166
+ print("Temperature data retrieved.")
167
+
168
+ print("Data fetched successfully.")
169
+
170
+ # Process Air Quality Data
171
+ if 'hourly' not in air_quality_data or 'time' not in air_quality_data['hourly']:
172
+ print("Error: 'hourly' or 'time' key not found in air quality response.")
173
+ return None, "Error: Invalid air quality data format from API."
174
+ df_aq = pd.DataFrame(air_quality_data['hourly'])
175
+ df_aq['time'] = pd.to_datetime(df_aq['time'])
176
+ df_aq.set_index('time', inplace=True)
177
+
178
+ # Process Temperature Data
179
+ if 'hourly' not in weather_data or 'time' not in weather_data['hourly']:
180
+ print("Error: 'hourly' or 'time' key not found in weather response.")
181
+ return None, "Error: Invalid weather data format from API."
182
+ df_temp = pd.DataFrame(weather_data['hourly'])
183
+ df_temp['time'] = pd.to_datetime(df_temp['time'])
184
+ df_temp.set_index('time', inplace=True)
185
+
186
+ # Merge dataframes
187
+ df_merged = df_aq.merge(df_temp, left_index=True, right_index=True, how='outer')
188
+ print("DataFrames merged.")
189
+
190
+
191
+ # Resample to ensure consistent hourly frequency and fill missing data
192
+ # Use 'h' for hourly resampling
193
+ df_processed = df_merged.resample('h').ffill().bfill()
194
+ print(f"DataFrame resampled to hourly. Shape: {df_processed.shape}")
195
+
196
+
197
+ # Rename columns to match internal naming convention
198
+ df_processed.rename(columns={'pm2_5': 'pm25', 'carbon_monoxide': 'co', 'temperature_2m': 'temp'}, inplace=True)
199
+ print("Renamed columns.")
200
+
201
+
202
+ # Calculate AQI for the processed data
203
+ df_processed['calculated_aqi'] = df_processed.apply(lambda row: calculate_overall_aqi(row, aqi_breakpoints), axis=1)
204
+ print("Calculated AQI.")
205
+
206
+
207
+ # Select and reorder columns to match training data order
208
+ required_columns = ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
209
+ # Ensure all required columns exist before selecting
210
+ if not all(col in df_processed.columns for col in required_columns):
211
+ missing_cols = [col for col in required_columns if col not in df_processed.columns]
212
+ print(f"Error: Missing required columns after processing: {missing_cols}")
213
+ return None, f"Error: Missing required data columns: {missing_cols}"
214
+
215
+ df_processed = df_processed[required_columns].copy()
216
+ print(f"Selected and reordered columns. Final processing shape: {df_processed.shape}")
217
+
218
+
219
+ # Handle any remaining NaNs after ffill/bfill (e.g., if the very first values were NaN or API returned all NaNs)
220
+ initial_rows = len(df_processed)
221
+ df_processed.dropna(inplace=True)
222
+ if len(df_processed) < initial_rows:
223
+ print(f"Warning: Dropped {initial_rows - len(df_processed)} rows with remaining NaNs.")
224
+
225
+
226
+ # Check if enough data points are available
227
+ if len(df_processed) < sequence_length:
228
+ print(f"Error: Only retrieved and processed {len(df_processed)} data points, but {sequence_length} are required.")
229
+ return None, f"Error: Insufficient historical data ({len(df_processed)} points available, {sequence_length} required)."
230
+
231
+ # Select the last `sequence_length` rows for the input sequence
232
+ latest_data_sequence_df = df_processed.tail(sequence_length).copy() # Use .copy() to avoid SettingWithCopyWarning
233
+ print(f"Selected last {sequence_length} data points.")
234
+
235
+ # Convert to numpy array and reshape (1, sequence_length, num_features)
236
+ latest_data_sequence = latest_data_sequence_df.values.reshape(1, sequence_length, len(required_columns))
237
+
238
+ # Get the timestamps for output formatting later
239
+ timestamps = latest_data_sequence_df.index.tolist()
240
+
241
+ print(f"Prepared input sequence with shape: {latest_data_sequence.shape}")
242
+
243
+ return latest_data_sequence, timestamps # Return data and timestamps
244
+
245
+ except requests.exceptions.RequestException as e:
246
+ print(f"API Request Error: {e}")
247
+ return None, f"API Request Error: {e}"
248
+ except Exception as e:
249
+ print(f"An unexpected error occurred during data retrieval and processing: {e}")
250
+ traceback.print_exc()
251
+ return None, f"An unexpected error occurred during data processing: {e}"
252
+
253
+
254
+ # --- Define paths to your saved files ---
255
+ # Use relative paths assuming files are in the root directory of the Space
256
+ MODEL_PATH = '/content/best_model_TKAN_nahead_1.keras'
257
+ INPUT_SCALER_PATH = '/content/input_scaler.pkl'
258
+ TARGET_SCALER_PATH = '/content/target_scaler.pkl' # This should be the scaler for the ratio
259
+ # Y_SCALER_TRAIN_PATH = 'y_scaler_train.pkl' # Keep commented out unless you find a specific use for it in the inverse transform
260
+
261
+
262
+ # --- Load the scalers and model ---
263
  input_scaler = None
264
+ target_scaler = None # Scaler for the AQI/rolling_median ratio
265
+ model = None
266
 
267
  try:
268
+ with open(INPUT_SCALER_PATH, 'rb') as f:
269
+ input_scaler = pickle.load(f)
270
+ print(f"Input scaler loaded successfully from {INPUT_SCALER_PATH}")
271
+
272
+ with open(TARGET_SCALER_PATH, 'rb') as f:
273
+ target_scaler = pickle.load(f)
274
+ print(f"Target scaler (for ratio) loaded successfully from {TARGET_SCALER_PATH}")
275
+
276
+ except FileNotFoundError as e:
277
+ print(f"Error loading scaler files: {e}")
278
+ print("Please ensure input_scaler.pkl and target_scaler.pkl are in the correct directory.")
279
+ # These need to be loaded for the app to work, so we might let the startup fail or raise an error here.
280
+ # For a web app, letting it fail on startup and show in logs is better than running with None scalers.
281
+ # However, for the purpose of giving you the code structure, we'll just print and model=None below.
282
+ except Exception as e:
283
+ print(f"An unexpected error occurred during scaler loading: {e}")
284
+ traceback.print_exc()
285
 
 
 
 
286
 
287
+ # Load the trained model with custom_object_scope
288
+ custom_objects = {"TKAN": TKAN}
289
+ if TKAT is not None:
290
+ custom_objects["TKAT"] = TKAT
291
 
292
+ try:
293
+ print(f"Loading model from {MODEL_PATH}...")
294
+ # Use custom_object_scope to register custom layers during loading
295
+ with custom_object_scope(custom_objects):
296
+ # compile=False because we only need the model for inference
297
+ model = load_model(MODEL_PATH, compile=False)
298
+ print("Model loaded successfully.")
299
+ except FileNotFoundError:
300
+ print(f"Error: Model file not found at {MODEL_PATH}.")
301
+ except ValueError as e:
302
+ print(f"Error loading model (ValueError): {e}")
303
+ print("This can happen if the file is not a valid Keras file or if custom objects are not registered.")
304
+ traceback.print_exc()
305
  except Exception as e:
306
+ print(f"An unexpected error occurred during model loading: {e}")
 
307
  traceback.print_exc()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
 
 
 
 
 
 
309
 
310
+ # Initialize FastAPI app
311
+ app = FastAPI()
 
312
 
313
+ # Define the structure of the prediction request body
314
+ class PredictionRequest(BaseModel):
315
+ latitude: float
316
+ longitude: float
317
+ pm25: float = None # Make current inputs optional, rely primarily on historical fetch
318
+ pm10: float = None
319
+ co: float = None
320
+ temp: float = None
321
+ n_ahead: int = 1 # Default prediction steps
322
 
 
323
 
324
+ # Define the structure of the prediction response body
325
+ class PredictionResponse(BaseModel):
326
+ status: str # "success" or "error"
327
+ message: str # Description of the result or error
328
+ predictions: list = None # List of {"timestamp": "...", "aqi": ...} or None on error
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
 
 
 
 
 
 
330
 
331
# Define the prediction endpoint
@app.post("/predict", response_model=PredictionResponse)
async def predict_aqi_endpoint(request: PredictionRequest):
    """Forecast AQI for the next ``request.n_ahead`` hours at the given coordinates.

    Pipeline: fetch the most recent hourly feature sequence (Open-Meteo),
    optionally overwrite the latest timestep with user-supplied current
    readings, scale, run the model, then inverse-transform the scaled ratio
    predictions back to absolute AQI using a rolling-median proxy estimated
    from the tail of the input sequence.

    Returns:
        PredictionResponse: ``status="success"`` with a list of
        ``{"timestamp", "aqi"}`` dicts, or ``status="error"`` when the
        historical data could not be retrieved.

    Raises:
        HTTPException(500): if the model/scalers are unavailable, the model's
            input shape is unexpected, or scaling/prediction/inverse-transform
            fails.
    """
    # Check that the model and scalers were loaded successfully on startup.
    if model is None or input_scaler is None or target_scaler is None:
        print("API called but model or scalers are not loaded.")
        raise HTTPException(status_code=500, detail="Model or scalers not loaded. Check server logs for details.")

    # The model input is expected to be (batch, sequence_length, num_features).
    # BUGFIX: the original guard used `len(model.input_shape) < 2`, which still
    # allowed an IndexError below when reading input_shape[2]; require rank 3.
    if model.input_shape is None or len(model.input_shape) < 3:
        print(f"Error: Model has unexpected input shape: {model.input_shape}")
        raise HTTPException(status_code=500, detail=f"Model has unexpected input shape: {model.input_shape}")

    SEQUENCE_LENGTH = model.input_shape[1]
    NUM_FEATURES = model.input_shape[2]
    # Feature order must match training: AQI first, then weather/pollutants.
    required_num_features = len(['calculated_aqi', 'temp', 'pm25', 'pm10', 'co'])
    if NUM_FEATURES != required_num_features:
        print(f"Error: Model expects {NUM_FEATURES} features, but data processing provides {required_num_features}.")
        raise HTTPException(status_code=500, detail=f"Model expects {NUM_FEATURES} features, but data processing provides {required_num_features}.")

    # Fetch the historical sequence. NOTE: on success the second return value
    # is the list of sequence timestamps; on failure it is an error message.
    latest_data_sequence_unscaled, message = get_latest_data_sequence(SEQUENCE_LENGTH, request.latitude, request.longitude)

    if latest_data_sequence_unscaled is None:
        # Soft failure: report the retrieval error in the response body.
        print(f"Data retrieval failed: {message}")
        return PredictionResponse(status="error", message=f"Data retrieval failed: {message}")

    # Predictions cover n_ahead hours *after* the last timestamp in the sequence.
    prediction_timestamps = []
    if message and isinstance(message, list) and len(message) > 0:  # 'message' is actually 'timestamps' here
        last_timestamp_of_sequence = message[-1]  # Last timestamp of the input sequence
        for i in range(request.n_ahead):
            # Prediction i (0-indexed) is for hour i+1 after the last timestamp.
            prediction_timestamps.append(last_timestamp_of_sequence + timedelta(hours=i + 1))
    else:
        print("Warning: Could not get valid timestamps from data retrieval. Prediction timestamps will be approximate.")
        # Fallback: approximate timestamps relative to the current UTC time.
        now_utc = datetime.now(pytz.utc)
        for i in range(request.n_ahead):
            prediction_timestamps.append(now_utc + timedelta(hours=i + 1))

    # Optionally overwrite the last timestep with the caller's current readings,
    # but only when all four values are present and valid (not None / NaN).
    if request.pm25 is not None and not pd.isna(request.pm25) and \
       request.pm10 is not None and not pd.isna(request.pm10) and \
       request.co is not None and not pd.isna(request.co) and \
       request.temp is not None and not pd.isna(request.temp):

        current_aqi = calculate_overall_aqi({'pm25': request.pm25, 'pm10': request.pm10, 'co': request.co, 'temp': request.temp}, aqi_breakpoints)

        if not pd.isna(current_aqi):
            # Column order: 'calculated_aqi', 'temp', 'pm25', 'pm10', 'co'.
            # Update the last row (-1) of the batch's single input sequence.
            latest_data_sequence_unscaled[0, -1, 0] = current_aqi
            latest_data_sequence_unscaled[0, -1, 1] = request.temp
            latest_data_sequence_unscaled[0, -1, 2] = request.pm25
            latest_data_sequence_unscaled[0, -1, 3] = request.pm10
            latest_data_sequence_unscaled[0, -1, 4] = request.co
            print("Updated last timestep of input sequence with current user inputs.")
        else:
            print("Warning: Could not calculate AQI for current inputs. Last timestep remains historical.")

    # Scale the input sequence with the scaler fitted at training time.
    try:
        X_scaled = input_scaler.transform(latest_data_sequence_unscaled)
        print("Input data scaled successfully.")
    except Exception as e:
        print(f"Error scaling input data: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="Error processing input data for prediction (scaling).")

    # Run the model; expected output shape is (1, n_ahead).
    try:
        scaled_prediction = model.predict(X_scaled, verbose=0)
        print(f"Model prediction made. Scaled prediction shape: {scaled_prediction.shape}")
    except Exception as e:
        print(f"Error during model prediction: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="Error during model prediction.")

    # Inverse transform the prediction back to absolute AQI units.
    try:
        # The target was scaled as a ratio against a rolling median, so undoing
        # it needs the rolling median at the *future* timesteps. That is
        # unknown, so approximate it from the tail of the input sequence.
        if latest_data_sequence_unscaled.shape[1] > 0:
            # 'calculated_aqi' is the first feature of the unscaled sequence.
            calculated_aqi_sequence = latest_data_sequence_unscaled[0, :, 0]

            # Mean of the last few points stands in for the rolling median.
            # This is a simple approximation; a more robust method may be needed.
            approx_rolling_median_proxy = np.mean(calculated_aqi_sequence[-min(5, SEQUENCE_LENGTH):])
            if pd.isna(approx_rolling_median_proxy) or approx_rolling_median_proxy <= 0:
                approx_rolling_median_proxy = 1.0  # Prevent division by zero or invalid scaling

            # Broadcast the proxy across all future timesteps.
            corresponding_rolling_median_scaler = np.full((1, request.n_ahead, 1), approx_rolling_median_proxy, dtype=np.float32)
            print(f"Approximated rolling median proxy for inverse transform: {approx_rolling_median_proxy:.2f}")

            # 1. Undo the min-max scaling to recover the predicted ratio.
            y_unscaled_pred_ratio = target_scaler.inverse_transform(scaled_prediction.reshape(1, request.n_ahead, 1))
            print(f"Inverse transformed to ratio scale. Shape: {y_unscaled_pred_ratio.shape}")

            # 2. Ratio * rolling-median proxy -> absolute AQI values.
            predicted_aqi_values = y_unscaled_pred_ratio * corresponding_rolling_median_scaler
            predicted_aqi_values = predicted_aqi_values.flatten()  # Shape (n_ahead,)
        else:
            print("Error: Input sequence is empty, cannot perform inverse transform.")
            raise ValueError("Input sequence is empty.")

        print(f"Final predicted AQI values: {predicted_aqi_values}")

    except Exception as e:
        print(f"Error during inverse transformation: {e}")
        traceback.print_exc()
        raise HTTPException(status_code=500, detail="Error processing prediction results (inverse transform).")

    # Pair each predicted value with its timestamp for the response payload.
    predictions_list = []
    for i in range(request.n_ahead):
        timestamp_str = prediction_timestamps[i].strftime('%Y-%m-%d %H:%M:%S')
        predictions_list.append({
            "timestamp": timestamp_str,
            "aqi": float(predicted_aqi_values[i])  # Ensure AQI is a standard float
        })

    # Return the successful response
    return PredictionResponse(status="success", message="Prediction successful.", predictions=predictions_list)
472
+
473
# Root endpoint for health check
@app.get("/")
async def read_root():
    """Lightweight liveness probe: confirms the API process is up."""
    status_payload = {"message": "AQI Prediction API is running."}
    return status_payload