Spaces:

nikethanreddy
/

project

Sleeping

App Files Files Community

nikethanreddy committed on Jun 2, 2025

Commit

d7ffece

verified ·

1 Parent(s): 103ba23

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -79

app.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import os
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 os.environ['JAX_PLATFORMS'] = 'cpu'
@@ -135,16 +136,16 @@ def calculate_overall_aqi(row, aqi_breakpoints):
 def get_latest_data_sequence(sequence_length: int, latitude: float, longitude: float):
     print(f"Attempting to retrieve data for the last {sequence_length} hours from Open-Meteo for Lat: {latitude}, Lon: {longitude}")
-    end_time = datetime.now(pytz.utc)
-    # Fetch slightly more data to allow for resampling and ensure sequence_length is met
-    fetch_hours = sequence_length + 5
-    start_time = end_time - timedelta(hours=fetch_hours)
-    # Format timestamps for API request (ISO 8601)
-    start_time_str = start_time.isoformat().split('.')[0] + 'Z'
-    end_time_str = end_time.isoformat().split('.')[0] + 'Z'
-    print(f"Requesting data from {start_time_str} to {end_time_str}")
     # Open-Meteo Air Quality API
     air_quality_url = "https://air-quality-api.open-meteo.com/v1/air-quality"
@@ -153,32 +154,30 @@ def get_latest_data_sequence(sequence_length: int, latitude: float, longitude: f
         "longitude": longitude,
         "hourly": ["pm2_5", "pm10", "carbon_monoxide"],
         "timezone": "UTC",
-        "start_date": start_time.strftime('%Y-%m-%d'), # Use YYYY-MM-DD format
-        "end_date": end_time.strftime('%Y-%m-%d'),
-        "past_hours": fetch_hours
     }
-    # Open-Meteo Historical Weather API for Temperature
     weather_url = "https://archive-api.open-meteo.com/v1/archive"
     weather_params = {
         "latitude": latitude,
         "longitude": longitude,
         "hourly": ["temperature_2m"],
         "timezone": "UTC",
-        "start_date": start_time.strftime('%Y-%m-%d'),
-        "end_date": end_time.strftime('%Y-%m-%d')
     }
     try:
         # Fetch Air Quality Data
-        print(f"Fetching air quality data from: {air_quality_url}")
         air_quality_response = requests.get(air_quality_url, params=air_quality_params)
         air_quality_response.raise_for_status()
         air_quality_data = air_quality_response.json()
         print("Air quality data retrieved.")
         # Fetch Temperature Data
-        print(f"Fetching temperature data from: {weather_url}")
         weather_response = requests.get(weather_url, params=weather_params)
         weather_response.raise_for_status()
         weather_data = weather_response.json()
@@ -208,7 +207,6 @@ def get_latest_data_sequence(sequence_length: int, latitude: float, longitude: f
         # Resample to ensure consistent hourly frequency and fill missing data
-        # Use 'h' for hourly resampling
         df_processed = df_merged.resample('h').ffill().bfill()
         print(f"DataFrame resampled to hourly. Shape: {df_processed.shape}")
@@ -235,7 +233,7 @@ def get_latest_data_sequence(sequence_length: int, latitude: float, longitude: f
         print(f"Selected and reordered columns. Final processing shape: {df_processed.shape}")
-        # Handle any remaining NaNs after ffill/bfill (e.g., if the very first values were NaN or API returned all NaNs)
         initial_rows = len(df_processed)
         df_processed.dropna(inplace=True)
         if len(df_processed) < initial_rows:
@@ -248,7 +246,7 @@ def get_latest_data_sequence(sequence_length: int, latitude: float, longitude: f
             return None, f"Error: Insufficient historical data ({len(df_processed)} points available, {sequence_length} required)."
         # Select the last `sequence_length` rows for the input sequence
-        latest_data_sequence_df = df_processed.tail(sequence_length).copy() # Use .copy() to avoid SettingWithCopyWarning
         print(f"Selected last {sequence_length} data points.")
         # Convert to numpy array and reshape (1, sequence_length, num_features)
@@ -259,7 +257,7 @@ def get_latest_data_sequence(sequence_length: int, latitude: float, longitude: f
         print(f"Prepared input sequence with shape: {latest_data_sequence.shape}")
-        return latest_data_sequence, timestamps # Return data and timestamps
     except requests.exceptions.RequestException as e:
         print(f"API Request Error: {e}")
@@ -271,7 +269,6 @@ def get_latest_data_sequence(sequence_length: int, latitude: float, longitude: f
 # --- Define paths to your saved files ---
-# Use relative paths assuming files are in the root directory of the Space
 MODEL_PATH = 'best_model_TKAN_nahead_1.keras'
 INPUT_SCALER_ATTR_PATH = 'input_scaler_attributes.json'
 TARGET_SCALER_ATTR_PATH = 'target_scaler_attributes.json'
@@ -280,25 +277,24 @@ Y_SCALER_TRAIN_PATH = 'y_scaler_train.npy'
 # --- Load the scalers and model ---
 input_scaler = None
-target_scaler = None # Scaler for the AQI/rolling_median ratio
 model = None
 try:
     print(f"Attempting to load input scaler attributes from {INPUT_SCALER_ATTR_PATH}...")
     with open(INPUT_SCALER_ATTR_PATH, 'r') as f:
         input_attrs = json.load(f)
-    input_scaler = MinMaxScaler() # Create a new instance
-    input_scaler.load_attributes(input_attrs) # Load attributes
     print("Input scaler loaded manually.")
     print(f"Attempting to load target scaler attributes from {TARGET_SCALER_ATTR_PATH}...")
     with open(TARGET_SCALER_ATTR_PATH, 'r') as f:
         target_attrs = json.load(f)
-    target_scaler = MinMaxScaler() # Create a new instance
-    target_scaler.load_attributes(target_attrs) # Load attributes
     print("Target scaler loaded manually.")
-    # Load y_scaler_train numpy array if saved as .npy
     print(f"Attempting to load y_scaler_train numpy array from {Y_SCALER_TRAIN_PATH}...")
     y_scaler_train = np.load(Y_SCALER_TRAIN_PATH)
     print("y_scaler_train numpy array loaded.")
@@ -311,16 +307,13 @@ except Exception as e:
     import traceback
     traceback.print_exc()
-# Load the trained model with custom_object_scope
 custom_objects = {"TKAN": TKAN}
 if TKAT is not None:
      custom_objects["TKAT"] = TKAT
 try:
     print(f"Loading model from {MODEL_PATH}...")
-    # Use custom_object_scope to register custom layers during loading
     with custom_object_scope(custom_objects):
-        # compile=False because we only need the model for inference
         model = load_model(MODEL_PATH, compile=False)
     print("Model loaded successfully.")
 except FileNotFoundError:
@@ -334,38 +327,30 @@ except Exception as e:
     traceback.print_exc()
-# Initialize FastAPI app
 app = FastAPI()
-# Define the structure of the prediction request body
 class PredictionRequest(BaseModel):
     latitude: float
     longitude: float
-    pm25: float = None # Make current inputs optional, rely primarily on historical fetch
     pm10: float = None
     co: float = None
     temp: float = None
-    n_ahead: int = 1 # Default prediction steps
-# Define the structure of the prediction response body
 class PredictionResponse(BaseModel):
-    status: str # "success" or "error"
-    message: str # Description of the result or error
-    predictions: list = None # List of {"timestamp": "...", "aqi": ...} or None on error
-# Define the prediction endpoint
 @app.post("/predict", response_model=PredictionResponse)
 async def predict_aqi_endpoint(request: PredictionRequest):
-    # Check if model and scalers were loaded successfully on startup
     if model is None or input_scaler is None or target_scaler is None:
         print("API called but model or scalers are not loaded.")
-        # Return a 500 Internal Server Error if dependencies failed to load
         raise HTTPException(status_code=500, detail="Model or scalers not loaded. Check server logs for details.")
-    # Get the expected sequence length and number of features from the model's input shape
-    # Assuming input shape is (None, sequence_length, num_features)
     if model.input_shape is None or len(model.input_shape) < 2:
          print(f"Error: Model has unexpected input shape: {model.input_shape}")
          raise HTTPException(status_code=500, detail=f"Model has unexpected input shape: {model.input_shape}")
@@ -378,34 +363,24 @@ async def predict_aqi_endpoint(request: PredictionRequest):
          raise HTTPException(status_code=500, detail=f"Model expects {NUM_FEATURES} features, but data processing provides {required_num_features}.")
-    # Get the historical data sequence and its timestamps from Open-Meteo
-    # The function now returns the data and a message (or error)
     latest_data_sequence_unscaled, message = get_latest_data_sequence(SEQUENCE_LENGTH, request.latitude, request.longitude)
-    # Check if data retrieval was successful
     if latest_data_sequence_unscaled is None:
-        # Return an error response if data fetching failed
         print(f"Data retrieval failed: {message}")
         return PredictionResponse(status="error", message=f"Data retrieval failed: {message}")
-    # The timestamps returned are for the sequence itself. We need timestamps for the *predictions*.
-    # The predictions are for n_ahead steps *after* the last timestamp in the sequence.
     prediction_timestamps = []
-    if message and isinstance(message, list) and len(message) > 0: # 'message' is actually 'timestamps' here
-        last_timestamp_of_sequence = message[-1] # Get the last timestamp from the sequence
         for i in range(request.n_ahead):
-            # Prediction i (0-indexed) is for hour i+1 after the last timestamp
             prediction_timestamps.append(last_timestamp_of_sequence + timedelta(hours=i + 1))
     else:
         print("Warning: Could not get valid timestamps from data retrieval. Prediction timestamps will be approximate.")
-        # Fallback: Approximate timestamps based on current time
         now_utc = datetime.now(pytz.utc)
         for i in range(request.n_ahead):
              prediction_timestamps.append(now_utc + timedelta(hours=i+1))
-    # Optional: Update the last timestep with current user inputs if provided
-    # Check if current inputs were provided and are valid (not None or NaN)
     if request.pm25 is not None and not pd.isna(request.pm25) and \
        request.pm10 is not None and not pd.isna(request.pm10) and \
        request.co is not None and not pd.isna(request.co) and \
@@ -414,8 +389,6 @@ async def predict_aqi_endpoint(request: PredictionRequest):
         current_aqi = calculate_overall_aqi({'pm25': request.pm25, 'pm10': request.pm10, 'co': request.co, 'temp': request.temp}, aqi_breakpoints)
         if not pd.isna(current_aqi):
-            # Assuming column order: 'calculated_aqi', 'temp', 'pm25', 'pm10', 'co'
-            # Update the last row (-1) of the input sequence
             latest_data_sequence_unscaled[0, -1, 0] = current_aqi
             latest_data_sequence_unscaled[0, -1, 1] = request.temp
             latest_data_sequence_unscaled[0, -1, 2] = request.pm25
@@ -425,7 +398,6 @@ async def predict_aqi_endpoint(request: PredictionRequest):
         else:
              print("Warning: Could not calculate AQI for current inputs. Last timestep remains historical.")
-    # Scale the input data
     try:
         X_scaled = input_scaler.transform(latest_data_sequence_unscaled)
         print("Input data scaled successfully.")
@@ -435,9 +407,8 @@ async def predict_aqi_endpoint(request: PredictionRequest):
         raise HTTPException(status_code=500, detail="Error processing input data for prediction (scaling).")
-    # Make prediction
     try:
-        scaled_prediction = model.predict(X_scaled, verbose=0) # Shape (1, n_ahead)
         print(f"Model prediction made. Scaled prediction shape: {scaled_prediction.shape}")
     except Exception as e:
         print(f"Error during model prediction: {e}")
@@ -445,33 +416,22 @@ async def predict_aqi_endpoint(request: PredictionRequest):
         raise HTTPException(status_code=500, detail="Error during model prediction.")
-    # Inverse transform the prediction
     try:
-        # --- Inverse Transformation Logic (Based on Rolling Median Scaling) ---
-        # This part needs the actual rolling median for the future prediction timesteps.
-        # Using an approximation based on the input sequence.
         if latest_data_sequence_unscaled.shape[1] > 0:
-            # Get the 'calculated_aqi' values from the unscaled input sequence
-            calculated_aqi_sequence = latest_data_sequence_unscaled[0, :, 0] # Assuming AQI is the first feature
-            # Approximate the rolling median based on the last few points of the input sequence
-            # This is a simple approximation. A more robust method might be needed.
             approx_rolling_median_proxy = np.mean(calculated_aqi_sequence[-min(5, SEQUENCE_LENGTH):])
             if pd.isna(approx_rolling_median_proxy) or approx_rolling_median_proxy <= 0:
-                 approx_rolling_median_proxy = 1.0 # Prevent division by zero or invalid scaling
-            # Create a placeholder scaler array for the future timesteps
             corresponding_rolling_median_scaler = np.full((1, request.n_ahead, 1), approx_rolling_median_proxy, dtype=np.float32)
             print(f"Approximated rolling median proxy for inverse transform: {approx_rolling_median_proxy:.2f}")
-            # 1. Inverse transform the scaled prediction (ratio) using the target_scaler
             y_unscaled_pred_ratio = target_scaler.inverse_transform(scaled_prediction.reshape(1, request.n_ahead, 1))
             print(f"Inverse transformed to ratio scale. Shape: {y_unscaled_pred_ratio.shape}")
-            # 2. Multiply the unscaled ratio by the approximated rolling median scaler
             predicted_aqi_values = y_unscaled_pred_ratio * corresponding_rolling_median_scaler
-            predicted_aqi_values = predicted_aqi_values.flatten() # Shape (n_ahead,)
         else:
             print("Error: Input sequence is empty, cannot perform inverse transform.")
@@ -484,20 +444,16 @@ async def predict_aqi_endpoint(request: PredictionRequest):
         traceback.print_exc()
         raise HTTPException(status_code=500, detail="Error processing prediction results (inverse transform).")
-    # Prepare the prediction output list
     predictions_list = []
     for i in range(request.n_ahead):
-        # Use the calculated prediction_timestamps
         timestamp_str = prediction_timestamps[i].strftime('%Y-%m-%d %H:%M:%S')
         predictions_list.append({
             "timestamp": timestamp_str,
-            "aqi": float(predicted_aqi_values[i]) # Ensure AQI is a standard float
         })
-    # Return the successful response
     return PredictionResponse(status="success", message="Prediction successful.", predictions=predictions_list)
-# Root endpoint for health check
 @app.get("/")
 async def read_root():
     return {"message": "AQI Prediction API is running."}

 import os
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 os.environ['JAX_PLATFORMS'] = 'cpu'
 def get_latest_data_sequence(sequence_length: int, latitude: float, longitude: float):
     print(f"Attempting to retrieve data for the last {sequence_length} hours from Open-Meteo for Lat: {latitude}, Lon: {longitude}")
+    # Calculate fetch_hours needed (sequence_length + buffer for resampling/NaNs)
+    fetch_hours = sequence_length + 5
+    # For temperature, we still need a date range for the archive API
+    end_time_for_temp = datetime.now(pytz.utc)
+    start_time_for_temp = end_time_for_temp - timedelta(hours=fetch_hours)
+    print(f"Requesting data for the past {fetch_hours} hours for air quality.")
+    print(f"Requesting temperature data from {start_time_for_temp.strftime('%Y-%m-%d')} to {end_time_for_temp.strftime('%Y-%m-%d')}")
     # Open-Meteo Air Quality API
     air_quality_url = "https://air-quality-api.open-meteo.com/v1/air-quality"
         "longitude": longitude,
         "hourly": ["pm2_5", "pm10", "carbon_monoxide"],
         "timezone": "UTC",
+        "past_hours": fetch_hours # Use past_hours instead of start/end_date
     }
+    # Open-Meteo Historical Weather API for Temperature (still uses start/end_date)
     weather_url = "https://archive-api.open-meteo.com/v1/archive"
     weather_params = {
         "latitude": latitude,
         "longitude": longitude,
         "hourly": ["temperature_2m"],
         "timezone": "UTC",
+        "start_date": start_time_for_temp.strftime('%Y-%m-%d'),
+        "end_date": end_time_for_temp.strftime('%Y-%m-%d')
     }
     try:
         # Fetch Air Quality Data
+        print(f"Fetching air quality data from: {air_quality_url} with params: {air_quality_params}")
         air_quality_response = requests.get(air_quality_url, params=air_quality_params)
         air_quality_response.raise_for_status()
         air_quality_data = air_quality_response.json()
         print("Air quality data retrieved.")
         # Fetch Temperature Data
+        print(f"Fetching temperature data from: {weather_url} with params: {weather_params}")
         weather_response = requests.get(weather_url, params=weather_params)
         weather_response.raise_for_status()
         weather_data = weather_response.json()
         # Resample to ensure consistent hourly frequency and fill missing data
         df_processed = df_merged.resample('h').ffill().bfill()
         print(f"DataFrame resampled to hourly. Shape: {df_processed.shape}")
         print(f"Selected and reordered columns. Final processing shape: {df_processed.shape}")
+        # Handle any remaining NaNs after ffill/bfill
         initial_rows = len(df_processed)
         df_processed.dropna(inplace=True)
         if len(df_processed) < initial_rows:
             return None, f"Error: Insufficient historical data ({len(df_processed)} points available, {sequence_length} required)."
         # Select the last `sequence_length` rows for the input sequence
+        latest_data_sequence_df = df_processed.tail(sequence_length).copy()
         print(f"Selected last {sequence_length} data points.")
         # Convert to numpy array and reshape (1, sequence_length, num_features)
         print(f"Prepared input sequence with shape: {latest_data_sequence.shape}")
+        return latest_data_sequence, timestamps
     except requests.exceptions.RequestException as e:
         print(f"API Request Error: {e}")
 # --- Define paths to your saved files ---
 MODEL_PATH = 'best_model_TKAN_nahead_1.keras'
 INPUT_SCALER_ATTR_PATH = 'input_scaler_attributes.json'
 TARGET_SCALER_ATTR_PATH = 'target_scaler_attributes.json'
 # --- Load the scalers and model ---
 input_scaler = None
+target_scaler = None
 model = None
 try:
     print(f"Attempting to load input scaler attributes from {INPUT_SCALER_ATTR_PATH}...")
     with open(INPUT_SCALER_ATTR_PATH, 'r') as f:
         input_attrs = json.load(f)
+    input_scaler = MinMaxScaler()
+    input_scaler.load_attributes(input_attrs)
     print("Input scaler loaded manually.")
     print(f"Attempting to load target scaler attributes from {TARGET_SCALER_ATTR_PATH}...")
     with open(TARGET_SCALER_ATTR_PATH, 'r') as f:
         target_attrs = json.load(f)
+    target_scaler = MinMaxScaler()
+    target_scaler.load_attributes(target_attrs)
     print("Target scaler loaded manually.")
     print(f"Attempting to load y_scaler_train numpy array from {Y_SCALER_TRAIN_PATH}...")
     y_scaler_train = np.load(Y_SCALER_TRAIN_PATH)
     print("y_scaler_train numpy array loaded.")
     import traceback
     traceback.print_exc()
 custom_objects = {"TKAN": TKAN}
 if TKAT is not None:
      custom_objects["TKAT"] = TKAT
 try:
     print(f"Loading model from {MODEL_PATH}...")
     with custom_object_scope(custom_objects):
         model = load_model(MODEL_PATH, compile=False)
     print("Model loaded successfully.")
 except FileNotFoundError:
     traceback.print_exc()
 app = FastAPI()
 class PredictionRequest(BaseModel):
     latitude: float
     longitude: float
+    pm25: float = None
     pm10: float = None
     co: float = None
     temp: float = None
+    n_ahead: int = 1
 class PredictionResponse(BaseModel):
+    status: str
+    message: str
+    predictions: list = None
 @app.post("/predict", response_model=PredictionResponse)
 async def predict_aqi_endpoint(request: PredictionRequest):
     if model is None or input_scaler is None or target_scaler is None:
         print("API called but model or scalers are not loaded.")
         raise HTTPException(status_code=500, detail="Model or scalers not loaded. Check server logs for details.")
     if model.input_shape is None or len(model.input_shape) < 2:
          print(f"Error: Model has unexpected input shape: {model.input_shape}")
          raise HTTPException(status_code=500, detail=f"Model has unexpected input shape: {model.input_shape}")
          raise HTTPException(status_code=500, detail=f"Model expects {NUM_FEATURES} features, but data processing provides {required_num_features}.")
     latest_data_sequence_unscaled, message = get_latest_data_sequence(SEQUENCE_LENGTH, request.latitude, request.longitude)
     if latest_data_sequence_unscaled is None:
         print(f"Data retrieval failed: {message}")
         return PredictionResponse(status="error", message=f"Data retrieval failed: {message}")
     prediction_timestamps = []
+    if message and isinstance(message, list) and len(message) > 0:
+        last_timestamp_of_sequence = message[-1]
         for i in range(request.n_ahead):
             prediction_timestamps.append(last_timestamp_of_sequence + timedelta(hours=i + 1))
     else:
         print("Warning: Could not get valid timestamps from data retrieval. Prediction timestamps will be approximate.")
         now_utc = datetime.now(pytz.utc)
         for i in range(request.n_ahead):
              prediction_timestamps.append(now_utc + timedelta(hours=i+1))
     if request.pm25 is not None and not pd.isna(request.pm25) and \
        request.pm10 is not None and not pd.isna(request.pm10) and \
        request.co is not None and not pd.isna(request.co) and \
         current_aqi = calculate_overall_aqi({'pm25': request.pm25, 'pm10': request.pm10, 'co': request.co, 'temp': request.temp}, aqi_breakpoints)
         if not pd.isna(current_aqi):
             latest_data_sequence_unscaled[0, -1, 0] = current_aqi
             latest_data_sequence_unscaled[0, -1, 1] = request.temp
             latest_data_sequence_unscaled[0, -1, 2] = request.pm25
         else:
              print("Warning: Could not calculate AQI for current inputs. Last timestep remains historical.")
     try:
         X_scaled = input_scaler.transform(latest_data_sequence_unscaled)
         print("Input data scaled successfully.")
         raise HTTPException(status_code=500, detail="Error processing input data for prediction (scaling).")
     try:
+        scaled_prediction = model.predict(X_scaled, verbose=0)
         print(f"Model prediction made. Scaled prediction shape: {scaled_prediction.shape}")
     except Exception as e:
         print(f"Error during model prediction: {e}")
         raise HTTPException(status_code=500, detail="Error during model prediction.")
     try:
         if latest_data_sequence_unscaled.shape[1] > 0:
+            calculated_aqi_sequence = latest_data_sequence_unscaled[0, :, 0]
             approx_rolling_median_proxy = np.mean(calculated_aqi_sequence[-min(5, SEQUENCE_LENGTH):])
             if pd.isna(approx_rolling_median_proxy) or approx_rolling_median_proxy <= 0:
+                 approx_rolling_median_proxy = 1.0
             corresponding_rolling_median_scaler = np.full((1, request.n_ahead, 1), approx_rolling_median_proxy, dtype=np.float32)
             print(f"Approximated rolling median proxy for inverse transform: {approx_rolling_median_proxy:.2f}")
             y_unscaled_pred_ratio = target_scaler.inverse_transform(scaled_prediction.reshape(1, request.n_ahead, 1))
             print(f"Inverse transformed to ratio scale. Shape: {y_unscaled_pred_ratio.shape}")
             predicted_aqi_values = y_unscaled_pred_ratio * corresponding_rolling_median_scaler
+            predicted_aqi_values = predicted_aqi_values.flatten()
         else:
             print("Error: Input sequence is empty, cannot perform inverse transform.")
         traceback.print_exc()
         raise HTTPException(status_code=500, detail="Error processing prediction results (inverse transform).")
     predictions_list = []
     for i in range(request.n_ahead):
         timestamp_str = prediction_timestamps[i].strftime('%Y-%m-%d %H:%M:%S')
         predictions_list.append({
             "timestamp": timestamp_str,
+            "aqi": float(predicted_aqi_values[i])
         })
     return PredictionResponse(status="success", message="Prediction successful.", predictions=predictions_list)
 @app.get("/")
 async def read_root():
     return {"message": "AQI Prediction API is running."}