# Everything is filled in except the dummy-data part.
#
# The dummy-data part is replaced below using the Open-Meteo API:
import requests
import numpy as np
import pandas as pd # Might be useful for processing time series data
from datetime import datetime, timedelta, timezone # For handling time
import pytz # For timezone handling

# --- Configuration (add your location and API details) ---

OPENMETEO_API_URL = "https://archive-api.open-meteo.com/v1/archive" # Example historical data endpoint

# Replace with the actual latitude and longitude for your location.

LATITUDE = 52.52
LONGITUDE = 13.41

# Specify the weather parameters you need (temperature and pollutants).
# Refer to the Open-Meteo documentation for available parameters.
# Parameters for temperature and pollutants:
#   - temperature_2m
#   - pm10
#   - pm2_5
#   - carbon_monoxide
#   - ... other pollutants if your model uses them

WEATHER_PARAMETERS = ["temperature_2m", "pm10", "pm2_5", "carbon_monoxide"]

# You might need to specify the data interval (e.g., "hourly").

DATA_INTERVAL = "hourly"

# Timezone for the location.

TIMEZONE = "auto" # Or a specific timezone like "Europe/Berlin"

# --- AQI Breakpoints and Calculation Logic (copied from the notebook) ---
# This logic must be available in your deployment environment.

aqi_breakpoints = {
'pm25': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 200, 101, 200), (201, 300, 201, 300)],
'pm10': [(0, 50, 0, 50), (51, 100, 51, 100), (101, 250, 101, 200), (251, 350, 201, 300)],
'co': [(0, 1.0, 0, 50), (1.1, 2.0, 51, 100), (2.1, 10.0, 101, 200), (10.1, 17.0, 201, 300)]
}

def calculate_sub_aqi(concentration, breakpoints):
"""Calculates the sub-AQI for a single pollutant concentration."""
for i_low, i_high, c_low, c_high in breakpoints:
if c_low <= concentration <= c_high:
if c_high == c_low:
return i_low
return ((i_high - i_low) / (c_high - c_low)) * (concentration - c_low) + i_low
if concentration < breakpoints[0][2]:
return breakpoints[0][0]
elif concentration > breakpoints[-1][3]:
return breakpoints[-1][1]
else:
return np.nan

def calculate_overall_aqi(row, aqi_breakpoints):
"""Calculates the overall AQI for a given row (timestamp) based on pollutant sub-AQIs."""
sub_aqis = []
# Map Open-Meteo parameter names to your internal pollutant names if necessary
pollutant_mapping = {
'pm2_5': 'pm25',
'pm10': 'pm10',
'carbon_monoxide': 'co', # Note: Open-Meteo uses 'carbon_monoxide', your breakpoints use 'co'
# Add other mappings if needed
}
for api_pollutant, internal_pollutant in pollutant_mapping.items():
if api_pollutant in row:
# Ensure the concentration is treated as a number (might be NaN)
concentration = row.get(api_pollutant, np.nan)
if not np.isnan(concentration):
sub_aqi = calculate_sub_aqi(concentration, aqi_breakpoints.get(internal_pollutant, []))
sub_aqis.append(sub_aqi)
else:
sub_aqis.append(np.nan) # Pollutant data missing

# The overall AQI is the maximum of the individual pollutant sub-AQIs
return np.nanmax(sub_aqis) if sub_aqis and not all(np.isnan(sub_aqis)) else np.nan # Use nanmax and check if any valid sub_aqis exist

# --- Data Retrieval from Open-Meteo (placeholder with API call) ---

def get_latest_data_sequence(sequence_length, num_features):
"""
Retrieves the latest sequence of data from Open-Meteo, calculates AQI,
and formats it for model input.

Args:
    sequence_length (int): The length of the historical sequence required.
    num_features (int): The number of features in each time step.

Returns:
    np.ndarray: A numpy array containing the historical data sequence.
                Shape: (sequence_length, num_features)
                Returns None or raises an error on failure.
"""
print("Attempting to retrieve data from Open-Meteo...")

# Calculate the start and end dates for the API request
end_date = datetime.now(timezone.utc) # Get current time in UTC
start_date = end_date - timedelta(hours=sequence_length)

# Format dates for the API (YYYY-MM-DD)
start_date_str = start_date.strftime('%Y-%m-%d')
end_date_str = end_date.strftime('%Y-%m-%d')

# API parameters
params = {
    "latitude": LATITUDE,
    "longitude": LONGITUDE,
    "start_date": start_date_str,
    "end_date": end_date_str,
    "hourly": ",".join(WEATHER_PARAMETERS), # Request hourly data for specified parameters
    "timezone": TIMEZONE,
    "models": "best_match", # Use best available model data
     # "api_key": "YOUR_API_KEY" # Uncomment and add your API key if required
}

try:
    response = requests.get(OPENMETEO_API_URL, params=params)
    response.raise_for_status() # Raise an exception for bad status codes (4xx or 5xx)
    data = response.json()
    print("Data retrieved successfully from Open-Meteo.")

    # --- Process the API Response ---
    # The exact structure of the 'hourly' data in the response might vary slightly.
    # You need to inspect the JSON response from a test call to Open-Meteo
    # to get the exact keys and structure.

    if 'hourly' not in data or 'time' not in data['hourly']:
        print("Error: 'hourly' or 'time' not found in Open-Meteo response.")
        return None

    hourly_data = data['hourly']
    timestamps = hourly_data['time']
    # Extract data for requested parameters
    extracted_data = {param: hourly_data.get(param, []) for param in WEATHER_PARAMETERS}

    # Create a Pandas DataFrame for easier processing
    # Open-Meteo timestamps are typically ISO 8601 strings
    df_api = pd.DataFrame(extracted_data, index=pd.to_datetime(timestamps))

    # Resample to hourly and forward fill missing data if necessary to get exactly SEQUENCE_LENGTH points
    # Ensure the index is a proper datetime index
    df_api.index = pd.to_datetime(df_api.index)
    # Resample to ensure hourly frequency and fill missing gaps
    df_api = df_api.resample('H').ffill() # Use forward fill for simplicity, adjust as needed

    # Filter to the exact time range needed (last SEQUENCE_LENGTH hours)
    df_api = df_api[start_date:end_date].tail(sequence_length)

    # --- Calculate Historical AQI (Crucial Placeholder) ---
    # You need to calculate the 'calculated_aqi' for each row in df_api
    # using your calculate_overall_aqi function.
    # This requires mapping the Open-Meteo pollutant names to your aqi_breakpoints keys.

    # Placeholder: Assuming df_api has columns that map to your aqi_breakpoints keys
    # If not, you'll need to rename columns or adjust calculate_overall_aqi.

    # Example: Calculate AQI for the retrieved data
    # Need to map Open-Meteo keys ('pm2_5', 'pm10', 'carbon_monoxide')
    # to your aqi_breakpoints keys ('pm25', 'pm10', 'co').
    df_api['calculated_aqi'] = df_api.apply(
        lambda row: calculate_overall_aqi(
            {'pm25': row.get('pm2_5'), 'pm10': row.get('pm10'), 'co': row.get('carbon_monoxide')},
            aqi_breakpoints
        ),
        axis=1
    )
    # Handle potential NaNs after calculation (e.g., if pollutant data was missing)
    df_api.fillna(method='ffill', inplace=True)
    df_api.fillna(method='bfill', inplace=True)
    df_api.dropna(inplace=True) # Drop if still NaNs

    # Ensure you have exactly SEQUENCE_LENGTH data points
    if len(df_api) != sequence_length:
         print(f"Warning: Retrieved data length ({len(df_api)}) does not match sequence length ({sequence_length}).")
         # You might need more sophisticated handling here, e.g., raise an error or pad data.
         # For now, return None if the length is incorrect.
         return None

    # Reorder columns to match your model's expected input feature order:
    # ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co']
    # Note: Open-Meteo uses 'temperature_2m'. Map this to 'temp'.
    # Note: Open-Meteo uses 'pm2_5', 'pm10', 'carbon_monoxide'. Map these to 'pm25', 'pm10', 'co'.

    # Create a new DataFrame with the correct columns and order
    # Ensure you map the Open-Meteo column names to your model's feature names
    # The mapping needs to be consistent with how you prepared your training data.
    model_features_order = ['calculated_aqi', 'temp', 'pm25', 'pm10', 'co'] # Your model's expected input order
    openmeteo_to_model_feature_map = {
        'calculated_aqi': 'calculated_aqi', # This is the column we just calculated
        'temperature_2m': 'temp',
        'pm2_5': 'pm25',
        'pm10': 'pm10',
        'carbon_monoxide': 'co',
        # Add other mappings if you included other pollutants in your model
    }

    # Filter and reorder columns
    processed_data = df_api.rename(columns={v: k for k, v in openmeteo_to_model_feature_map.items()}) # Rename to your model's feature names
    # Select only the features your model expects, in the correct order
    processed_data = processed_data[model_features_order].tail(sequence_length) # Use tail to ensure the last `sequence_length` points


    # Convert to numpy array
    data_sequence = processed_data.values

    # Ensure the final numpy array has the correct shape
    if data_sequence.shape != (sequence_length, num_features):
         print(f"Error: Processed data shape {data_sequence.shape} does not match expected shape ({sequence_length}, {num_features}).")
         return None

    return data_sequence

except requests.exceptions.RequestException as e:
    print(f"Error fetching data from Open-Meteo API: {e}")
    return None
except Exception as e:
    print(f"Error processing Open-Meteo data: {e}")
    import traceback
    traceback.print_exc()
    return None

# --- Rest of your app.py (load model/scalers, predict function, Gradio) ---
# ... (the rest of the app.py code from the previous response remains the
# same, using the get_latest_data_sequence function defined above) ...

# --- Define Predict Function ---

def predict(): # Inputs remain None if get_latest_data_sequence fetches data internally
"""
Retrieves the latest data sequence from Open-Meteo, preprocesses it,
and makes a prediction.
"""
if model is None or input_scaler is None or target_scaler is None:
return "Model or scaler(s) not loaded. Check logs."

# 1. Get the latest historical data sequence from Open-Meteo
latest_data_sequence = get_latest_data_sequence(SEQUENCE_LENGTH, NUM_INPUT_FEATURES)

if latest_data_sequence is None:
    return "Failed to retrieve or process latest data sequence."

# Ensure the retrieved data has the correct shape (redundant check, but safe)
if latest_data_sequence.shape != (SEQUENCE_LENGTH, NUM_INPUT_FEATURES):
    return f"Error: Retrieved data has incorrect shape {latest_data_sequence.shape}. Expected ({SEQUENCE_LENGTH}, {NUM_INPUT_FEATURES})."


# 2. Scale the data sequence using the loaded input scaler
latest_data_sequence_with_batch = latest_data_sequence[np.newaxis, :, :]
scaled_input_data = input_scaler.transform(latest_data_sequence_with_batch)

# 3. Perform prediction (outputs scaled target)
output = model.predict(scaled_input_data)

# 4. Process the output (get the scaled predicted value)
predicted_scaled_value = output[0][0]

# 5. Inverse transform the prediction using the target scaler
# Ensure target_scaler is loaded.
predicted_original_scale = target_scaler.inverse_transform(np.array([[predicted_scaled_value]]))[0][0]

predicted_value = predicted_original_scale

return float(predicted_value)

# ... (Gradio interface and launch) ...

nikethanreddy changed pull request status to merged

Sign up or log in to comment