"""Train a random-forest temperature model on a weather CSV and serve it with Gradio.

Running this file extracts the dataset archive, fits the model, and launches a
web form that calls ``predict_temperature``.
"""

import os
import subprocess
import sys
import zipfile

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Gradio is imported only inside the guard: an unguarded import earlier in the
# file would raise before this fallback could ever run.
try:
    import gradio as gr
except ImportError:
    # Install with the interpreter that is running this script so the package
    # lands in the same environment that will import it.
    subprocess.run([sys.executable, "-m", "pip", "install", "gradio"], check=True)
    import gradio as gr

# Define file paths
zip_file_path = "AI-powered Weather Forecasting.zip"
extract_folder = "weather_forecasting_dataset"

# Extract the ZIP file
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_folder)

# Locate CSV file
csv_file_path = os.path.join(extract_folder, "weatherHistory.csv")

# Load the dataset
df = pd.read_csv(csv_file_path)

# Convert 'Formatted Date' to timezone-aware datetimes and use it as the index
df['Formatted Date'] = pd.to_datetime(df['Formatted Date'], utc=True)
df.set_index('Formatted Date', inplace=True)

# Drop columns that are not used as model inputs
df.drop(columns=['Summary', 'Daily Summary', 'Apparent Temperature (C)'], inplace=True)

# Fill missing values in 'Precip Type'. Assign the result back instead of
# calling fillna(inplace=True) on the column selection: that form is deprecated
# and does not modify `df` when pandas Copy-on-Write is active.
df['Precip Type'] = df['Precip Type'].fillna("rain")

# Encode categorical variable 'Precip Type' as integers
le = LabelEncoder()
df['Precip Type'] = le.fit_transform(df['Precip Type'])

# Feature engineering: extract time-based features from the datetime index
df['Year'] = df.index.year
df['Month'] = df.index.month
df['Day'] = df.index.day
df['Hour'] = df.index.hour

# Print final column names before training
print("✅ Final Training Features:", df.columns.tolist())

# Define target variable (temperature prediction)
X = df.drop(columns=['Temperature (C)'])
y = df['Temperature (C)']

# Store feature names; prediction inputs are aligned to this exact order
feature_names = X.columns.tolist()
num_features = len(feature_names)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train RandomForestRegressor. n_jobs=-1 builds the 500 trees on all cores;
# random_state stays fixed so the run remains reproducible.
model = RandomForestRegressor(n_estimators=500, random_state=42, n_jobs=-1)
model.fit(X_train, y_train)

# Debug: print feature importance
feature_importance = model.feature_importances_
print("📊 Feature Importance:", dict(zip(feature_names, feature_importance)))


def predict_temperature(precip_type, humidity, wind_speed, wind_bearing, visibility,
                        pressure, loud_cover, year, month, day, hour):
    """Predict the temperature for one set of weather readings.

    The inputs are placed into a one-row DataFrame keyed by column name and
    reordered to ``feature_names``, so each value reaches the feature it was
    trained on regardless of the order of the parameters.

    Args:
        precip_type: Precipitation label; must be a class known to ``le``.
        humidity: Value for the "Humidity" feature.
        wind_speed: Value for the "Wind Speed (km/h)" feature.
        wind_bearing: Value for the "Wind Bearing (degrees)" feature.
        visibility: Value for the "Visibility (km)" feature.
        pressure: Value for the "Pressure (millibars)" feature.
        loud_cover: Value for the "Loud Cover" feature.
        year: Value for the "Year" feature.
        month: Value for the "Month" feature.
        day: Value for the "Day" feature.
        hour: Value for the "Hour" feature.

    Returns:
        ``"Predicted Temperature: <value>°C"`` on success, or ``"Error: <message>"``
        if encoding, alignment, or prediction fails.
    """
    try:
        # Encode categorical variable with the encoder fitted on the training data
        precip_type_encoded = le.transform([precip_type])[0]

        # Keys mirror the form labels and are assumed to equal the CSV column
        # names; the check below reports any that do not.
        named_values = {
            "Precip Type": precip_type_encoded,
            "Humidity": humidity,
            "Wind Speed (km/h)": wind_speed,
            "Wind Bearing (degrees)": wind_bearing,
            "Visibility (km)": visibility,
            "Pressure (millibars)": pressure,
            "Loud Cover": loud_cover,
            "Year": year,
            "Month": month,
            "Day": day,
            "Hour": hour,
        }

        # Debug: print input features before prediction
        print("🔹 Prediction Input Features:", feature_names)
        print("🔹 Prediction Input Sample:", named_values)

        # An input that matches no training column would be dropped silently by
        # the reindex below, so surface it as an error instead.
        unknown = [name for name in named_values if name not in feature_names]
        if unknown:
            raise ValueError(f"Form fields not present in the training data: {unknown}")

        # Training features the form does not supply are padded with zeros.
        missing_features = sum(1 for name in feature_names if name not in named_values)
        sample_data = pd.DataFrame([named_values]).reindex(columns=feature_names, fill_value=0.0)
        if missing_features:
            print(f"⚠️ Added {missing_features} missing features to match model training!")

        # Debug: print adjusted sample data
        print("🔹 Adjusted Sample Data:", sample_data)

        # Predict temperature
        prediction = model.predict(sample_data)[0]

        # Debug: print final prediction value
        print("🔥 Final Prediction:", prediction)

        return f"Predicted Temperature: {prediction:.2f}°C"

    except Exception as e:
        # UI boundary: report the problem in the output box rather than crash the app.
        return f"Error: {e}"


# Gradio UI: one input per parameter of predict_temperature, in the same order
inputs = [
    gr.Radio(["rain", "snow"], label="Precip Type"),
    gr.Number(label="Humidity"),
    gr.Number(label="Wind Speed (km/h)"),
    gr.Number(label="Wind Bearing (degrees)"),
    gr.Number(label="Visibility (km)"),
    gr.Number(label="Pressure (millibars)"),
    gr.Number(label="Loud Cover", value=0.0),  # Default to 0 if always 0
    gr.Number(label="Year"),
    gr.Number(label="Month"),
    gr.Number(label="Day"),
    gr.Number(label="Hour"),
]

demo = gr.Interface(fn=predict_temperature, inputs=inputs, outputs="text",
                    title="AI-Powered Weather Forecasting")
demo.launch()