"""Train a road-traffic-accident severity classifier and serve it with Gradio.

Running this script:

1. extracts ``ROAD TRAFFIC ACCIDENTS.zip`` and loads ``cleaned.csv``;
2. label-encodes the categorical feature columns and the target column;
3. standardizes the features and fits a ``RandomForestClassifier``;
4. exposes ``predict_accident`` through a Gradio interface.

All of the above (except launching the UI) happens at import time, so the
fitted objects (``model``, ``scaler``, ``label_encoders``, ...) are available
as module-level names.
"""

import os
import zipfile

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Extract ZIP file
zip_path = "ROAD TRAFFIC ACCIDENTS.zip"
extract_folder = "road_traffic_accidents/"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_folder)

# Load dataset
csv_path = os.path.join(extract_folder, "cleaned.csv")
df = pd.read_csv(csv_path)

# Select relevant columns
TARGET_COLUMN = "Accident_severity"
selected_columns = [
    "Age_band_of_driver",
    "Sex_of_driver",
    "Educational_level",
    "Vehicle_driver_relation",
    "Driving_experience",
    "Lanes_or_Medians",
    "Types_of_Junction",
    "Road_surface_type",
    "Light_conditions",
    "Weather_conditions",
    "Type_of_collision",
    "Vehicle_movement",
    "Pedestrian_movement",
    "Cause_of_accident",
    TARGET_COLUMN,
]
feature_columns = [col for col in selected_columns if col != TARGET_COLUMN]

# Explicit copy: the columns are overwritten in place below, and assigning
# into a column subset of another frame can trigger SettingWithCopyWarning.
df = df[selected_columns].copy()

# Encode the target FIRST, while it still holds the original labels, so that
# ``severity_encoder.classes_`` contains the severity names and not the
# integer codes produced by a previous encoding pass.
severity_encoder = LabelEncoder()
df[TARGET_COLUMN] = severity_encoder.fit_transform(df[TARGET_COLUMN])

# Encode categorical FEATURES only. The target must not end up in
# ``label_encoders``: that dict is walked in lockstep with the UI inputs,
# which exist for features only.
label_encoders = {}
for col in feature_columns:
    # ``not is_numeric_dtype`` covers both ``object`` columns and the
    # dedicated string dtype used by newer pandas versions.
    if not pd.api.types.is_numeric_dtype(df[col]):
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le  # Save encoder for encoding UI input later

# Split data into features (X) and target (y)
X = df[feature_columns]
y = df[TARGET_COLUMN]

# Standardize features (fitted on a DataFrame, so column names are recorded)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42
)

# Train RandomForest model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Map encoded class index -> original severity label
severity_mapping = dict(enumerate(severity_encoder.classes_))


def predict_accident(*features):
    """Predict the accident severity label for one set of feature values.

    Args:
        *features: One raw (un-encoded) value per entry of
            ``feature_columns``, in the same order as the Gradio inputs.

    Returns:
        The predicted severity label as a string, or ``"Unknown"`` if the
        predicted class index is not present in ``severity_mapping``.

    Raises:
        ValueError: If the number of values does not match the number of
            feature columns.
        gr.Error: If a value is missing or was not seen during training, so
            the UI can show a readable message.
    """
    if len(features) != len(feature_columns):
        raise ValueError(
            f"Expected {len(feature_columns)} feature values, "
            f"got {len(features)}."
        )

    encoded_row = []
    for col, value in zip(feature_columns, features):
        if value is None or value == "":
            raise gr.Error(f"Please select a value for '{col}'.")

        encoder = label_encoders.get(col)
        if encoder is None:
            # Numeric feature: it was never label-encoded, use it as-is.
            encoded_row.append(float(value))
            continue

        try:
            encoded_row.append(encoder.transform([value])[0])
        except ValueError as err:
            raise gr.Error(f"Unknown value {value!r} for '{col}'.") from err

    # Use a DataFrame with the training column names so the scaler sees the
    # same feature names it was fitted with.
    row = pd.DataFrame([encoded_row], columns=feature_columns)
    features_scaled = scaler.transform(row)
    prediction = model.predict(features_scaled)[0]
    return str(severity_mapping.get(int(prediction), "Unknown"))


def _make_input(col):
    """Build the Gradio input component for feature column ``col``."""
    encoder = label_encoders.get(col)
    if encoder is None:
        # No encoder means the column is numeric.
        return gr.Number(label=col)
    return gr.Dropdown(choices=encoder.classes_.tolist(), label=col)


# Create Gradio UI with dropdowns instead of text inputs
input_features = [_make_input(col) for col in feature_columns]

iface = gr.Interface(
    fn=predict_accident,
    inputs=input_features,
    outputs="text",
    title="Traffic Accident Severity Prediction",
    description="Select accident-related details to predict severity.",
)

# Run the app
if __name__ == "__main__":
    iface.launch()