# app.py -- author: Snigs98; commit ac799e2 (verified): "Create app.py"
import zipfile
import os
import pandas as pd
import gradio as gr
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
# Locations of the bundled archive, the extraction target, and the CSV inside it.
zip_path = "ROAD TRAFFIC ACCIDENTS.zip"
extract_folder = "road_traffic_accidents/"
csv_path = os.path.join(extract_folder, "cleaned.csv")

# Unpack the whole archive into the extraction folder; the context manager
# closes the archive handle once extraction finishes.
with zipfile.ZipFile(zip_path, mode='r') as zip_ref:
    zip_ref.extractall(path=extract_folder)

# Read the extracted CSV into the working DataFrame.
df = pd.read_csv(csv_path)
# Name of the column the model predicts; every other selected column is a feature.
TARGET_COLUMN = "Accident_severity"

# Columns kept from the raw dataset. The feature order here is the order of
# the model's inputs, because X is later built by dropping only the target.
selected_columns = [
    "Age_band_of_driver", "Sex_of_driver", "Educational_level",
    "Vehicle_driver_relation", "Driving_experience", "Lanes_or_Medians",
    "Types_of_Junction", "Road_surface_type", "Light_conditions",
    "Weather_conditions", "Type_of_collision", "Vehicle_movement",
    "Pedestrian_movement", "Cause_of_accident", TARGET_COLUMN,
]

# .copy() yields an independent frame, so the column assignments below never
# write into a slice of the originally loaded data (and raise no
# SettingWithCopyWarning).
df = df[selected_columns].copy()

# Encode every non-numeric FEATURE column with its own LabelEncoder.
# The target is skipped on purpose: label_encoders must contain feature
# encoders only, since predict_accident() pairs them with the UI inputs and
# the UI builds one dropdown per feature from these encoders.
# is_numeric_dtype is used instead of `dtype == 'object'` so that text columns
# stored with pandas' dedicated string dtype are encoded as well.
label_encoders = {}
for col in df.columns:
    if col == TARGET_COLUMN:
        continue
    if not pd.api.types.is_numeric_dtype(df[col]):
        le = LabelEncoder()
        df[col] = le.fit_transform(df[col])
        label_encoders[col] = le  # Kept so raw UI values can be encoded later

# Encode the target exactly once, directly from its original values, so that
# severity_encoder.classes_ holds the original severity labels rather than
# integer codes produced by an earlier encoding pass.
severity_encoder = LabelEncoder()
df[TARGET_COLUMN] = severity_encoder.fit_transform(df[TARGET_COLUMN])
# Separate the encoded target column (y) from the feature columns (X).
y = df["Accident_severity"]
X = df.drop(columns=["Accident_severity"])

# Fit the scaler on the full feature matrix, then apply it to that same matrix.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training portion.
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Map each encoded class index to the corresponding entry of the encoder's classes_.
severity_mapping = dict(enumerate(severity_encoder.classes_))
# Define the prediction function
def predict_accident(*features):
    """Predict the accident severity for one set of UI selections.

    Args:
        *features: One raw (un-encoded) value per model feature, given in the
            same order as ``X.columns``.

    Returns:
        The ``severity_mapping`` entry for the predicted class, or
        ``"Unknown"`` when the predicted class is not in the mapping.

    Raises:
        ValueError: If the number of values does not match the number of
            model features, or if any value is ``None`` (nothing selected).
    """
    feature_names = list(X.columns)
    if len(features) != len(feature_names):
        raise ValueError(
            f"Expected {len(feature_names)} feature values, got {len(features)}."
        )

    feature_values = []
    # Walk the model's feature columns instead of label_encoders.items():
    # the dict is not guaranteed to contain exactly one entry per feature, and
    # indexing `features` by a position within the dict can run past the end
    # of the inputs or pair a value with the wrong column's encoder.
    for col, value in zip(feature_names, features):
        if value is None:
            raise ValueError(f"No value selected for '{col}'.")
        le = label_encoders.get(col)
        # A feature without an encoder was not label-encoded during
        # preprocessing, so its value is passed through unchanged.
        feature_values.append(value if le is None else le.transform([value])[0])

    features_scaled = scaler.transform([feature_values])  # Same scaling as training
    prediction = model.predict(features_scaled)[0]
    return severity_mapping.get(prediction, "Unknown")
# Build one dropdown per feature column; each dropdown offers the classes_
# stored on that column's fitted encoder and is labelled with the column name.
input_features = []
for col in X.columns:
    choices = list(label_encoders[col].classes_)
    input_features.append(gr.Dropdown(choices=choices, label=col))

# Wire the dropdowns to the prediction function; the result is shown as text.
iface = gr.Interface(
    fn=predict_accident,
    inputs=input_features,
    outputs="text",
    title="Traffic Accident Severity Prediction",
    description="Select accident-related details to predict severity.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()