Spaces:
Runtime error
Runtime error
File size: 1,943 Bytes
55a920e efa5966 fda7c4e efa5966 fda7c4e 55a920e fda7c4e 55a920e fda7c4e 55a920e fda7c4e 55a920e efa5966 55a920e fda7c4e efa5966 55a920e efa5966 fda7c4e efa5966 fda7c4e efa5966 fda7c4e efa5966 fda7c4e 55a920e fda7c4e 55a920e fda7c4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, RocCurveDisplay
from sklearn.decomposition import PCA
# Page setup.
# The global-pyplot deprecation flag is only a warning silencer. Streamlit
# raises StreamlitAPIException from set_option() when the key is not a
# recognised config option, so on releases that no longer ship this flag the
# call would abort the whole app at startup. Treat it as best-effort.
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except st.errors.StreamlitAPIException:
    # Option not known to this Streamlit version; nothing to silence.
    pass
st.title("Electric Vehicle ML Dashboard (Optimized for Hugging Face)")
# Load data
@st.cache_data
def load_data():
    """Download the dataset CSV and return it as a DataFrame.

    The result is wrapped in ``st.cache_data`` so the download is not
    repeated on every script rerun.
    """
    csv_url = "https://drive.google.com/uc?export=download&id=1QBTnXxORRbJzE5Z2aqKHsVqgB7mqowiN"
    frame = pd.read_csv(csv_url)
    return frame
# Fetch the (cached) dataset and show its first rows under a section heading.
df = load_data()
st.subheader("1. Data Preview")
st.dataframe(df.head(n=5))
# Fill missing values
# Text columns take their most frequent value. mode() returns an empty Series
# when a column has no non-missing entries, so fall back to a placeholder
# instead of indexing into an empty result.
for col in df.select_dtypes(include='object').columns:
    col_modes = df[col].mode()
    fill_value = col_modes.iloc[0] if not col_modes.empty else 'Unknown'
    df[col] = df[col].fillna(fill_value)
# Numeric columns take their median. A column with no non-missing entries has
# a NaN median, which would leave every gap in place, so use 0 there.
for col in df.select_dtypes(include=np.number).columns:
    col_median = df[col].median()
    df[col] = df[col].fillna(0 if pd.isna(col_median) else col_median)
# Encode categories
# Each remaining text column is replaced by integer codes from its own
# freshly fitted LabelEncoder (the encoders are not kept).
for col in df.select_dtypes(include='object').columns:
    df[col] = LabelEncoder().fit_transform(df[col])
# Feature engineering
# Derive the vehicle's age relative to the fixed reference year 2025, but only
# when the dataset actually carries a 'Model Year' column.
if 'Model Year' in df:
    df['Vehicle_Age'] = df['Model Year'].rsub(2025)
# Target setup
# Without the range column there is nothing to predict: report and halt the app.
if 'Electric Range' not in df:
    st.error("'Electric Range' column missing!")
    st.stop()
# Binary label: 1 when a row's range is strictly above the column median, else 0.
df['Target'] = df['Electric Range'].gt(df['Electric Range'].median()).astype(int)
y = df['Target']
# Features are every column except the raw range and the label derived from it.
X = df.drop(['Electric Range', 'Target'], axis=1)
# Feature selection via Random Forest
# Standardise all features, then fit a 50-tree forest (fixed seed) on the
# scaled matrix; its importances are read by the code that follows.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
rf = RandomForestClassifier(random_state=42, n_estimators=50)
rf.fit(X_scaled, y)
top_features = pd.Series(rf.feature_importances_, index=X.columns).nlargest(5).index.tolis_
|