import streamlit as st
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Page chrome: browser-tab title and layout first, then the visible heading.
st.set_page_config(
    page_title="EV Predictor",
    layout="centered",
)
st.title("🔋 EV Range Classifier (Ultra-Light)")

@st.cache_data
def load_data():
    """Download the dataset CSV from its fixed Google Drive link.

    Returns:
        The parsed CSV as a ``pandas.DataFrame``. The call is wrapped in
        ``st.cache_data``, so the download is not repeated on every rerun.
    """
    csv_url = "https://drive.google.com/uc?export=download&id=1QBTnXxORRbJzE5Z2aqKHsVqgB7mqowiN"
    frame = pd.read_csv(csv_url)
    return frame

@st.cache_data
def _clean_frame(raw: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *raw* with missing values filled and text encoded.

    Object-dtype columns are imputed with their most frequent value and then
    label-encoded to integers; numeric columns are imputed with their median.
    The result is cached so that Streamlit reruns (triggered by any widget
    interaction) do not repeat the encoding pass.
    """
    cleaned = raw.copy()  # never mutate the caller's frame
    for col in cleaned.select_dtypes(include='object').columns:
        modes = cleaned[col].mode()
        # mode() is empty when the column is entirely NaN, so indexing it
        # unconditionally would raise; fall back to a placeholder label.
        fill_value = modes.iloc[0] if not modes.empty else "missing"
        cleaned[col] = LabelEncoder().fit_transform(cleaned[col].fillna(fill_value))
    for col in cleaned.select_dtypes(include='number').columns:
        median = cleaned[col].median()
        # The median of an all-NaN column is NaN, which would leave the NaNs
        # in place; use 0 so no missing value reaches the model.
        cleaned[col] = cleaned[col].fillna(0 if pd.isna(median) else median)
    return cleaned


@st.cache_resource
def _fit_model(X_train: pd.DataFrame, y_train: pd.Series) -> RandomForestClassifier:
    """Fit the 10-tree random forest once per distinct training set.

    Cached as a resource so toggling a widget does not retrain the model.
    """
    forest = RandomForestClassifier(n_estimators=10, random_state=42)
    forest.fit(X_train, y_train)
    return forest


# Load data
df = load_data()

# Validate the target column first so a wrong or unexpected file fails fast,
# before any cleaning work is done.
target_col = 'Electric Range'
if target_col not in df.columns:
    st.error("Required column not found: 'Electric Range'")
    st.stop()

# Clean data (imputation + label encoding)
df = _clean_frame(df)

# Prepare features: binary target = "range above the median"; the features are
# the first two numeric columns other than the target itself.
df['Target'] = (df[target_col] > df[target_col].median()).astype(int)
feature_cols = [
    col
    for col in df.select_dtypes(include='number').columns
    if col not in (target_col, 'Target')
][:2]
if not feature_cols:
    # Fitting on zero columns would raise inside scikit-learn; report it instead.
    st.error("No numeric feature columns available to train on.")
    st.stop()

X = df[feature_cols]
y = df['Target']

# Train model on split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = _fit_model(X_train, y_train)

# Output
acc = model.score(X_test, y_test)
st.success(f"✅ Accuracy: {acc:.2f}")
if st.checkbox("Show features used"):
    st.write(feature_cols)