opinder2906 committed on
Commit
aa36d26
·
verified ·
1 Parent(s): 831c7aa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -60
app.py CHANGED
@@ -1,80 +1,44 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import numpy as np
4
- import matplotlib.pyplot as plt
5
- import seaborn as sns
6
-
7
- from sklearn.model_selection import train_test_split
8
- from sklearn.preprocessing import LabelEncoder, StandardScaler
9
  from sklearn.ensemble import RandomForestClassifier
10
- from sklearn.metrics import classification_report, confusion_matrix
11
-
12
- st.set_option('deprecation.showPyplotGlobalUse', False)
13
 
14
- st.title("Lightweight Electric Vehicle ML App")
 
15
 
16
  @st.cache_data
17
  def load_data():
18
  url = "https://drive.google.com/uc?export=download&id=1QBTnXxORRbJzE5Z2aqKHsVqgB7mqowiN"
19
  return pd.read_csv(url)
20
 
 
21
  df = load_data()
22
-
23
- st.subheader("1. Dataset Preview")
24
- st.write(df.head())
25
-
26
- # Fill missing values
27
  for col in df.select_dtypes(include='object').columns:
28
  df[col] = df[col].fillna(df[col].mode()[0])
29
- for col in df.select_dtypes(include=np.number).columns:
 
30
  df[col] = df[col].fillna(df[col].median())
31
 
32
- # Encode categorical columns
33
- cat_cols = df.select_dtypes(include='object').columns
34
- for col in cat_cols:
35
- le = LabelEncoder()
36
- df[col] = le.fit_transform(df[col])
37
-
38
- # Create target and features
39
- if 'Electric Range' in df.columns:
40
- df['Target'] = (df['Electric Range'] > df['Electric Range'].median()).astype(int)
41
- y = df['Target']
42
- X = df.drop(columns=['Electric Range', 'Target'])
43
- else:
44
- st.error("Dataset missing 'Electric Range' column.")
45
  st.stop()
46
 
47
- # Use only top 5 numeric features
48
- num_features = X.select_dtypes(include=np.number).columns[:5]
49
- X = X[num_features]
50
-
51
- # Train/Test Split
52
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
53
-
54
- # Standardize features
55
- scaler = StandardScaler()
56
- X_train_scaled = scaler.fit_transform(X_train)
57
- X_test_scaled = scaler.transform(X_test)
58
-
59
- # Model Training
60
- model = RandomForestClassifier(n_estimators=50, random_state=42)
61
- model.fit(X_train_scaled, y_train)
62
- y_pred = model.predict(X_test_scaled)
63
 
64
- st.subheader("2. Model Evaluation")
65
- st.text("Classification Report:")
66
- st.text(classification_report(y_test, y_pred))
67
 
68
- st.text("Confusion Matrix:")
69
- st.write(confusion_matrix(y_test, y_pred))
 
 
70
 
71
- # Optional: simple plot
72
- if st.checkbox("Show Feature Importance"):
73
- importances = model.feature_importances_
74
- indices = np.argsort(importances)[::-1]
75
- plt.figure(figsize=(8, 4))
76
- plt.title("Feature Importances")
77
- plt.bar(range(len(indices)), importances[indices], align="center")
78
- plt.xticks(range(len(indices)), [num_features[i] for i in indices], rotation=45)
79
- plt.tight_layout()
80
- st.pyplot()
 
1
  import streamlit as st
2
  import pandas as pd
 
 
 
 
 
 
3
  from sklearn.ensemble import RandomForestClassifier
4
+ from sklearn.model_selection import train_test_split
5
+ from sklearn.preprocessing import LabelEncoder
 
6
 
7
+ st.set_page_config(page_title="EV Predictor", layout="centered")
8
+ st.title("🔋 EV Range Classifier (Ultra-Light)")
9
 
10
  @st.cache_data
11
  def load_data():
12
  url = "https://drive.google.com/uc?export=download&id=1QBTnXxORRbJzE5Z2aqKHsVqgB7mqowiN"
13
  return pd.read_csv(url)
14
 
15
+ # Load and clean data
16
  df = load_data()
 
 
 
 
 
17
  for col in df.select_dtypes(include='object').columns:
18
  df[col] = df[col].fillna(df[col].mode()[0])
19
+ df[col] = LabelEncoder().fit_transform(df[col])
20
+ for col in df.select_dtypes(include='number').columns:
21
  df[col] = df[col].fillna(df[col].median())
22
 
23
+ # Prepare features
24
+ target_col = 'Electric Range'
25
+ if target_col not in df.columns:
26
+ st.error("Required column not found: 'Electric Range'")
 
 
 
 
 
 
 
 
 
27
  st.stop()
28
 
29
+ df['Target'] = (df[target_col] > df[target_col].median()).astype(int)
30
+ feature_cols = [col for col in df.select_dtypes(include='number').columns if col != target_col and col != 'Target'][:2]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
+ X = df[feature_cols]
33
+ y = df['Target']
 
34
 
35
+ # Train model on split
36
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
37
+ model = RandomForestClassifier(n_estimators=10, random_state=42)
38
+ model.fit(X_train, y_train)
39
 
40
+ # Output
41
+ acc = model.score(X_test, y_test)
42
+ st.success(f"✅ Accuracy: {acc:.2f}")
43
+ if st.checkbox("Show features used"):
44
+ st.write(feature_cols)