opinder2906 committed on
Commit
cc472a5
·
verified ·
1 Parent(s): 206264d

Create app3.py

Browse files
Files changed (1) hide show
  1. app3.py +80 -0
app3.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import seaborn as sns
6
+
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.preprocessing import LabelEncoder, StandardScaler
9
+ from sklearn.ensemble import RandomForestClassifier
10
+ from sklearn.metrics import classification_report, confusion_matrix
11
+
12
# Silence the warning that older Streamlit releases show when ``st.pyplot()``
# is called without an explicit figure.  Newer Streamlit releases no longer
# recognise this option and raise StreamlitAPIException for unknown keys, so
# tolerate its absence instead of crashing the whole app at startup.
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except st.errors.StreamlitAPIException:
    # Option not available in this Streamlit version; nothing to silence.
    pass

st.title("Lightweight Electric Vehicle ML App")
15
+
16
# Default location of the dataset (a Google Drive direct-download link).
DATA_URL = "https://drive.google.com/uc?export=download&id=1QBTnXxORRbJzE5Z2aqKHsVqgB7mqowiN"


@st.cache_data
def load_data(url: str = DATA_URL) -> pd.DataFrame:
    """Read the CSV dataset at *url* into a DataFrame.

    The function is wrapped in ``st.cache_data``.

    Args:
        url: Location of the CSV file. Defaults to ``DATA_URL`` so existing
            ``load_data()`` calls keep working unchanged.

    Returns:
        The parsed CSV contents as returned by ``pd.read_csv``.
    """
    return pd.read_csv(url)
20
+
21
df = load_data()

st.subheader("1. Dataset Preview")
st.write(df.head())

# Columns that contain no values at all carry no information, and a text
# column in that state would crash the imputation below because ``mode()``
# of an all-NaN column is empty.  Drop them before imputing.
df = df.dropna(axis=1, how="all")

# Fill missing values: most frequent value for text columns, median for
# numeric columns.
for col in df.select_dtypes(include='object').columns:
    df[col] = df[col].fillna(df[col].mode()[0])
for col in df.select_dtypes(include=np.number).columns:
    df[col] = df[col].fillna(df[col].median())

# Encode categorical columns as integer codes, using a fresh encoder for
# each column.
cat_cols = df.select_dtypes(include='object').columns
for col in cat_cols:
    df[col] = LabelEncoder().fit_transform(df[col])
37
+
38
# Build a binary target from 'Electric Range' (1 = above the column median,
# 0 = otherwise) and use every other column as a candidate feature.
if 'Electric Range' not in df.columns:
    st.error("Dataset missing 'Electric Range' column.")
    st.stop()
else:
    range_median = df['Electric Range'].median()
    df['Target'] = df['Electric Range'].gt(range_median).astype(int)
    y = df['Target']
    X = df.drop(columns=['Electric Range', 'Target'])

# Keep only the first five numeric columns, in their existing column order
# (no ranking is applied).
num_features = X.select_dtypes(include=np.number).columns[:5]
X = X[num_features]
50
+
51
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply it to both partitions.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train a 50-tree random forest and predict labels for the held-out rows.
model = RandomForestClassifier(n_estimators=50, random_state=42)
model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)
63
+
64
# Report held-out performance: a per-class text report followed by the
# confusion matrix.
st.subheader("2. Model Evaluation")

report_text = classification_report(y_test, y_pred)
st.text("Classification Report:")
st.text(report_text)

conf_matrix = confusion_matrix(y_test, y_pred)
st.text("Confusion Matrix:")
st.write(conf_matrix)
70
+
71
# Optional: bar chart of the forest's feature importances, most important
# first.
if st.checkbox("Show Feature Importance"):
    importances = model.feature_importances_
    indices = np.argsort(importances)[::-1]  # descending importance
    positions = np.arange(len(indices))

    # Draw on an explicit figure instead of Matplotlib's global pyplot state:
    # calling ``st.pyplot()`` with no figure is deprecated in Streamlit and
    # the global figure is not thread-safe.
    fig, ax = plt.subplots(figsize=(8, 4))
    ax.set_title("Feature Importances")
    ax.bar(positions, importances[indices], align="center")
    ax.set_xticks(positions)
    ax.set_xticklabels([num_features[i] for i in indices], rotation=45)
    fig.tight_layout()

    st.pyplot(fig)
    # Release the figure so it does not stay registered with pyplot after
    # this run of the script.
    plt.close(fig)