Ashar086 committed on
Commit
d39c40e
·
verified ·
1 Parent(s): 260a971

Delete machine_learning.py

Browse files
Files changed (1) hide show
  1. machine_learning.py +0 -106
machine_learning.py DELETED
@@ -1,106 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import numpy as np
4
- from sklearn.model_selection import train_test_split
5
- from sklearn.preprocessing import StandardScaler
6
- from sklearn.linear_model import LogisticRegression
7
- from sklearn.tree import DecisionTreeClassifier
8
- from sklearn.ensemble import RandomForestClassifier
9
- from sklearn.svm import SVC
10
- from sklearn.metrics import accuracy_score, classification_report
11
- from sklearn.cluster import KMeans
12
- from sklearn.decomposition import PCA
13
-
14
class MachineLearning:
    """Streamlit front end for quick scikit-learn experiments on a DataFrame.

    Every public method draws its own widgets with ``st`` and renders its
    results directly into the page; none of them returns a value.
    """

    # Largest number of principal components the PCA slider will offer.
    MAX_PCA_COMPONENTS = 10

    def perform_ml_tasks(self, df):
        """Let the user pick an ML task and run it on ``df``.

        Args:
            df: DataFrame handed unchanged to the selected task method.
        """
        # Insertion order of the dict is the order shown in the select box.
        handlers = {
            "Classification": self.perform_classification,
            "Clustering": self.perform_clustering,
            "Dimensionality Reduction": self.perform_dimensionality_reduction,
        }
        task_type = st.selectbox("Select ML task", list(handlers))
        handlers[task_type](df)

    def perform_classification(self, df):
        """Train a user-selected classifier and show its test-set metrics.

        The user picks a target column, feature columns and a model type.
        The data is split 80/20 with a fixed seed, features are standardised
        using statistics from the training split only, and accuracy plus a
        classification report are written to the page.

        Args:
            df: DataFrame containing the target and candidate feature columns.
        """
        target_column = st.selectbox("Select target column", df.columns)
        feature_columns = st.multiselect(
            "Select feature columns", df.columns.drop(target_column)
        )
        if not feature_columns:
            # Nothing to train on until at least one feature is chosen.
            return

        X_train, X_test, y_train, y_test = train_test_split(
            df[feature_columns],
            df[target_column],
            test_size=0.2,
            random_state=42,
        )

        # Fit the scaler on the training split only so no test-set statistics
        # leak into training.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Built here rather than at class level so the class can be defined
        # without evaluating the estimator names.
        classifiers = {
            "Logistic Regression": LogisticRegression,
            "Decision Tree": DecisionTreeClassifier,
            "Random Forest": RandomForestClassifier,
            "SVM": SVC,
        }
        model_type = st.selectbox("Select model type", list(classifiers))
        model = classifiers[model_type]()

        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)

        st.subheader("Classification Results")
        st.write(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
        st.write("Classification Report:")
        st.code(classification_report(y_test, y_pred))

    def perform_clustering(self, df):
        """Run K-means on user-selected columns and display the clusters.

        Features are standardised before clustering. When at least two
        features are selected, the first two are drawn as a scatter plot
        coloured by cluster. Cluster centres are reported in the original
        (unscaled) feature units.

        NOTE: the cluster labels are written into ``df`` in place as a
        ``'Cluster'`` column, so the caller's DataFrame is modified.

        Args:
            df: DataFrame containing the candidate feature columns.
        """
        feature_columns = st.multiselect(
            "Select feature columns for clustering", df.columns
        )
        if not feature_columns:
            return

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df[feature_columns])

        n_clusters = st.slider(
            "Select number of clusters", min_value=2, max_value=10, value=3
        )
        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
        df['Cluster'] = kmeans.fit_predict(X_scaled)

        st.subheader("Clustering Results")
        if len(feature_columns) >= 2:
            # Imported locally: the module-level imports never bring ``px``
            # into scope, so the bare name raised NameError here.
            import plotly.express as px

            fig = px.scatter(
                df, x=feature_columns[0], y=feature_columns[1], color='Cluster'
            )
            st.plotly_chart(fig)

        st.write("Cluster Centers:")
        # Undo the standardisation so centres are shown in the data's units.
        cluster_centers = scaler.inverse_transform(kmeans.cluster_centers_)
        st.write(pd.DataFrame(cluster_centers, columns=feature_columns))

    def perform_dimensionality_reduction(self, df):
        """Run PCA on user-selected columns and display the projection.

        Features are standardised, reduced to a user-chosen number of
        components (at least 2), and the first two components are plotted.
        The explained variance ratios and the component loadings are also
        written to the page.

        Args:
            df: DataFrame containing the candidate feature columns.
        """
        feature_columns = st.multiselect(
            "Select feature columns for dimensionality reduction", df.columns
        )
        if not feature_columns:
            return

        max_components = self._max_pca_components(len(feature_columns), len(df))
        if max_components < 2:
            # The slider starts at 2 components; with a single feature (or a
            # single row) that request cannot be satisfied, so stop with a
            # readable message instead of failing further down.
            st.warning(
                "Select at least two feature columns (with at least two rows "
                "of data) to run PCA."
            )
            return

        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(df[feature_columns])

        n_components = st.slider(
            "Select number of components",
            min_value=2,
            max_value=max_components,
            value=2,
        )
        pca = PCA(n_components=n_components)
        X_pca = pca.fit_transform(X_scaled)

        st.subheader("PCA Results")
        explained_variance_ratio = pca.explained_variance_ratio_
        st.write(f"Explained Variance Ratio: {explained_variance_ratio}")

        # The slider guarantees at least two components, so the first two
        # columns of the projection always exist.
        # Imported locally: the module-level imports never bring ``px`` into
        # scope, so the bare name raised NameError here.
        import plotly.express as px

        fig = px.scatter(x=X_pca[:, 0], y=X_pca[:, 1], title="PCA Visualization")
        st.plotly_chart(fig)

        st.write("PCA Components:")
        st.write(pd.DataFrame(pca.components_, columns=feature_columns))

    @staticmethod
    def _max_pca_components(n_features, n_rows):
        """Return the largest component count the PCA slider may offer.

        The result is capped by the number of selected features, the number
        of rows, and ``MAX_PCA_COMPONENTS``.
        """
        return min(n_features, n_rows, MachineLearning.MAX_PCA_COMPONENTS)