Streamlit data-analysis app (PCA, clustering, outlier detection).
# Streamlit app for exploratory data analysis on an uploaded CSV:
# 2-D PCA projection, clustering (KMeans, DBSCAN) and outlier
# detection (Local Outlier Factor, Isolation Forest).
#
# Run with: streamlit run <filename>.py
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans, DBSCAN
from sklearn.metrics import silhouette_score
from sklearn.neighbors import LocalOutlierFactor
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

st.title('Data Analysis with Streamlit')

uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
if uploaded_file is not None:
    # Read the uploaded CSV file and show a preview.
    df = pd.read_csv(uploaded_file)
    st.write("Data loaded successfully.")
    st.write(df.head())

    # Keep only numeric columns and drop incomplete rows:
    # StandardScaler / PCA / KMeans all raise on NaN input.
    numeric_df = df.select_dtypes(include=[np.number]).dropna()

    # PCA(n_components=2) needs >= 2 numeric columns, and KMeans with
    # 3 clusters plus a silhouette score needs a handful of rows.
    if numeric_df.shape[1] < 2 or numeric_df.shape[0] < 4:
        st.error("Need at least 2 numeric columns and 4 complete rows for this analysis.")
        st.stop()

    # Standardize the data so each feature contributes equally.
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numeric_df)

    # PCA: project onto the first two principal components for plotting.
    pca = PCA(n_components=2)
    pca_result = pca.fit_transform(scaled_data)
    fig, ax = plt.subplots()
    ax.scatter(pca_result[:, 0], pca_result[:, 1], c='blue', edgecolor='k', s=50)
    ax.set_title('PCA Result')
    ax.set_xlabel('Principal Component 1')
    ax.set_ylabel('Principal Component 2')
    st.pyplot(fig)

    # KMeans clustering. n_init is pinned explicitly because its default
    # changed across scikit-learn versions (10 -> "auto").
    kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
    clusters = kmeans.fit_predict(scaled_data)
    fig, ax = plt.subplots()
    ax.scatter(pca_result[:, 0], pca_result[:, 1], c=clusters, cmap='viridis', edgecolor='k', s=50)
    ax.set_title('KMeans Clustering')
    ax.set_xlabel('Principal Component 1')
    ax.set_ylabel('Principal Component 2')
    st.pyplot(fig)

    # Silhouette score for the KMeans partition (valid: 3 labels, >= 4 rows).
    silhouette_avg = silhouette_score(scaled_data, clusters)
    st.write('Silhouette Score:', silhouette_avg)

    # Local Outlier Factor (LOF). n_neighbors must be smaller than the
    # number of samples, so clamp it for small datasets.
    lof = LocalOutlierFactor(n_neighbors=min(20, len(scaled_data) - 1))
    lof_labels = lof.fit_predict(scaled_data)
    lof_outliers = int(np.sum(lof_labels == -1))
    st.write("Number of outliers detected by LOF:", lof_outliers)

    # Isolation Forest with a fixed 10% contamination assumption.
    isolation_forest = IsolationForest(contamination=0.1, random_state=42)
    isolation_labels = isolation_forest.fit_predict(scaled_data)
    isolation_outliers = int(np.sum(isolation_labels == -1))
    st.write("Number of outliers detected by Isolation Forest:", isolation_outliers)

    # DBSCAN. silhouette_score requires 2 <= n_labels <= n_samples - 1,
    # and DBSCAN can легitimately yield a single cluster or all noise
    # with the default eps, so guard instead of crashing.
    dbscan = DBSCAN(eps=0.5, min_samples=5)
    dbscan_labels = dbscan.fit_predict(scaled_data)
    n_labels = len(set(dbscan_labels))
    if 2 <= n_labels <= len(scaled_data) - 1:
        silhouette_dbscan = silhouette_score(scaled_data, dbscan_labels)
        st.write("DBSCAN Silhouette Score:", silhouette_dbscan)
    else:
        st.write("DBSCAN produced a single cluster (or all noise); silhouette score is undefined.")