# Spaces: Sleeping  (looks like page-status text captured along with the script; not part of the program)
import streamlit as st
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
# Location of the input data, relative to the working directory, and the
# name of the column the classifier predicts.
DATA_PATH = 'penguins_lter.csv'
TARGET_COLUMN = 'Species'


def load_data(path: str = DATA_PATH) -> pd.DataFrame:
    """Read the CSV at *path* into a DataFrame.

    If the file does not exist, an error is shown in the Streamlit page and
    the script run is halted with ``st.stop()``, so this function only ever
    returns on success.
    """
    try:
        return pd.read_csv(path)
    except FileNotFoundError:
        st.error(f"Error: '{path}' not found. Please upload the file or adjust the path.")
        st.stop()


def impute_missing(frame: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *frame* with missing values filled in.

    Numeric columns are filled with the column mean; every other column is
    filled with its most frequent value. The input frame is not modified.

    Columns are assigned back explicitly (``filled[col] = ...``) rather than
    calling ``filled[col].fillna(..., inplace=True)``: the in-place form acts
    on a temporary Series, which pandas warns about and which leaves the
    frame untouched once Copy-on-Write is active.
    """
    filled = frame.copy()

    numeric_cols = filled.select_dtypes(include=['number']).columns
    for col in numeric_cols:
        filled[col] = filled[col].fillna(filled[col].mean())

    categorical_cols = filled.select_dtypes(exclude=['number']).columns
    for col in categorical_cols:
        modes = filled[col].mode()
        # mode() is empty when the column holds no non-missing values; there
        # is nothing sensible to fill with, so leave such a column as it is.
        if not modes.empty:
            filled[col] = filled[col].fillna(modes.iloc[0])

    return filled


# Streamlit runs the script with __name__ set to "__main__", so the app still
# starts under `streamlit run`; the guard only keeps the helpers importable
# without loading data or training a model.
if __name__ == "__main__":
    # Load and clean the data.
    # NOTE(review): the fill values are computed on the full dataset before
    # the train/test split, and the target column is imputed like any other
    # non-numeric column - confirm both are intended.
    df = impute_missing(load_data(DATA_PATH))

    if TARGET_COLUMN not in df.columns:
        st.error(f"Error: target column '{TARGET_COLUMN}' not found in '{DATA_PATH}'.")
        st.stop()

    # Separate features from the target and one-hot encode the features.
    # NOTE(review): every non-numeric feature column is encoded, including
    # any identifier or free-text columns the CSV may contain - confirm.
    X = pd.get_dummies(df.drop(columns=TARGET_COLUMN), drop_first=True)
    y = df[TARGET_COLUMN]

    # Hold out 20% of the rows for evaluation; fixed seed for repeatability.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Standardise the features, then classify with 5-nearest-neighbours.
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('knn', KNeighborsClassifier(n_neighbors=5)),
    ])
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    # Streamlit page.
    st.title("Penguin Species Classification")
    st.write("This app predicts the species of a penguin based on its physical characteristics.")

    st.subheader("Classification Report")
    st.text(classification_report(y_test, y_pred))
    st.dataframe(df.head())