Spaces:
Sleeping
Sleeping
# Streamlit "Scoring Engine" app.
#
# Flow: upload a CSV -> average the rater score columns -> label each row
# High/Low/Equal against its benchmark -> train a Gaussian Naive Bayes model on
# the label-encoded table -> show and offer for download the processed table
# with a per-row confidence score, plus the hold-out accuracy.

import pandas as pd
import streamlit as st
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder

# LICENSE.streamlit.Apachev2 - Copyright (c) Streamlit Inc. (2018-2022) Snowflake Inc. (2022-2024) (https://github.com/streamlit/streamlit/blob/develop/LICENSE)
# LICENSE.pandas.BSD-3 - Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team (https://github.com/pandas-dev/pandas/blob/main/LICENSE)
# LICENSE.sklearn.BSD-3 - Copyright (c) 2007-2024 The scikit-learn developers (https://github.com/scikit-learn/scikit-learn/blob/main/COPYING)

# Rater columns that are averaged into 'Average_score'.
RATER_COLUMNS = [
    'Boss_score',
    'Colleague_score',
    'Colleague_other_score',
    'Report_score',
    'Customer_score',
]

# Columns (and their order) of the table that is displayed and downloadable.
OUTPUT_COLUMNS = [
    'Title',
    'Code',
    'Dimensions',
    *RATER_COLUMNS,
    'Benchmark_score',
    'Average_score',
    'Self_score',
    'Confidence_score (%)',
]

# Columns the uploaded CSV itself must provide (the rest are computed here).
REQUIRED_COLUMNS = ['Title', 'Code', 'Dimensions', *RATER_COLUMNS, 'Benchmark_score']

# Fraction of rows held out to measure accuracy, and the split seed.
TEST_SIZE = 0.15
RANDOM_STATE = 42


def self_score(average, benchmark):
    """Label ``average`` relative to ``benchmark``.

    Returns "High" when ``average`` is greater than ``benchmark``, "Low" when
    it is smaller, and "Equal" otherwise.

    NOTE(review): a NaN on either side fails both comparisons, so such rows
    are also labelled "Equal". Confirm that this is the intended label.
    """
    if average > benchmark:
        return "High"
    if average < benchmark:
        return "Low"
    return "Equal"


def _score_dataset(raw_df):
    """Build the processed table and the model's hold-out accuracy.

    ``raw_df`` must contain every column in ``REQUIRED_COLUMNS`` and at least
    two rows. It is not modified; a copy is processed instead.

    Returns a ``(processed_df, accuracy)`` tuple, where ``processed_df`` holds
    exactly ``OUTPUT_COLUMNS`` and ``accuracy`` is the accuracy score on the
    held-out split.
    """
    df = raw_df.copy()

    # Mean of the rater scores, ignoring NaN values, rounded to 1 decimal place.
    df['Average_score'] = df[RATER_COLUMNS].mean(axis=1, skipna=True).round(1)

    # Label each row by comparing its average with its benchmark.
    df['Self_score'] = [
        self_score(average, benchmark)
        for average, benchmark in zip(df['Average_score'], df['Benchmark_score'])
    ]

    # Encode object-type columns (including the 'Self_score' target) as
    # integers. Each column gets its own encoder because the fitted classes
    # differ per column.
    encoded_df = df.copy()
    for column in encoded_df.select_dtypes(include=['object']).columns:
        encoded_df[column] = LabelEncoder().fit_transform(encoded_df[column].astype(str))

    # Fill missing values with 0 so the model receives a fully numeric table.
    encoded_df = encoded_df.fillna(0)

    # Prepare features and labels, then split into training and testing sets.
    features = encoded_df.drop(columns=['Self_score'])
    labels = encoded_df['Self_score']
    features_train, features_test, labels_train, labels_test = train_test_split(
        features, labels, test_size=TEST_SIZE, random_state=RANDOM_STATE
    )

    # Train the Gaussian Naive Bayes model and evaluate it on the held-out rows.
    gnb = GaussianNB()
    gnb.fit(features_train, labels_train)
    accuracy = accuracy_score(labels_test, gnb.predict(features_test))

    # Confidence = highest class probability for every row of the full
    # dataset, expressed as a percentage.
    df['Confidence_score (%)'] = gnb.predict_proba(features).max(axis=1) * 100

    # Selecting the output columns also discards any extra input columns
    # (e.g. 'All_raters_Score'), so no separate drop is needed.
    return df[OUTPUT_COLUMNS], accuracy


# Title of the app
st.title("Scoring Engine")

# File upload section
uploaded_file = st.file_uploader("Upload your dataset (CSV format)", type="csv")

if uploaded_file is not None:
    # Load the dataset; the first CSV column is used as the index.
    df = pd.read_csv(uploaded_file, index_col=0)

    missing_columns = [name for name in REQUIRED_COLUMNS if name not in df.columns]
    if missing_columns:
        # Report the problem in the app instead of failing with a KeyError.
        st.error(
            "The uploaded dataset is missing required column(s): "
            + ", ".join(missing_columns)
        )
    elif len(df) < 2:
        # The train/test split needs at least one row on each side.
        st.error("The uploaded dataset needs at least 2 rows to train and evaluate the model.")
    else:
        df, accuracy = _score_dataset(df)

        st.write("### Processed Dataset")
        st.write(df)
        st.write(f"### Model Accuracy: {accuracy:.2f}")

        # Download button for the processed dataset
        csv = df.to_csv(index=False).encode('utf-8')
        st.download_button(
            label="Download Processed Dataset",
            data=csv,
            file_name="processed_dataset.csv",
            mime="text/csv"
        )
else:
    st.write("Please upload a dataset to begin.")