Spaces:

orYx-models
/

scoring-engine

Build error

App Files Files Community

Vineedhar committed on Jan 4, 2025

Commit

5f28baf

verified ·

1 Parent(s): 1a83022

Create app.py

Browse files

Files changed (1) hide show

app.py +85 -0

app.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import streamlit as st
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.naive_bayes import GaussianNB
+from sklearn.metrics import accuracy_score
+from sklearn.preprocessing import LabelEncoder
+# Title of the app
+st.title("Scoring Engine")
+# File upload section
+uploaded_file = st.file_uploader("Upload your dataset (CSV format)", type="csv")
+if uploaded_file is not None:
+    # Load the dataset
+    df = pd.read_csv(uploaded_file)
+    st.write("### Uploaded Dataset")
+    st.write(df)
+    # Rater columns that are averaged into 'Average_score'
+    rater_columns = ['Boss_score', 'Colleague_score', 'Colleague_other_score',
+                     'Report_score', 'Customer_score', 'All_raters_Score']
+    # Check every column the script reads by name up front, so an unsuitable
+    # upload produces a readable message instead of a raw KeyError traceback
+    missing_columns = [name for name in rater_columns + ['Benchmark_score']
+                       if name not in df.columns]
+    if missing_columns:
+        st.error(f"The uploaded dataset is missing required column(s): {', '.join(missing_columns)}")
+        # Halt this script run; nothing below can work without these columns
+        st.stop()
+    # Row-wise mean of the rater columns, ignoring NaN values, rounded to 1 decimal place
+    # NOTE(review): the previous comment said "2 decimal places" while the code
+    # rounded to 1; the code's behaviour is kept here - confirm which was intended
+    df['Average_score'] = df[rater_columns].mean(axis=1, skipna=True).round(1)
+    # Label how an averaged score compares with its benchmark
+    def self_score(average, benchmark):
+        """Return "High", "Low" or "Equal" for average relative to benchmark.
+
+        "High" means average is strictly greater than benchmark and "Low"
+        means strictly less. "Equal" is the fall-through for every other
+        case, which includes a float NaN on either side because both strict
+        comparisons are then False.
+        """
+        if average > benchmark:
+            return "High"
+        return "Low" if average < benchmark else "Equal"
+    # Label each row by comparing its average score with its benchmark
+    df['Self_score'] = df.apply(lambda row: self_score(row['Average_score'], row['Benchmark_score']), axis=1)
+    # Encode object-type columns as integers on a copy, so the displayed and
+    # downloaded data keeps its original values
+    encoded_df = df.copy()
+    # fit_transform refits the encoder on every call, so one instance is reused
+    le = LabelEncoder()
+    for column in encoded_df.select_dtypes(include=['object']).columns:
+        encoded_df[column] = le.fit_transform(encoded_df[column].astype(str))
+    # Fill missing values with 0
+    encoded_df = encoded_df.fillna(0)
+    # Prepare features (X) and labels (y)
+    # NOTE(review): X still contains 'Average_score' and 'Benchmark_score', the
+    # two columns 'Self_score' is computed from - confirm this is intended,
+    # since it affects how the reported accuracy should be read
+    X = encoded_df.drop(columns=['Self_score'])
+    y = encoded_df['Self_score']
+    # Split data into training and testing sets (15% held out, fixed seed)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
+    # Train the Gaussian Naive Bayes model
+    gnb = GaussianNB()
+    gnb.fit(X_train, y_train)
+    # Evaluate the model on the held-out rows
+    y_pred = gnb.predict(X_test)
+    accuracy = accuracy_score(y_test, y_pred)
+    st.write(f"### Model Accuracy: {accuracy:.2f}")
+    # Confidence score = highest predicted class probability per row, computed
+    # for the entire dataset (a test-set-only value computed here previously
+    # was overwritten before use and has been removed)
+    # NOTE(review): this scores the training rows too - confirm that is intended
+    y_prob = gnb.predict_proba(X)
+    confidence_scores = y_prob.max(axis=1)
+    df['Confidence_score'] = confidence_scores
+    st.write("### Processed Dataset")
+    st.write(df)
+    # Download button for the processed dataset
+    csv = df.to_csv(index=False).encode('utf-8')
+    st.download_button(
+        label="Download Processed Dataset",
+        data=csv,
+        file_name="processed_dataset.csv",
+        mime="text/csv"
+    )
+else:
+    st.write("Please upload a dataset to begin.")