Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """FinalAssignment.ipynb | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1ZivSjhGy8jDJ0SDnaGdad5seVc0wMbQl | |
| """ | |
from datasets import load_dataset

# Fetch the remote-worker productivity dataset from the Hugging Face Hub.
dataset = load_dataset("nprak26/remote-worker-productivity")
train_split = dataset["train"]

# Preview the first rows of the training split.
print(train_split.to_pandas().head())

# List the column names of the training split.
print(train_split.column_names)

# Keep the training split as a DataFrame and summarise its numerical columns.
df = train_split.to_pandas()
print(df.describe())
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Columns of interest; the last two are categorical and get one-hot encoded.
features = [
    "Task_Completion_Rate",
    "Productivity_Score",
    "Stress_Level",
    "Job_Satisfaction",
    "Efficiency_Rating",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
]

# One-hot encode the categorical columns; the first level of each is dropped.
df_processed = pd.get_dummies(
    df,
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)

# Numeric columns plus every indicator column produced by the encoding above.
indicator_columns = []
for col in df_processed.columns:
    if "Manager_Support_Level_" in col or "Team_Collaboration_Frequency_" in col:
        indicator_columns.append(col)

features_to_scale = [
    "Task_Completion_Rate",
    "Productivity_Score",
    "Stress_Level",
    "Job_Satisfaction",
    "Efficiency_Rating",
] + indicator_columns

# Rescale the selected columns with MinMaxScaler before modelling.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed[features_to_scale])
print("Features scaled successfully!")
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Cluster the scaled feature matrix into four groups.
# n_init is set explicitly (as the later KMeans calls in this file do) so the
# number of restarts does not depend on the installed scikit-learn default.
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
clusters = kmeans.fit_predict(scaled_features)

# Store the cluster labels on both the raw and the encoded DataFrame.
df["productivity_cluster"] = clusters
df_processed["productivity_cluster"] = df["productivity_cluster"]

# The original 'features' list contains categorical columns that can't be plotted,
# so the pair plot uses 'features_to_scale' (numeric and one-hot columns) instead.
sns.pairplot(df_processed, vars=features_to_scale, hue="productivity_cluster", palette="tab10")
plt.suptitle("Productivity Clusters", y=1.02)
plt.show()
from transformers import pipeline

# Text2text-generation pipeline backed by google/flan-t5-base; used to turn a
# profile prompt into a productivity tip.
generator = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
)
def generate_tip(cluster_data):
    """Generate one productivity tip for a single worker profile.

    Args:
        cluster_data: Mapping-like row (for example a pandas Series) that holds
            ``Task_Completion_Rate``, ``Stress_Level`` and ``Job_Satisfaction``,
            plus either the raw ``Manager_Support_Level`` value or its one-hot
            ``Manager_Support_Level_*`` indicator flags.

    Returns:
        The ``generated_text`` of the first result returned by the module-level
        ``generator`` pipeline.
    """
    raw_support = cluster_data.get("Manager_Support_Level")
    if raw_support is not None:
        # Un-encoded row: the categorical value is available directly.
        manager_support = raw_support
    else:
        # One-hot row: "High" is the base case when every other flag is false.
        # Missing indicator columns are treated as false instead of raising
        # KeyError, so rows lacking some level columns still work.
        manager_support = "High"
        for level in ("Very High", "Moderate", "Low", "Very Low"):
            if cluster_data.get(f"Manager_Support_Level_{level}", False):
                manager_support = level
                break

    prompt = f"""
    Based on the following productivity profile:
    - Task Completion Rate: {cluster_data['Task_Completion_Rate']}
    - Stress Level: {cluster_data['Stress_Level']}
    - Job Satisfaction: {cluster_data['Job_Satisfaction']}
    - Manager Support Level: {manager_support}
    Suggest 1 actionable tip to improve productivity.
    """
    return generator(prompt)[0]["generated_text"]
# Generate a tip for the first row that was assigned to cluster 2.
cluster_two_rows = df_processed[df_processed["productivity_cluster"] == 2]
sample = cluster_two_rows.iloc[0]
tip = generate_tip(sample)
# The label glyph had been mis-decoded to "π‘"; restored to the light-bulb emoji.
print("💡 Productivity Tip:", tip)
# Shell step, not Python: run `pip install gradio` in a terminal (or
# `!pip install gradio` in a notebook cell) before executing this file.
import gradio as gr

# Sample cluster profiles (you can expand this later)
cluster_examples = {
    0: "High task completion, low stress, strong manager support",
    1: "Moderate productivity, high stress, weak support",
    2: "Low satisfaction, high collaboration, low efficiency",
    3: "Balanced workload, moderate stress, good support",
}
def get_tip(cluster_id):
    """Return the canned description and a generated tip for one cluster.

    Args:
        cluster_id: Key into ``cluster_examples`` and value to match in the
            ``productivity_cluster`` column.

    Returns:
        tuple: (cluster description, tip generated for the first row of that
        cluster).

    Raises:
        IndexError: If no row is assigned to ``cluster_id``.
        KeyError: If ``cluster_id`` is not a key of ``cluster_examples``.
    """
    # Sample from df_processed rather than df: generate_tip reads the one-hot
    # Manager_Support_Level_* flags, which only exist on the encoded frame.
    members = df_processed[df_processed["productivity_cluster"] == cluster_id]
    sample = members.iloc[0]
    tip = generate_tip(sample)
    return cluster_examples[cluster_id], tip
| import gradio as gr | |
| import pandas as pd | |
| from sklearn.preprocessing import MinMaxScaler | |
| from sklearn.cluster import KMeans | |
| from transformers import pipeline | |
| from datasets import load_dataset | |
# Load dataset and model
dataset = load_dataset("nprak26/remote-worker-productivity")
df = dataset["train"].to_pandas()

# Select features and apply one-hot encoding for categorical variables
features = [
    "Task_Completion_Rate",
    "Stress_Level",
    "Job_Satisfaction",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
]
df_processed = pd.get_dummies(
    df[features],
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)

# Scale every column of the encoded frame
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed)

# Fit clustering model.
# n_init is set explicitly (as the later KMeans calls in this file do) so the
# number of restarts does not depend on the installed scikit-learn default.
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
kmeans.fit(scaled_features)

# Load text generation model
generator = pipeline("text2text-generation", model="google/flan-t5-base")

# Cluster descriptions (hand-written labels keyed by cluster index)
cluster_descriptions = {
    0: "High task completion, low stress, strong manager support",
    1: "Moderate productivity, high stress, weak support",
    2: "Low satisfaction, high collaboration, low efficiency",
    3: "Balanced workload, moderate stress, good support",
}
# Function to process user input
def analyze_productivity(task_completion, stress, satisfaction, support, collaboration):
    """Predict the productivity cluster for one user and generate a tip.

    Args:
        task_completion: User's task completion rate.
        stress: User's stress level.
        satisfaction: User's job satisfaction.
        support: Manager support level (categorical label).
        collaboration: Team collaboration frequency (categorical label).

    Returns:
        tuple: (description of the predicted cluster, generated tip text).
    """
    # Create a DataFrame from user input
    user_data = pd.DataFrame([{
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
    }])

    # One-hot encode WITHOUT drop_first: a one-row frame has exactly one level
    # per categorical column, so dropping the first level would erase the
    # user's choice and always encode the baseline category.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    )

    # Align with the training columns: indicator columns the user did not pick
    # become 0, and the baseline-level column (absent from training) is dropped.
    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

    # Scale the user input using the fitted scaler
    scaled_input = scaler.transform(user_aligned)

    # Predict the cluster
    cluster = kmeans.predict(scaled_input)[0]

    prompt = f"""
    Based on the following productivity profile:
    - Task Completion Rate: {task_completion}
    - Stress Level: {stress}
    - Job Satisfaction: {satisfaction}
    - Manager Support Level: {support}
    - Collaboration Frequency: {collaboration}
    Suggest 1 actionable tip to improve productivity.
    """
    tip = generator(prompt)[0]["generated_text"]
    return cluster_descriptions[cluster], tip
# Engineered feature: weekly work hours multiplied by the work-life balance value.
hours_per_week = df['Work_Hours_Per_Week']
df['Work_Life_Balance_Score'] = hours_per_week * df['Work_Life_Balance']

# Ordinal codes for collaboration frequency (1 = Monthly ... 4 = Daily).
collaboration_mapping = dict(
    zip(['Monthly', 'Weekly', 'Few times per week', 'Daily'], range(1, 5))
)
collaboration_codes = df['Team_Collaboration_Frequency'].map(collaboration_mapping)
df['Team_Collaboration_Numerical'] = collaboration_codes

# Communication overhead: meetings per week plus the collaboration code.
df['Communication_Overhead'] = df['Meetings_Per_Week'] + df['Team_Collaboration_Numerical']

# Feature list used for modelling, now including the two engineered columns.
features = [
    "Task_Completion_Rate",
    "Stress_Level",
    "Job_Satisfaction",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
] + ["Work_Life_Balance_Score", "Communication_Overhead"]

print("New features created and features list updated.")
# Handle missing values in 'Communication_Overhead' before encoding/scaling.
# The result is assigned back instead of calling fillna(inplace=True) on a
# column selection: that chained form is deprecated in recent pandas and does
# not modify df when Copy-on-Write is enabled.
df['Communication_Overhead'] = df['Communication_Overhead'].fillna(
    df['Communication_Overhead'].median()
)

# Redefine df_processed to include new features and one-hot encode.
# This is done once, after imputation; the earlier pre-imputation pass was
# overwritten immediately and has been removed.
df_processed = pd.get_dummies(
    df[features],
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)

# Scale all features
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed)
print("df_processed updated and features scaled.")
# Elbow method: record the KMeans inertia for k = 1 .. 10.
candidate_ks = range(1, 11)
inertia = []
for i in candidate_ks:
    kmeans = KMeans(n_clusters=i, random_state=42, n_init=10).fit(scaled_features)
    inertia.append(kmeans.inertia_)

# Plot inertia against the number of clusters.
plt.figure(figsize=(10, 6))
plt.plot(candidate_ks, inertia, marker='o')
plt.title('Elbow Method For Optimal k')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()
# Refit KMeans with k=3 (the value the author selected as optimal).
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
kmeans.fit(scaled_features)
clusters = kmeans.labels_

# Attach the new cluster labels to the DataFrame.
df['productivity_cluster'] = clusters
print("KMeans model re-fitted with 3 clusters and labels added to the DataFrame.")
def generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description):
    """Build a profile prompt and return the tip produced by ``generator``.

    Args:
        task_completion (float): Task completion rate, rendered with a ``%`` suffix.
        stress (int): Stress level, rendered as ``<value>/10``.
        satisfaction (float): Job satisfaction, rendered with a ``%`` suffix.
        support (str): Manager support level label.
        collaboration (str): Team collaboration frequency label.
        cluster_description (str): Text describing the predicted cluster.

    Returns:
        str: The ``generated_text`` of the first pipeline result.
    """
    prompt = f"""
    Based on the following productivity profile:
    - Task Completion Rate: {task_completion}%
    - Stress Level: {stress}/10
    - Job Satisfaction: {satisfaction}%
    - Manager Support Level: {support}
    - Team Collaboration Frequency: {collaboration}
    This profile aligns with a cluster characterized by: {cluster_description}
    Suggest one highly actionable tip to improve productivity, specifically tailored to this profile and cluster characteristics. The tip should be concise and practical.
    """
    # 'generator' is the module-level pipeline; max_new_tokens caps the tip length.
    results = generator(prompt, max_new_tokens=50)
    first_result = results[0]
    return first_result["generated_text"]
# Update the analyze_productivity function to pass the correct arguments to generate_tip
def analyze_productivity(task_completion, stress, satisfaction, support, collaboration):
    """Analyze user productivity metrics, predict the cluster, and generate a tip.

    Args:
        task_completion (float): User's task completion rate.
        stress (int): User's stress level.
        satisfaction (float): User's job satisfaction.
        support (str): User's manager support level (categorical).
        collaboration (str): User's team collaboration frequency (categorical).

    Returns:
        tuple: (``"Cluster <id>: <description>"``, generated tip).
    """
    # Column layout the module-level scaler and KMeans model were fitted on.
    # Reusing it avoids re-encoding the whole training frame on every call.
    training_columns = df_processed.columns

    user_row = {
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
    }
    # The UI does not collect the engineered features. Use the training median
    # as a neutral stand-in; a hard-coded 0 sits at the edge of the training
    # range and pulls every user toward the same cluster.
    for engineered in ("Work_Life_Balance_Score", "Communication_Overhead"):
        if engineered in training_columns:
            user_row[engineered] = df_processed[engineered].median()
    user_data = pd.DataFrame([user_row])

    # One-hot encode WITHOUT drop_first: a one-row frame has exactly one level
    # per categorical column, so dropping the first level would erase the
    # user's choice and always encode the baseline category.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    )

    # Align with the training columns: unselected indicators become 0 and the
    # baseline-level column (absent from training) is dropped.
    user_aligned = user_processed.reindex(columns=training_columns, fill_value=0)

    # Scale with the already-fitted scaler and predict the cluster.
    scaled_input = scaler.transform(user_aligned)
    cluster = kmeans.predict(scaled_input)[0]

    # Get the cluster description
    cluster_description = cluster_descriptions[cluster]

    # Generate the tip using the refined generate_tip function
    tip = generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description)
    return f"Cluster {cluster}: {cluster_description}", tip
# Print a message indicating the functions have been updated
print("generate_tip and analyze_productivity functions updated.")
# Notebook shell step, not Python: `!pip install datasets` is IPython-only
# syntax and a SyntaxError in a .py file. Install the package from a shell or
# notebook cell before the `datasets` import runs.
def analyze_what_if(task_completion, stress, satisfaction, support, collaboration):
    """Analyze a 'what-if' productivity scenario and generate a tip.

    Mirrors analyze_productivity but is wired to the what-if tab.

    Args:
        task_completion (float): Adjusted task completion rate.
        stress (int): Adjusted stress level.
        satisfaction (float): Adjusted job satisfaction.
        support (str): Adjusted manager support level (categorical).
        collaboration (str): Adjusted team collaboration frequency (categorical).

    Returns:
        tuple: (``"Cluster <id>: <description>"``, generated tip).
    """
    training_columns = df_processed.columns

    scenario = {
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
    }
    # The UI does not collect the engineered features. Use the training median
    # as a neutral stand-in instead of letting the reindex below fill them with 0.
    for engineered in ("Work_Life_Balance_Score", "Communication_Overhead"):
        if engineered in training_columns:
            scenario[engineered] = df_processed[engineered].median()
    user_data = pd.DataFrame([scenario])

    # One-hot encode WITHOUT drop_first: a one-row frame has exactly one level
    # per categorical column, so dropping the first level would erase the
    # user's choice and always encode the baseline category.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    )

    # Align with the training columns: unselected indicators become 0 and the
    # baseline-level column (absent from training) is dropped.
    user_aligned = user_processed.reindex(columns=training_columns, fill_value=0)

    # Scale the user input and predict the cluster
    scaled_input = scaler.transform(user_aligned)
    cluster = kmeans.predict(scaled_input)[0]

    # Get the cluster description
    cluster_description = cluster_descriptions[cluster]

    # Generate the tip
    tip = generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description)
    return f"Cluster {cluster}: {cluster_description}", tip
# Dropdown options shared by the profile tab and the what-if tab.
support_levels = ['Very Low', 'Low', 'Moderate', 'High', 'Very High']
collaboration_levels = ['Monthly', 'Weekly', 'Few times per week', 'Daily']
intro_text = "Enter your productivity metrics to get a personalized tip. This tool analyzes your input and provides an AI-generated tip to help improve your productivity."

# Four-tab UI: profile inputs, results, what-if inputs, what-if results.
with gr.Blocks() as demo:
    gr.Markdown("# π Productivity Profiler")
    gr.Markdown(intro_text)

    with gr.Tabs():
        with gr.TabItem("Your Productivity Profile"):
            task_completion = gr.Slider(0, 100, label="Task Completion Rate (%)")
            stress = gr.Slider(1, 10, label="Stress Level (1-10)")
            satisfaction = gr.Slider(0, 100, label="Job Satisfaction (0-100)")
            support = gr.Dropdown(choices=list(support_levels), label="Manager Support Level")
            collaboration = gr.Dropdown(choices=list(collaboration_levels), label="Team Collaboration Frequency")
            submit = gr.Button("Analyze")

        with gr.TabItem("Results"):
            profile_output = gr.Textbox(label="Your Productivity Cluster")
            tip_output = gr.Textbox(label="AI-Powered Productivity Tip")

        with gr.TabItem("What-If Scenario"):
            gr.Markdown("Explore how changing your metrics could affect your productivity profile.")
            what_if_task_completion = gr.Slider(0, 100, label="Adjusted Task Completion Rate (%)")
            what_if_stress = gr.Slider(1, 10, label="Adjusted Stress Level (1-10)")
            what_if_satisfaction = gr.Slider(0, 100, label="Adjusted Job Satisfaction (0-100)")
            what_if_support = gr.Dropdown(choices=list(support_levels), label="Adjusted Manager Support Level")
            what_if_collaboration = gr.Dropdown(choices=list(collaboration_levels), label="Adjusted Team Collaboration Frequency")
            what_if_submit = gr.Button("Analyze What-If Scenario")

        with gr.TabItem("What-If Results"):
            what_if_profile_output = gr.Textbox(label="What-If Productivity Cluster")
            what_if_tip_output = gr.Textbox(label="What-If AI-Powered Productivity Tip")

    # Wire each button to its analysis function and output boxes.
    profile_inputs = [task_completion, stress, satisfaction, support, collaboration]
    submit.click(
        fn=analyze_productivity,
        inputs=profile_inputs,
        outputs=[profile_output, tip_output],
    )

    what_if_inputs = [
        what_if_task_completion,
        what_if_stress,
        what_if_satisfaction,
        what_if_support,
        what_if_collaboration,
    ]
    what_if_submit.click(
        fn=analyze_what_if,
        inputs=what_if_inputs,
        outputs=[what_if_profile_output, what_if_tip_output],
    )

demo.launch(share=True)
def save_feedback(tip, feedback):
    """Append a generated tip and the user's feedback to ``feedback.csv``.

    The header row is written only when the file is empty.

    Args:
        tip: The tip text the feedback refers to.
        feedback: The feedback value to record (for example a vote marker).

    Returns:
        str: The confirmation message ``"Feedback submitted!"``.
    """
    feedback_data = pd.DataFrame({"tip": [tip], "feedback": [feedback]})
    # Explicit UTF-8 so emoji feedback is written correctly regardless of the
    # platform's default encoding; newline="" lets the CSV writer control line
    # endings and avoids blank lines between rows on Windows.
    with open("feedback.csv", "a", encoding="utf-8", newline="") as f:
        # In append mode the position is the current file size; 0 means new file.
        feedback_data.to_csv(f, header=f.tell() == 0, index=False)
    return "Feedback submitted!"
# Four-tab UI with thumbs-up / thumbs-down feedback on each generated tip.
with gr.Blocks() as demo:
    # NOTE(review): the "π" in this heading looks like a mis-decoded emoji;
    # restore the intended glyph once it is known.
    gr.Markdown("# π Productivity Profiler")
    gr.Markdown("Enter your productivity metrics to get a personalized tip. This tool analyzes your input and provides an AI-generated tip to help improve your productivity.")

    with gr.Tabs():
        with gr.TabItem("Your Productivity Profile"):
            task_completion = gr.Slider(0, 100, label="Task Completion Rate (%)")
            stress = gr.Slider(1, 10, label="Stress Level (1-10)")
            satisfaction = gr.Slider(0, 100, label="Job Satisfaction (0-100)")
            support = gr.Dropdown(
                choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'],
                label="Manager Support Level"
            )
            collaboration = gr.Dropdown(
                choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'],
                label="Team Collaboration Frequency"
            )
            submit = gr.Button("Analyze")

        with gr.TabItem("Results"):
            profile_output = gr.Textbox(label="Your Productivity Cluster")
            tip_output = gr.Textbox(label="AI-Powered Productivity Tip")
            with gr.Row():
                # Both labels had been garbled to the same "π"; distinct
                # glyphs make the two buttons distinguishable again.
                upvote_button = gr.Button("👍")
                downvote_button = gr.Button("👎")
            feedback_message = gr.Textbox(label="Feedback Status")

        with gr.TabItem("What-If Scenario"):
            gr.Markdown("Explore how changing your metrics could affect your productivity profile.")
            what_if_task_completion = gr.Slider(0, 100, label="Adjusted Task Completion Rate (%)")
            what_if_stress = gr.Slider(1, 10, label="Adjusted Stress Level (1-10)")
            what_if_satisfaction = gr.Slider(0, 100, label="Adjusted Job Satisfaction (0-100)")
            what_if_support = gr.Dropdown(
                choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'],
                label="Adjusted Manager Support Level"
            )
            what_if_collaboration = gr.Dropdown(
                choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'],
                label="Adjusted Team Collaboration Frequency"
            )
            what_if_submit = gr.Button("Analyze What-If Scenario")

        with gr.TabItem("What-If Results"):
            what_if_profile_output = gr.Textbox(label="What-If Productivity Cluster")
            what_if_tip_output = gr.Textbox(label="What-If AI-Powered Productivity Tip")
            with gr.Row():
                what_if_upvote_button = gr.Button("👍")
                what_if_downvote_button = gr.Button("👎")
            what_if_feedback_message = gr.Textbox(label="Feedback Status")

    # Hidden components carrying the vote value passed to save_feedback.
    # The values must differ, otherwise up- and down-votes are recorded
    # identically in feedback.csv.
    upvote_value = gr.Textbox(value="👍", visible=False)
    downvote_value = gr.Textbox(value="👎", visible=False)

    submit.click(
        fn=analyze_productivity,
        inputs=[task_completion, stress, satisfaction, support, collaboration],
        outputs=[profile_output, tip_output]
    )
    what_if_submit.click(
        fn=analyze_what_if,
        inputs=[what_if_task_completion, what_if_stress, what_if_satisfaction, what_if_support, what_if_collaboration],
        outputs=[what_if_profile_output, what_if_tip_output]
    )

    upvote_button.click(
        fn=save_feedback,
        inputs=[tip_output, upvote_value],
        outputs=[feedback_message]
    )
    downvote_button.click(
        fn=save_feedback,
        inputs=[tip_output, downvote_value],
        outputs=[feedback_message]
    )
    what_if_upvote_button.click(
        fn=save_feedback,
        inputs=[what_if_tip_output, upvote_value],
        outputs=[what_if_feedback_message]
    )
    what_if_downvote_button.click(
        fn=save_feedback,
        inputs=[what_if_tip_output, downvote_value],
        outputs=[what_if_feedback_message]
    )

demo.launch(share=True)