# -*- coding: utf-8 -*-
"""FinalAssignment.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1ZivSjhGy8jDJ0SDnaGdad5seVc0wMbQl
"""

from datasets import load_dataset

# Load the remote-worker productivity dataset from the Hugging Face Hub.
dataset = load_dataset("nprak26/remote-worker-productivity")

# Quick exploratory look at the training split.
print(dataset["train"].to_pandas().head())
print(dataset["train"].column_names)

# Work on a pandas DataFrame from here on.
df = dataset["train"].to_pandas()
print(df.describe())

import pandas as pd

# Columns of interest: five numeric scores plus two categorical columns.
features = [
    "Task_Completion_Rate",
    "Productivity_Score",
    "Stress_Level",
    "Job_Satisfaction",
    "Efficiency_Rating",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
]

# One-hot encode the categorical columns; drop_first avoids the dummy trap
# (the alphabetically-first category becomes the implicit baseline).
df_processed = pd.get_dummies(
    df,
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)

# Numeric columns plus the freshly created one-hot columns.
features_to_scale = [
    "Task_Completion_Rate",
    "Productivity_Score",
    "Stress_Level",
    "Job_Satisfaction",
    "Efficiency_Rating",
] + [
    col
    for col in df_processed.columns
    if "Manager_Support_Level_" in col or "Team_Collaboration_Frequency_" in col
]

# Scale every modeling feature into [0, 1] so KMeans distances are comparable.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed[features_to_scale])
print("Features scaled successfully!")

from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

# n_init=10 is pinned explicitly for consistency with the later cells and to
# avoid sklearn's FutureWarning about the changing n_init default.
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
clusters = kmeans.fit_predict(scaled_features)

# Attach the cluster label to both frames so either can be used for lookups.
df["productivity_cluster"] = clusters
df_processed["productivity_cluster"] = df["productivity_cluster"]

# NOTE: the raw 'features' list contains categorical columns that cannot be
# plotted directly; plotting below uses the numeric 'features_to_scale'.
# We will use the 'features_to_scale' list which contains the one-hot encoded
# numerical columns, since raw categorical columns cannot be plotted.
sns.pairplot(df_processed, vars=features_to_scale, hue="productivity_cluster", palette="tab10")
plt.suptitle("Productivity Clusters", y=1.02)
plt.show()

from transformers import pipeline

# Small instruction-tuned model used to phrase the productivity tips.
generator = pipeline("text2text-generation", model="google/flan-t5-base")


def generate_tip(cluster_data):
    """Generate one actionable productivity tip for a one-hot encoded row.

    Args:
        cluster_data: a row of df_processed (pandas Series) carrying the
            numeric features and the Manager_Support_Level_* dummy columns.

    Returns:
        str: the model-generated tip.
    """
    # Recover the categorical support level from the dummy columns.
    # "High" is the baseline category dropped by get_dummies(drop_first=True).
    manager_support = "High"
    if cluster_data['Manager_Support_Level_Very High']:
        manager_support = "Very High"
    elif cluster_data['Manager_Support_Level_Moderate']:
        manager_support = "Moderate"
    elif cluster_data['Manager_Support_Level_Low']:
        manager_support = "Low"
    elif cluster_data['Manager_Support_Level_Very Low']:
        manager_support = "Very Low"

    prompt = f"""
    Based on the following productivity profile:
    - Task Completion Rate: {cluster_data['Task_Completion_Rate']}
    - Stress Level: {cluster_data['Stress_Level']}
    - Job Satisfaction: {cluster_data['Job_Satisfaction']}
    - Manager Support Level: {manager_support}

    Suggest 1 actionable tip to improve productivity.
    """
    return generator(prompt)[0]["generated_text"]


sample = df_processed[df_processed["productivity_cluster"] == 2].iloc[0]
tip = generate_tip(sample)
print("💡 Productivity Tip:", tip)

# Colab shell magic — not valid Python in a .py module, so kept as a comment:
# !pip install gradio

import gradio as gr

# Sample cluster profiles (you can expand this later)
cluster_examples = {
    0: "High task completion, low stress, strong manager support",
    1: "Moderate productivity, high stress, weak support",
    2: "Low satisfaction, high collaboration, low efficiency",
    3: "Balanced workload, moderate stress, good support",
}


def get_tip(cluster_id):
    """Return (description, tip) for a representative row of a cluster."""
    # Sample from df_processed, not df: generate_tip reads the one-hot
    # Manager_Support_Level_* columns, which only exist in df_processed.
    sample = df_processed[df_processed["productivity_cluster"] == cluster_id].iloc[0]
    tip = generate_tip(sample)
    return cluster_examples[cluster_id], tip


import gradio as gr
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from transformers import pipeline
from datasets import load_dataset

# Load dataset and model (second, self-contained setup pass for the app).
dataset = load_dataset("nprak26/remote-worker-productivity")
df = dataset["train"].to_pandas()

# Select features and apply one-hot encoding for categorical variables.
features = [
    "Task_Completion_Rate",
    "Stress_Level",
    "Job_Satisfaction",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
]
df_processed = pd.get_dummies(
    df[features],
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)

# Scale the numerical features.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed)

# Fit clustering model (n_init pinned for consistency with the other cells).
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
kmeans.fit(scaled_features)

# Load text generation model.
generator = pipeline("text2text-generation", model="google/flan-t5-base")

# Cluster descriptions
cluster_descriptions = {
    0: "High task completion, low stress, strong manager support",
    1: "Moderate productivity, high stress, weak support",
    2: "Low satisfaction, high collaboration, low efficiency",
    3: "Balanced workload, moderate stress, good support",
}
# Function to process user input
def analyze_productivity(task_completion, stress, satisfaction, support, collaboration):
    """Cluster a single user profile and generate a productivity tip.

    Args:
        task_completion (float): task completion rate.
        stress (int): stress level.
        satisfaction (float): job satisfaction.
        support (str): manager support level (categorical).
        collaboration (str): team collaboration frequency (categorical).

    Returns:
        tuple: (cluster description, generated tip).
    """
    # Create a DataFrame from user input.
    user_data = pd.DataFrame([{
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
    }])

    # One-hot encode the user input. drop_first must be False here: on a
    # single-row frame each categorical has exactly one level, so drop_first
    # would delete the only dummy and silently erase the user's choice.
    # reindex() below discards any baseline columns not in the training frame.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
        drop_first=False,
    )

    # Align columns with the training data.
    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

    # Scale the user input using the fitted scaler.
    scaled_input = scaler.transform(user_aligned)

    # Predict the cluster.
    cluster = kmeans.predict(scaled_input)[0]

    prompt = f"""
    Based on the following productivity profile:
    - Task Completion Rate: {task_completion}
    - Stress Level: {stress}
    - Job Satisfaction: {satisfaction}
    - Manager Support Level: {support}
    - Collaboration Frequency: {collaboration}

    Suggest 1 actionable tip to improve productivity.
    """
    tip = generator(prompt)[0]["generated_text"]
    return cluster_descriptions[cluster], tip


# --- Feature engineering -----------------------------------------------------

# Create Work_Life_Balance_Score.
df['Work_Life_Balance_Score'] = df['Work_Hours_Per_Week'] * df['Work_Life_Balance']

# Create Communication_Overhead — map categorical frequency to a number.
collaboration_mapping = {
    'Monthly': 1,
    'Weekly': 2,
    'Few times per week': 3,
    'Daily': 4,
}
df['Team_Collaboration_Numerical'] = df['Team_Collaboration_Frequency'].map(collaboration_mapping)
df['Communication_Overhead'] = df['Meetings_Per_Week'] + df['Team_Collaboration_Numerical']

# Update the features list.
features = [
    "Task_Completion_Rate",
    "Stress_Level",
    "Job_Satisfaction",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
    "Work_Life_Balance_Score",
    "Communication_Overhead",
]
print("New features created and features list updated.")

# Impute missing Communication_Overhead BEFORE building/scaling the model
# frame (the original built and scaled twice, once with NaNs still present).
# Plain assignment instead of chained .fillna(inplace=True), which is
# unreliable under pandas copy-on-write.
df['Communication_Overhead'] = df['Communication_Overhead'].fillna(
    df['Communication_Overhead'].median()
)

# Build the processed frame once, then scale all features.
df_processed = pd.get_dummies(
    df[features],
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed)
print("df_processed updated and features scaled.")

# --- Elbow method to choose k ------------------------------------------------
inertia = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, random_state=42, n_init=10)
    kmeans.fit(scaled_features)
    inertia.append(kmeans.inertia_)

# Plot the elbow curve.
plt.figure(figsize=(10, 6))
plt.plot(range(1, 11), inertia, marker='o')
plt.title('Elbow Method For Optimal k')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()

# Fit the KMeans model with the optimal number of clusters (k=3).
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
clusters = kmeans.fit_predict(scaled_features)

# Add the new cluster labels to the DataFrame.
df['productivity_cluster'] = clusters
print("KMeans model re-fitted with 3 clusters and labels added to the DataFrame.")


def generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description):
    """Generate an actionable productivity tip from user input and cluster traits.

    Args:
        task_completion (float): user's task completion rate.
        stress (int): user's stress level.
        satisfaction (float): user's job satisfaction.
        support (str): manager support level (categorical).
        collaboration (str): team collaboration frequency (categorical).
        cluster_description (str): description of the predicted cluster.

    Returns:
        str: an actionable productivity tip.
    """
    prompt = f"""
    Based on the following productivity profile:
    - Task Completion Rate: {task_completion}%
    - Stress Level: {stress}/10
    - Job Satisfaction: {satisfaction}%
    - Manager Support Level: {support}
    - Team Collaboration Frequency: {collaboration}

    This profile aligns with a cluster characterized by: {cluster_description}

    Suggest one highly actionable tip to improve productivity, specifically tailored to this profile and cluster characteristics. The tip should be concise and practical.
    """
    # max_new_tokens keeps the generated tip short and the call fast.
    tip = generator(prompt, max_new_tokens=50)[0]["generated_text"]
    return tip


def analyze_productivity(task_completion, stress, satisfaction, support, collaboration):
    """Analyze user productivity metrics, predict a cluster, and generate a tip.

    Args:
        task_completion (float): user's task completion rate.
        stress (int): user's stress level.
        satisfaction (float): user's job satisfaction.
        support (str): manager support level (categorical).
        collaboration (str): team collaboration frequency (categorical).

    Returns:
        tuple: (cluster label + description, generated tip).
    """
    # Create a DataFrame from user input. The engineered features are
    # placeholders: they are not collected from the user but are required
    # so the row aligns with the columns the scaler was fitted on.
    user_data = pd.DataFrame([{
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
        "Work_Life_Balance_Score": 0,
        "Communication_Overhead": 0,
    }])

    # One-hot encode WITHOUT drop_first: a single-row frame has one level per
    # categorical, so drop_first would delete the only dummy and lose the
    # user's choice. reindex() below drops any baseline column not present
    # in the training frame.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
        drop_first=False,
    )

    # Align with df_processed, which was already built (and imputed) once at
    # module level — the original rebuilt and re-imputed the entire training
    # frame on every single call, which was both slow and redundant.
    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

    # Scale with the scaler fitted on the final, imputed training frame.
    scaled_input = scaler.transform(user_aligned)

    # Predict the cluster and look up its description.
    cluster = kmeans.predict(scaled_input)[0]
    cluster_description = cluster_descriptions[cluster]

    # Generate the tip using the refined generate_tip function.
    tip = generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description)
    return f"Cluster {cluster}: {cluster_description}", tip


print("generate_tip and analyze_productivity functions updated.")

# Colab shell magic — not valid Python in a .py module, so kept as a comment:
# !pip install datasets


def analyze_what_if(task_completion, stress, satisfaction, support, collaboration):
    """Analyze a 'what-if' scenario and generate a tip (used by the what-if tab)."""
    # Create a DataFrame from user input.
    user_data = pd.DataFrame([{
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
    }])

    # drop_first=False for the same single-row reason as analyze_productivity.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
        drop_first=False,
    )

    # Align columns with the training data.
    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

    # Scale the user input.
    scaled_input = scaler.transform(user_aligned)

    # Predict the cluster.
    cluster = kmeans.predict(scaled_input)[0]
    cluster_description = cluster_descriptions[cluster]

    # Generate the tip.
    tip = generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description)
    return f"Cluster {cluster}: {cluster_description}", tip
# --- Gradio UI, version 1 (no feedback buttons) ------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Productivity Profiler")
    gr.Markdown("Enter your productivity metrics to get a personalized tip. This tool analyzes your input and provides an AI-generated tip to help improve your productivity.")
    with gr.Tabs():
        with gr.TabItem("Your Productivity Profile"):
            task_completion = gr.Slider(0, 100, label="Task Completion Rate (%)")
            stress = gr.Slider(1, 10, label="Stress Level (1-10)")
            satisfaction = gr.Slider(0, 100, label="Job Satisfaction (0-100)")
            support = gr.Dropdown(
                choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'],
                label="Manager Support Level",
            )
            collaboration = gr.Dropdown(
                choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'],
                label="Team Collaboration Frequency",
            )
            submit = gr.Button("Analyze")
        with gr.TabItem("Results"):
            profile_output = gr.Textbox(label="Your Productivity Cluster")
            tip_output = gr.Textbox(label="AI-Powered Productivity Tip")
        with gr.TabItem("What-If Scenario"):
            gr.Markdown("Explore how changing your metrics could affect your productivity profile.")
            what_if_task_completion = gr.Slider(0, 100, label="Adjusted Task Completion Rate (%)")
            what_if_stress = gr.Slider(1, 10, label="Adjusted Stress Level (1-10)")
            what_if_satisfaction = gr.Slider(0, 100, label="Adjusted Job Satisfaction (0-100)")
            what_if_support = gr.Dropdown(
                choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'],
                label="Adjusted Manager Support Level",
            )
            what_if_collaboration = gr.Dropdown(
                choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'],
                label="Adjusted Team Collaboration Frequency",
            )
            what_if_submit = gr.Button("Analyze What-If Scenario")
        with gr.TabItem("What-If Results"):
            what_if_profile_output = gr.Textbox(label="What-If Productivity Cluster")
            what_if_tip_output = gr.Textbox(label="What-If AI-Powered Productivity Tip")

    submit.click(
        fn=analyze_productivity,
        inputs=[task_completion, stress, satisfaction, support, collaboration],
        outputs=[profile_output, tip_output],
    )
    what_if_submit.click(
        fn=analyze_what_if,
        inputs=[what_if_task_completion, what_if_stress, what_if_satisfaction, what_if_support, what_if_collaboration],
        outputs=[what_if_profile_output, what_if_tip_output],
    )

demo.launch(share=True)


def save_feedback(tip, feedback):
    """Append the generated tip and the user's feedback to feedback.csv.

    Args:
        tip (str): the generated productivity tip being rated.
        feedback (str): the user's reaction ("👍" or "👎").

    Returns:
        str: a confirmation message for the UI.
    """
    feedback_data = pd.DataFrame({"tip": [tip], "feedback": [feedback]})
    with open("feedback.csv", "a") as f:
        # f.tell() == 0 means the file is empty, so write the CSV header once.
        feedback_data.to_csv(f, header=f.tell() == 0, index=False)
    return "Feedback submitted!"


# --- Gradio UI, version 2 (adds 👍/👎 feedback buttons) -----------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Productivity Profiler")
    gr.Markdown("Enter your productivity metrics to get a personalized tip. This tool analyzes your input and provides an AI-generated tip to help improve your productivity.")
    with gr.Tabs():
        with gr.TabItem("Your Productivity Profile"):
            task_completion = gr.Slider(0, 100, label="Task Completion Rate (%)")
            stress = gr.Slider(1, 10, label="Stress Level (1-10)")
            satisfaction = gr.Slider(0, 100, label="Job Satisfaction (0-100)")
            support = gr.Dropdown(
                choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'],
                label="Manager Support Level",
            )
            collaboration = gr.Dropdown(
                choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'],
                label="Team Collaboration Frequency",
            )
            submit = gr.Button("Analyze")
        with gr.TabItem("Results"):
            profile_output = gr.Textbox(label="Your Productivity Cluster")
            tip_output = gr.Textbox(label="AI-Powered Productivity Tip")
            with gr.Row():
                upvote_button = gr.Button("👍")
                downvote_button = gr.Button("👎")
            feedback_message = gr.Textbox(label="Feedback Status")
        with gr.TabItem("What-If Scenario"):
            gr.Markdown("Explore how changing your metrics could affect your productivity profile.")
            what_if_task_completion = gr.Slider(0, 100, label="Adjusted Task Completion Rate (%)")
            what_if_stress = gr.Slider(1, 10, label="Adjusted Stress Level (1-10)")
            what_if_satisfaction = gr.Slider(0, 100, label="Adjusted Job Satisfaction (0-100)")
            what_if_support = gr.Dropdown(
                choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'],
                label="Adjusted Manager Support Level",
            )
            what_if_collaboration = gr.Dropdown(
                choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'],
                label="Adjusted Team Collaboration Frequency",
            )
            what_if_submit = gr.Button("Analyze What-If Scenario")
        with gr.TabItem("What-If Results"):
            what_if_profile_output = gr.Textbox(label="What-If Productivity Cluster")
            what_if_tip_output = gr.Textbox(label="What-If AI-Powered Productivity Tip")
            with gr.Row():
                what_if_upvote_button = gr.Button("👍")
                what_if_downvote_button = gr.Button("👎")
            what_if_feedback_message = gr.Textbox(label="Feedback Status")

    submit.click(
        fn=analyze_productivity,
        inputs=[task_completion, stress, satisfaction, support, collaboration],
        outputs=[profile_output, tip_output],
    )
    what_if_submit.click(
        fn=analyze_what_if,
        inputs=[what_if_task_completion, what_if_stress, what_if_satisfaction, what_if_support, what_if_collaboration],
        outputs=[what_if_profile_output, what_if_tip_output],
    )

    # The feedback value is bound in a closure instead of passing a hidden
    # gr.Textbox(value=..., visible=False) as an input component — the
    # original constructed throwaway components inside the event wiring,
    # which is fragile and renders invisible components into the layout.
    upvote_button.click(
        fn=lambda tip: save_feedback(tip, "👍"),
        inputs=[tip_output],
        outputs=[feedback_message],
    )
    downvote_button.click(
        fn=lambda tip: save_feedback(tip, "👎"),
        inputs=[tip_output],
        outputs=[feedback_message],
    )
    what_if_upvote_button.click(
        fn=lambda tip: save_feedback(tip, "👍"),
        inputs=[what_if_tip_output],
        outputs=[what_if_feedback_message],
    )
    what_if_downvote_button.click(
        fn=lambda tip: save_feedback(tip, "👎"),
        inputs=[what_if_tip_output],
        outputs=[what_if_feedback_message],
    )

demo.launch(share=True)