# -*- coding: utf-8 -*-
"""FinalAssignment.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1ZivSjhGy8jDJ0SDnaGdad5seVc0wMbQl
"""

from datasets import load_dataset

# Load the remote-worker productivity dataset from the Hugging Face Hub.
dataset = load_dataset("nprak26/remote-worker-productivity")

# Quick exploratory look at the training split.
print(dataset["train"].to_pandas().head())
print(dataset["train"].column_names)

# Work on a pandas DataFrame from here on.
df = dataset["train"].to_pandas()
print(df.describe())

import pandas as pd

# Columns of interest: five numeric scores plus two categorical columns.
features = [
    "Task_Completion_Rate",
    "Productivity_Score",
    "Stress_Level",
    "Job_Satisfaction",
    "Efficiency_Rating",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
]

# One-hot encode the categorical columns; drop_first avoids the dummy trap
# (the alphabetically-first category becomes the implicit baseline).
df_processed = pd.get_dummies(
    df,
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)

# Numeric columns plus the freshly created one-hot columns.
features_to_scale = [
    "Task_Completion_Rate",
    "Productivity_Score",
    "Stress_Level",
    "Job_Satisfaction",
    "Efficiency_Rating",
] + [
    col
    for col in df_processed.columns
    if "Manager_Support_Level_" in col or "Team_Collaboration_Frequency_" in col
]

# Scale every modeling feature into [0, 1] so KMeans distances are comparable.
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed[features_to_scale])
print("Features scaled successfully!")

from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

# n_init=10 is pinned explicitly for consistency with the later cells and to
# avoid sklearn's FutureWarning about the changing n_init default.
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
clusters = kmeans.fit_predict(scaled_features)

# Attach the cluster label to both frames so either can be used for lookups.
df["productivity_cluster"] = clusters
df_processed["productivity_cluster"] = df["productivity_cluster"]

# NOTE: the raw 'features' list contains categorical columns that cannot be
# plotted directly; plotting below uses the numeric 'features_to_scale'.
# We will use the 'features_to_scale' list which contains the one-hot encoded
# numerical columns, since raw categorical columns cannot be plotted.
sns.pairplot(df_processed, vars=features_to_scale, hue="productivity_cluster", palette="tab10")
plt.suptitle("Productivity Clusters", y=1.02)
plt.show()

from transformers import pipeline

# Small instruction-tuned model used to phrase the productivity tips.
generator = pipeline("text2text-generation", model="google/flan-t5-base")


def generate_tip(cluster_data):
    """Generate one actionable productivity tip for a one-hot encoded row.

    Args:
        cluster_data: a row of df_processed (pandas Series) carrying the
            numeric features and the Manager_Support_Level_* dummy columns.

    Returns:
        str: the model-generated tip.
    """
    # Recover the categorical support level from the dummy columns.
    # "High" is the baseline category dropped by get_dummies(drop_first=True).
    manager_support = "High"
    if cluster_data['Manager_Support_Level_Very High']:
        manager_support = "Very High"
    elif cluster_data['Manager_Support_Level_Moderate']:
        manager_support = "Moderate"
    elif cluster_data['Manager_Support_Level_Low']:
        manager_support = "Low"
    elif cluster_data['Manager_Support_Level_Very Low']:
        manager_support = "Very Low"

    prompt = f"""
    Based on the following productivity profile:
    - Task Completion Rate: {cluster_data['Task_Completion_Rate']}
    - Stress Level: {cluster_data['Stress_Level']}
    - Job Satisfaction: {cluster_data['Job_Satisfaction']}
    - Manager Support Level: {manager_support}

    Suggest 1 actionable tip to improve productivity.
    """
    return generator(prompt)[0]["generated_text"]


sample = df_processed[df_processed["productivity_cluster"] == 2].iloc[0]
tip = generate_tip(sample)
print("💡 Productivity Tip:", tip)

# Colab shell magic — not valid Python in a .py module, so kept as a comment:
# !pip install gradio

import gradio as gr

# Sample cluster profiles (you can expand this later)
cluster_examples = {
    0: "High task completion, low stress, strong manager support",
    1: "Moderate productivity, high stress, weak support",
    2: "Low satisfaction, high collaboration, low efficiency",
    3: "Balanced workload, moderate stress, good support",
}


def get_tip(cluster_id):
    """Return (description, tip) for a representative row of a cluster."""
    # Sample from df_processed, not df: generate_tip reads the one-hot
    # Manager_Support_Level_* columns, which only exist in df_processed.
    sample = df_processed[df_processed["productivity_cluster"] == cluster_id].iloc[0]
    tip = generate_tip(sample)
    return cluster_examples[cluster_id], tip


import gradio as gr
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from transformers import pipeline
from datasets import load_dataset

# Load dataset and model (second, self-contained setup pass for the app).
dataset = load_dataset("nprak26/remote-worker-productivity")
df = dataset["train"].to_pandas()

# Select features and apply one-hot encoding for categorical variables.
features = [
    "Task_Completion_Rate",
    "Stress_Level",
    "Job_Satisfaction",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
]
df_processed = pd.get_dummies(
    df[features],
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)

# Scale the numerical features.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed)

# Fit clustering model (n_init pinned for consistency with the other cells).
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
kmeans.fit(scaled_features)

# Load text generation model.
generator = pipeline("text2text-generation", model="google/flan-t5-base")

# Cluster descriptions
cluster_descriptions = {
    0: "High task completion, low stress, strong manager support",
    1: "Moderate productivity, high stress, weak support",
    2: "Low satisfaction, high collaboration, low efficiency",
    3: "Balanced workload, moderate stress, good support",
}
# Function to process user input
def analyze_productivity(task_completion, stress, satisfaction, support, collaboration):
    """Cluster a single user profile and generate a productivity tip.

    Args:
        task_completion (float): task completion rate.
        stress (int): stress level.
        satisfaction (float): job satisfaction.
        support (str): manager support level (categorical).
        collaboration (str): team collaboration frequency (categorical).

    Returns:
        tuple: (cluster description, generated tip).
    """
    # Create a DataFrame from user input.
    user_data = pd.DataFrame([{
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
    }])

    # One-hot encode the user input. drop_first must be False here: on a
    # single-row frame each categorical has exactly one level, so drop_first
    # would delete the only dummy and silently erase the user's choice.
    # reindex() below discards any baseline columns not in the training frame.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
        drop_first=False,
    )

    # Align columns with the training data.
    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

    # Scale the user input using the fitted scaler.
    scaled_input = scaler.transform(user_aligned)

    # Predict the cluster.
    cluster = kmeans.predict(scaled_input)[0]

    prompt = f"""
    Based on the following productivity profile:
    - Task Completion Rate: {task_completion}
    - Stress Level: {stress}
    - Job Satisfaction: {satisfaction}
    - Manager Support Level: {support}
    - Collaboration Frequency: {collaboration}

    Suggest 1 actionable tip to improve productivity.
    """
    tip = generator(prompt)[0]["generated_text"]
    return cluster_descriptions[cluster], tip


# --- Feature engineering -----------------------------------------------------

# Create Work_Life_Balance_Score.
df['Work_Life_Balance_Score'] = df['Work_Hours_Per_Week'] * df['Work_Life_Balance']

# Create Communication_Overhead — map categorical frequency to a number.
collaboration_mapping = {
    'Monthly': 1,
    'Weekly': 2,
    'Few times per week': 3,
    'Daily': 4,
}
df['Team_Collaboration_Numerical'] = df['Team_Collaboration_Frequency'].map(collaboration_mapping)
df['Communication_Overhead'] = df['Meetings_Per_Week'] + df['Team_Collaboration_Numerical']

# Update the features list.
features = [
    "Task_Completion_Rate",
    "Stress_Level",
    "Job_Satisfaction",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
    "Work_Life_Balance_Score",
    "Communication_Overhead",
]
print("New features created and features list updated.")

# Impute missing Communication_Overhead BEFORE building/scaling the model
# frame (the original built and scaled twice, once with NaNs still present).
# Plain assignment instead of chained .fillna(inplace=True), which is
# unreliable under pandas copy-on-write.
df['Communication_Overhead'] = df['Communication_Overhead'].fillna(
    df['Communication_Overhead'].median()
)

# Build the processed frame once, then scale all features.
df_processed = pd.get_dummies(
    df[features],
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed)
print("df_processed updated and features scaled.")

# --- Elbow method to choose k ------------------------------------------------
inertia = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, random_state=42, n_init=10)
    kmeans.fit(scaled_features)
    inertia.append(kmeans.inertia_)

# Plot the elbow curve.
plt.figure(figsize=(10, 6))
plt.plot(range(1, 11), inertia, marker='o')
plt.title('Elbow Method For Optimal k')
plt.xlabel('Number of clusters')
plt.ylabel('Inertia')
plt.show()

# Fit the KMeans model with the optimal number of clusters (k=3).
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
clusters = kmeans.fit_predict(scaled_features)

# Add the new cluster labels to the DataFrame.
df['productivity_cluster'] = clusters
print("KMeans model re-fitted with 3 clusters and labels added to the DataFrame.")


def generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description):
    """Generate an actionable productivity tip from user input and cluster traits.

    Args:
        task_completion (float): user's task completion rate.
        stress (int): user's stress level.
        satisfaction (float): user's job satisfaction.
        support (str): manager support level (categorical).
        collaboration (str): team collaboration frequency (categorical).
        cluster_description (str): description of the predicted cluster.

    Returns:
        str: an actionable productivity tip.
    """
    prompt = f"""
    Based on the following productivity profile:
    - Task Completion Rate: {task_completion}%
    - Stress Level: {stress}/10
    - Job Satisfaction: {satisfaction}%
    - Manager Support Level: {support}
    - Team Collaboration Frequency: {collaboration}

    This profile aligns with a cluster characterized by: {cluster_description}

    Suggest one highly actionable tip to improve productivity, specifically tailored to this profile and cluster characteristics. The tip should be concise and practical.
    """
    # max_new_tokens keeps the generated tip short and the call fast.
    tip = generator(prompt, max_new_tokens=50)[0]["generated_text"]
    return tip


def analyze_productivity(task_completion, stress, satisfaction, support, collaboration):
    """Analyze user productivity metrics, predict a cluster, and generate a tip.

    Args:
        task_completion (float): user's task completion rate.
        stress (int): user's stress level.
        satisfaction (float): user's job satisfaction.
        support (str): manager support level (categorical).
        collaboration (str): team collaboration frequency (categorical).

    Returns:
        tuple: (cluster label + description, generated tip).
    """
    # Create a DataFrame from user input. The engineered features are
    # placeholders: they are not collected from the user but are required
    # so the row aligns with the columns the scaler was fitted on.
    user_data = pd.DataFrame([{
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
        "Work_Life_Balance_Score": 0,
        "Communication_Overhead": 0,
    }])

    # One-hot encode WITHOUT drop_first: a single-row frame has one level per
    # categorical, so drop_first would delete the only dummy and lose the
    # user's choice. reindex() below drops any baseline column not present
    # in the training frame.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
        drop_first=False,
    )

    # Align with df_processed, which was already built (and imputed) once at
    # module level — the original rebuilt and re-imputed the entire training
    # frame on every single call, which was both slow and redundant.
    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

    # Scale with the scaler fitted on the final, imputed training frame.
    scaled_input = scaler.transform(user_aligned)

    # Predict the cluster and look up its description.
    cluster = kmeans.predict(scaled_input)[0]
    cluster_description = cluster_descriptions[cluster]

    # Generate the tip using the refined generate_tip function.
    tip = generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description)
    return f"Cluster {cluster}: {cluster_description}", tip


print("generate_tip and analyze_productivity functions updated.")

# Colab shell magic — not valid Python in a .py module, so kept as a comment:
# !pip install datasets


def analyze_what_if(task_completion, stress, satisfaction, support, collaboration):
    """Analyze a 'what-if' scenario and generate a tip (used by the what-if tab)."""
    # Create a DataFrame from user input.
    user_data = pd.DataFrame([{
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
    }])

    # drop_first=False for the same single-row reason as analyze_productivity.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
        drop_first=False,
    )

    # Align columns with the training data.
    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

    # Scale the user input.
    scaled_input = scaler.transform(user_aligned)

    # Predict the cluster.
    cluster = kmeans.predict(scaled_input)[0]
    cluster_description = cluster_descriptions[cluster]

    # Generate the tip.
    tip = generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description)
    return f"Cluster {cluster}: {cluster_description}", tip
# --- Gradio UI, version 1 (no feedback buttons) ------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Productivity Profiler")
    gr.Markdown("Enter your productivity metrics to get a personalized tip. This tool analyzes your input and provides an AI-generated tip to help improve your productivity.")
    with gr.Tabs():
        with gr.TabItem("Your Productivity Profile"):
            task_completion = gr.Slider(0, 100, label="Task Completion Rate (%)")
            stress = gr.Slider(1, 10, label="Stress Level (1-10)")
            satisfaction = gr.Slider(0, 100, label="Job Satisfaction (0-100)")
            support = gr.Dropdown(
                choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'],
                label="Manager Support Level",
            )
            collaboration = gr.Dropdown(
                choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'],
                label="Team Collaboration Frequency",
            )
            submit = gr.Button("Analyze")
        with gr.TabItem("Results"):
            profile_output = gr.Textbox(label="Your Productivity Cluster")
            tip_output = gr.Textbox(label="AI-Powered Productivity Tip")
        with gr.TabItem("What-If Scenario"):
            gr.Markdown("Explore how changing your metrics could affect your productivity profile.")
            what_if_task_completion = gr.Slider(0, 100, label="Adjusted Task Completion Rate (%)")
            what_if_stress = gr.Slider(1, 10, label="Adjusted Stress Level (1-10)")
            what_if_satisfaction = gr.Slider(0, 100, label="Adjusted Job Satisfaction (0-100)")
            what_if_support = gr.Dropdown(
                choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'],
                label="Adjusted Manager Support Level",
            )
            what_if_collaboration = gr.Dropdown(
                choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'],
                label="Adjusted Team Collaboration Frequency",
            )
            what_if_submit = gr.Button("Analyze What-If Scenario")
        with gr.TabItem("What-If Results"):
            what_if_profile_output = gr.Textbox(label="What-If Productivity Cluster")
            what_if_tip_output = gr.Textbox(label="What-If AI-Powered Productivity Tip")

    submit.click(
        fn=analyze_productivity,
        inputs=[task_completion, stress, satisfaction, support, collaboration],
        outputs=[profile_output, tip_output],
    )
    what_if_submit.click(
        fn=analyze_what_if,
        inputs=[what_if_task_completion, what_if_stress, what_if_satisfaction, what_if_support, what_if_collaboration],
        outputs=[what_if_profile_output, what_if_tip_output],
    )

demo.launch(share=True)


def save_feedback(tip, feedback):
    """Append the generated tip and the user's feedback to feedback.csv.

    Args:
        tip (str): the generated productivity tip being rated.
        feedback (str): the user's reaction ("👍" or "👎").

    Returns:
        str: a confirmation message for the UI.
    """
    feedback_data = pd.DataFrame({"tip": [tip], "feedback": [feedback]})
    with open("feedback.csv", "a") as f:
        # f.tell() == 0 means the file is empty, so write the CSV header once.
        feedback_data.to_csv(f, header=f.tell() == 0, index=False)
    return "Feedback submitted!"


# --- Gradio UI, version 2 (adds 👍/👎 feedback buttons) -----------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Productivity Profiler")
    gr.Markdown("Enter your productivity metrics to get a personalized tip. This tool analyzes your input and provides an AI-generated tip to help improve your productivity.")
    with gr.Tabs():
        with gr.TabItem("Your Productivity Profile"):
            task_completion = gr.Slider(0, 100, label="Task Completion Rate (%)")
            stress = gr.Slider(1, 10, label="Stress Level (1-10)")
            satisfaction = gr.Slider(0, 100, label="Job Satisfaction (0-100)")
            support = gr.Dropdown(
                choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'],
                label="Manager Support Level",
            )
            collaboration = gr.Dropdown(
                choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'],
                label="Team Collaboration Frequency",
            )
            submit = gr.Button("Analyze")
        with gr.TabItem("Results"):
            profile_output = gr.Textbox(label="Your Productivity Cluster")
            tip_output = gr.Textbox(label="AI-Powered Productivity Tip")
            with gr.Row():
                upvote_button = gr.Button("👍")
                downvote_button = gr.Button("👎")
            feedback_message = gr.Textbox(label="Feedback Status")
        with gr.TabItem("What-If Scenario"):
            gr.Markdown("Explore how changing your metrics could affect your productivity profile.")
            what_if_task_completion = gr.Slider(0, 100, label="Adjusted Task Completion Rate (%)")
            what_if_stress = gr.Slider(1, 10, label="Adjusted Stress Level (1-10)")
            what_if_satisfaction = gr.Slider(0, 100, label="Adjusted Job Satisfaction (0-100)")
            what_if_support = gr.Dropdown(
                choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'],
                label="Adjusted Manager Support Level",
            )
            what_if_collaboration = gr.Dropdown(
                choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'],
                label="Adjusted Team Collaboration Frequency",
            )
            what_if_submit = gr.Button("Analyze What-If Scenario")
        with gr.TabItem("What-If Results"):
            what_if_profile_output = gr.Textbox(label="What-If Productivity Cluster")
            what_if_tip_output = gr.Textbox(label="What-If AI-Powered Productivity Tip")
            with gr.Row():
                what_if_upvote_button = gr.Button("👍")
                what_if_downvote_button = gr.Button("👎")
            what_if_feedback_message = gr.Textbox(label="Feedback Status")

    submit.click(
        fn=analyze_productivity,
        inputs=[task_completion, stress, satisfaction, support, collaboration],
        outputs=[profile_output, tip_output],
    )
    what_if_submit.click(
        fn=analyze_what_if,
        inputs=[what_if_task_completion, what_if_stress, what_if_satisfaction, what_if_support, what_if_collaboration],
        outputs=[what_if_profile_output, what_if_tip_output],
    )

    # The feedback value is bound in a closure instead of passing a hidden
    # gr.Textbox(value=..., visible=False) as an input component — the
    # original constructed throwaway components inside the event wiring,
    # which is fragile and renders invisible components into the layout.
    upvote_button.click(
        fn=lambda tip: save_feedback(tip, "👍"),
        inputs=[tip_output],
        outputs=[feedback_message],
    )
    downvote_button.click(
        fn=lambda tip: save_feedback(tip, "👎"),
        inputs=[tip_output],
        outputs=[feedback_message],
    )
    what_if_upvote_button.click(
        fn=lambda tip: save_feedback(tip, "👍"),
        inputs=[what_if_tip_output],
        outputs=[what_if_feedback_message],
    )
    what_if_downvote_button.click(
        fn=lambda tip: save_feedback(tip, "👎"),
        inputs=[what_if_tip_output],
        outputs=[what_if_feedback_message],
    )

demo.launch(share=True)