# finale / finalassignment.py
# Uploaded to the Hugging Face Hub by liry1312 ("Upload 2 files", commit 1693924, verified).
# File size: 21.8 kB
# -*- coding: utf-8 -*-
"""FinalAssignment.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1ZivSjhGy8jDJ0SDnaGdad5seVc0wMbQl
"""
from datasets import load_dataset

# Load the dataset from Hugging Face and convert the "train" split to pandas
# exactly once; the same frame is reused for the preview and the summary
# instead of converting the split a second time.
dataset = load_dataset("nprak26/remote-worker-productivity")
df = dataset["train"].to_pandas()

# Check the first few rows
print(df.head())

# Print column names
print(dataset["train"].column_names)

# Describe numerical features
print(df.describe())
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Columns of interest for the productivity profile. The last two are
# categorical and are expanded into indicator columns below.
features = [
    "Task_Completion_Rate",
    "Productivity_Score",
    "Stress_Level",
    "Job_Satisfaction",
    "Efficiency_Rating",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
]

# One-hot encode the two categorical columns. drop_first=True removes one
# indicator per column, so that level is represented by all-zero indicators.
df_processed = pd.get_dummies(
    df,
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)

# Columns fed to the scaler: the numeric metrics followed by every generated
# indicator column, in the order they appear in df_processed.
features_to_scale = [
    "Task_Completion_Rate",
    "Productivity_Score",
    "Stress_Level",
    "Job_Satisfaction",
    "Efficiency_Rating",
]
features_to_scale.extend(
    col
    for col in df_processed.columns
    if "Manager_Support_Level_" in col or "Team_Collaboration_Frequency_" in col
)

# Rescale every selected column with MinMaxScaler before clustering.
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed[features_to_scale])
print("Features scaled successfully!")
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Cluster the scaled feature matrix built in the preprocessing step.
# n_init is pinned explicitly: its default changed between scikit-learn
# releases, and the later fits in this file already use n_init=10, so pinning
# it keeps the clustering reproducible and consistent across the file.
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
clusters = kmeans.fit_predict(scaled_features)

# Attach the cluster labels to both the raw and the one-hot encoded frames.
# Both frames share the same row order, so the label array lines up with each.
df["productivity_cluster"] = clusters
df_processed["productivity_cluster"] = clusters

# The original 'features' list contains categorical columns that can't be
# plotted, so the pair plot uses 'features_to_scale', which holds the numeric
# metrics plus the one-hot encoded indicator columns.
sns.pairplot(
    df_processed,
    vars=features_to_scale,
    hue="productivity_cluster",
    palette="tab10",
)
plt.suptitle("Productivity Clusters", y=1.02)
plt.show()
from transformers import pipeline

# Text-to-text model used to turn a productivity profile into a written tip.
TIP_MODEL_NAME = "google/flan-t5-base"
generator = pipeline(task="text2text-generation", model=TIP_MODEL_NAME)
def generate_tip(cluster_data):
    """Generate one productivity tip for a single worker profile.

    Args:
        cluster_data: A mapping-like row (for example a pandas Series) holding
            'Task_Completion_Rate', 'Stress_Level' and 'Job_Satisfaction'.
            Manager support may be supplied either as the raw
            'Manager_Support_Level' value or as the one-hot indicator columns
            'Manager_Support_Level_<level>'.

    Returns:
        The text produced by the module-level ``generator`` pipeline.
    """
    # Use the raw categorical value when the row still carries it. This makes
    # the function usable with un-encoded rows, which previously raised
    # KeyError on the first indicator lookup.
    manager_support = cluster_data.get("Manager_Support_Level")
    if manager_support is None:
        # One-hot encoded row: "High" is the level represented by all
        # indicators being false. Missing indicator columns count as false.
        manager_support = "High"
        for level in ("Very High", "Moderate", "Low", "Very Low"):
            if cluster_data.get(f"Manager_Support_Level_{level}", False):
                manager_support = level
                break

    prompt = (
        "\n"
        "Based on the following productivity profile:\n"
        f"- Task Completion Rate: {cluster_data['Task_Completion_Rate']}\n"
        f"- Stress Level: {cluster_data['Stress_Level']}\n"
        f"- Job Satisfaction: {cluster_data['Job_Satisfaction']}\n"
        f"- Manager Support Level: {manager_support}\n"
        "Suggest 1 actionable tip to improve productivity.\n"
    )
    return generator(prompt)[0]["generated_text"]
# Generate a tip for the first row assigned to cluster 2 and print it.
# The leading emoji was mis-decoded (mojibake) in the original string literal.
sample = df_processed[df_processed["productivity_cluster"] == 2].iloc[0]
tip = generate_tip(sample)
print("💡 Productivity Tip:", tip)
# Requires Gradio; install it from a shell (or a notebook cell) first: pip install gradio
import gradio as gr

# Sample cluster profiles (you can expand this later), keyed by cluster id.
# The position of each description in the tuple is its cluster id.
cluster_examples = dict(enumerate((
    "High task completion, low stress, strong manager support",
    "Moderate productivity, high stress, weak support",
    "Low satisfaction, high collaboration, low efficiency",
    "Balanced workload, moderate stress, good support",
)))
def get_tip(cluster_id):
    """Return the example description and a generated tip for one cluster.

    Args:
        cluster_id: Cluster label to look up; must be a key of
            ``cluster_examples`` and occur in ``df["productivity_cluster"]``.

    Returns:
        tuple: (cluster description, generated tip text).
    """
    # Cluster labels are read from df, but the sample row is taken from the
    # one-hot encoded frame: generate_tip looks up the
    # 'Manager_Support_Level_<level>' indicator columns, which only exist in
    # df_processed. Sampling from df raised KeyError.
    # NOTE(review): relies on df_processed sharing df's index - confirm if the
    # frames are ever rebuilt independently.
    in_cluster = df["productivity_cluster"] == cluster_id
    sample = df_processed.loc[in_cluster].iloc[0]
    tip = generate_tip(sample)
    return cluster_examples[cluster_id], tip
import gradio as gr
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import KMeans
from transformers import pipeline
from datasets import load_dataset
# Load dataset and model
dataset = load_dataset("nprak26/remote-worker-productivity")
df = dataset["train"].to_pandas()

# Select features and apply one-hot encoding for categorical variables
features = [
    "Task_Completion_Rate",
    "Stress_Level",
    "Job_Satisfaction",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
]
df_processed = pd.get_dummies(
    df[features],
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)

# Scale every column of the encoded frame
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed)

# Fit clustering model. n_init is pinned explicitly: its default changed
# between scikit-learn releases, and the later fits in this file use
# n_init=10, so pinning it keeps results reproducible and consistent.
kmeans = KMeans(n_clusters=4, random_state=42, n_init=10)
kmeans.fit(scaled_features)

# Load text generation model
generator = pipeline("text2text-generation", model="google/flan-t5-base")

# Cluster descriptions shown to the user, keyed by cluster label.
# NOTE(review): these texts are hard-coded rather than derived from the fitted
# centroids - confirm they still describe the clusters after any refit.
cluster_descriptions = {
    0: "High task completion, low stress, strong manager support",
    1: "Moderate productivity, high stress, weak support",
    2: "Low satisfaction, high collaboration, low efficiency",
    3: "Balanced workload, moderate stress, good support",
}
# Function to process user input
def analyze_productivity(task_completion, stress, satisfaction, support, collaboration):
    """Predict the productivity cluster for one user and generate a tip.

    Args:
        task_completion: Task completion rate entered by the user.
        stress: Stress level entered by the user.
        satisfaction: Job satisfaction entered by the user.
        support: Manager support level (categorical label).
        collaboration: Team collaboration frequency (categorical label).

    Returns:
        tuple: (cluster description, generated tip text).
    """
    # Create a one-row DataFrame from user input
    user_data = pd.DataFrame([{
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
    }])

    # One-hot encode WITHOUT drop_first. A one-row frame has exactly one level
    # per categorical column, so drop_first=True would discard that single
    # indicator and every user would be encoded as the baseline level. The
    # reindex below removes the indicator of the level the training encoding
    # dropped, and adds the remaining training columns as 0.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    )
    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

    # Scale the user input using the fitted scaler, then predict the cluster
    scaled_input = scaler.transform(user_aligned)
    cluster = kmeans.predict(scaled_input)[0]

    prompt = (
        "\n"
        "Based on the following productivity profile:\n"
        f"- Task Completion Rate: {task_completion}\n"
        f"- Stress Level: {stress}\n"
        f"- Job Satisfaction: {satisfaction}\n"
        f"- Manager Support Level: {support}\n"
        f"- Collaboration Frequency: {collaboration}\n"
        "Suggest 1 actionable tip to improve productivity.\n"
    )
    tip = generator(prompt)[0]["generated_text"]
    return cluster_descriptions[cluster], tip
# Work_Life_Balance_Score: weekly work hours multiplied by the work-life
# balance value, row by row.
df["Work_Life_Balance_Score"] = df["Work_Hours_Per_Week"].mul(df["Work_Life_Balance"])

# Communication_Overhead needs a numeric collaboration frequency, so each
# category is mapped to its rank (1 = least frequent ... 4 = most frequent).
# Labels missing from the mapping become NaN in the mapped column.
collaboration_mapping = {
    label: rank
    for rank, label in enumerate(
        ("Monthly", "Weekly", "Few times per week", "Daily"), start=1
    )
}
df["Team_Collaboration_Numerical"] = df["Team_Collaboration_Frequency"].map(
    collaboration_mapping
)
df["Communication_Overhead"] = df["Meetings_Per_Week"].add(
    df["Team_Collaboration_Numerical"]
)

# Feature list now also carries the two engineered columns.
features = [
    "Task_Completion_Rate",
    "Stress_Level",
    "Job_Satisfaction",
    "Manager_Support_Level",
    "Team_Collaboration_Frequency",
    "Work_Life_Balance_Score",
    "Communication_Overhead",
]
print("New features created and features list updated.")
# Handle missing values in 'Communication_Overhead' BEFORE encoding/scaling.
# The result is assigned back to the column: calling fillna(inplace=True) on a
# selected column is chained assignment, which is deprecated in pandas 2.x and
# leaves df unchanged once Copy-on-Write is active.
df["Communication_Overhead"] = df["Communication_Overhead"].fillna(
    df["Communication_Overhead"].median()
)

# Redefine df_processed to include the new features and one-hot encode.
# This is done once, after imputation; the earlier pass that encoded and
# scaled the un-imputed data was immediately overwritten and has been removed.
df_processed = pd.get_dummies(
    df[features],
    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    drop_first=True,
)

# Scale all features
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(df_processed)
print("df_processed updated and features scaled.")
# Elbow method: fit KMeans for k = 1..10 on the scaled features and record the
# inertia of each fit, so the bend in the curve can be read off the plot.
k_values = range(1, 11)
inertia = []
for n_clusters in k_values:
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10).fit(scaled_features)
    inertia.append(kmeans.inertia_)

# Draw inertia against the number of clusters.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(k_values, inertia, marker='o')
ax.set_title('Elbow Method For Optimal k')
ax.set_xlabel('Number of clusters')
ax.set_ylabel('Inertia')
plt.show()
# Final model: refit KMeans with the chosen number of clusters (k=3) and read
# the per-row assignments from the fitted estimator.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10).fit(scaled_features)
clusters = kmeans.labels_

# Store the new labels on the raw DataFrame.
df['productivity_cluster'] = clusters
print("KMeans model re-fitted with 3 clusters and labels added to the DataFrame.")
def generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description):
    """Ask the text-generation model for one tailored productivity tip.

    The prompt lists the user's metrics together with the description of the
    cluster the user was assigned to, then requests a single concise tip.

    Args:
        task_completion (float): Task completion rate, rendered with a '%' suffix.
        stress (int): Stress level, rendered as '<value>/10'.
        satisfaction (float): Job satisfaction, rendered with a '%' suffix.
        support (str): Manager support level label.
        collaboration (str): Team collaboration frequency label.
        cluster_description (str): Description of the predicted cluster.

    Returns:
        str: The generated tip text.
    """
    # The prompt starts and ends with a newline, hence the empty first and
    # last entries.
    prompt_lines = [
        "",
        "Based on the following productivity profile:",
        f"- Task Completion Rate: {task_completion}%",
        f"- Stress Level: {stress}/10",
        f"- Job Satisfaction: {satisfaction}%",
        f"- Manager Support Level: {support}",
        f"- Team Collaboration Frequency: {collaboration}",
        f"This profile aligns with a cluster characterized by: {cluster_description}",
        "Suggest one highly actionable tip to improve productivity, specifically tailored to this profile and cluster characteristics. The tip should be concise and practical.",
        "",
    ]
    prompt = "\n".join(prompt_lines)

    # `generator` is the module-level pipeline; max_new_tokens=50 keeps the
    # answer short.
    outputs = generator(prompt, max_new_tokens=50)
    return outputs[0]["generated_text"]
# Update the analyze_productivity function to pass the correct arguments to generate_tip
def analyze_productivity(task_completion, stress, satisfaction, support, collaboration):
    """Analyze user productivity metrics, predict the cluster, and generate a tip.

    Args:
        task_completion (float): User's task completion rate.
        stress (int): User's stress level.
        satisfaction (float): User's job satisfaction.
        support (str): User's manager support level (categorical).
        collaboration (str): User's team collaboration frequency (categorical).

    Returns:
        tuple: ("Cluster <id>: <description>", generated_tip)
    """
    # Create a one-row DataFrame from user input
    user_data = pd.DataFrame([{
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
    }])

    # One-hot encode WITHOUT drop_first. A one-row frame has exactly one level
    # per categorical column, so drop_first=True would discard that single
    # indicator and every user would be encoded as the baseline level.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    )

    # Align with the columns the scaler was fitted on. df_processed is reused
    # directly instead of re-encoding the whole training set on every request.
    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

    # The engineered features are not collected from the user. Fill them with
    # the training median rather than 0, so the placeholder stays inside the
    # range the scaler was fitted on and does not pull the prediction.
    for col in ("Work_Life_Balance_Score", "Communication_Overhead"):
        if col in df_processed.columns:
            user_aligned[col] = df_processed[col].median()

    # Scale the user input with the fitted scaler and predict the cluster
    scaled_input = scaler.transform(user_aligned)
    cluster = kmeans.predict(scaled_input)[0]

    # Get the cluster description
    cluster_description = cluster_descriptions[cluster]

    # Generate the tip using the refined generate_tip function
    tip = generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description)
    return f"Cluster {cluster}: {cluster_description}", tip


# Print a message indicating the functions have been updated
print("generate_tip and analyze_productivity functions updated.")
# Requires the Hugging Face `datasets` package; install it from a shell (or a notebook cell) first: pip install datasets
def analyze_what_if(task_completion, stress, satisfaction, support, collaboration):
    """Analyze a 'what-if' productivity scenario and generate a tip.

    Runs the same encode / scale / predict / generate pipeline as
    analyze_productivity; it backs the what-if tab of the UI.

    Args:
        task_completion: Adjusted task completion rate.
        stress: Adjusted stress level.
        satisfaction: Adjusted job satisfaction.
        support: Adjusted manager support level (categorical label).
        collaboration: Adjusted team collaboration frequency (categorical label).

    Returns:
        tuple: ("Cluster <id>: <description>", generated_tip)
    """
    # Create a one-row DataFrame from user input
    user_data = pd.DataFrame([{
        "Task_Completion_Rate": task_completion,
        "Stress_Level": stress,
        "Job_Satisfaction": satisfaction,
        "Manager_Support_Level": support,
        "Team_Collaboration_Frequency": collaboration,
    }])

    # One-hot encode WITHOUT drop_first. A one-row frame has exactly one level
    # per categorical column, so drop_first=True would discard that single
    # indicator and every scenario would be encoded as the baseline level.
    user_processed = pd.get_dummies(
        user_data,
        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
    )

    # Align columns with the training data the scaler was fitted on
    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

    # Engineered features are not part of the form. Use the training median
    # instead of a raw 0, which lies outside the range the scaler was fitted on.
    for col in ("Work_Life_Balance_Score", "Communication_Overhead"):
        if col in df_processed.columns:
            user_aligned[col] = df_processed[col].median()

    # Scale the user input and predict the cluster
    scaled_input = scaler.transform(user_aligned)
    cluster = kmeans.predict(scaled_input)[0]

    # Get the cluster description
    cluster_description = cluster_descriptions[cluster]

    # Generate the tip
    tip = generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description)
    return f"Cluster {cluster}: {cluster_description}", tip
# Choices shared by the profile tab and the what-if tab.
SUPPORT_LEVELS = ['Very Low', 'Low', 'Moderate', 'High', 'Very High']
COLLABORATION_FREQUENCIES = ['Monthly', 'Weekly', 'Few times per week', 'Daily']

# Gradio UI: one input tab and one result tab for the real profile, and the
# same pair for a what-if scenario. The rocket emoji in the title was
# mis-decoded (mojibake) in the original string and is restored here.
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Productivity Profiler")
    gr.Markdown("Enter your productivity metrics to get a personalized tip. This tool analyzes your input and provides an AI-generated tip to help improve your productivity.")

    with gr.Tabs():
        with gr.TabItem("Your Productivity Profile"):
            task_completion = gr.Slider(0, 100, label="Task Completion Rate (%)")
            stress = gr.Slider(1, 10, label="Stress Level (1-10)")
            satisfaction = gr.Slider(0, 100, label="Job Satisfaction (0-100)")
            support = gr.Dropdown(
                choices=SUPPORT_LEVELS,
                label="Manager Support Level",
            )
            collaboration = gr.Dropdown(
                choices=COLLABORATION_FREQUENCIES,
                label="Team Collaboration Frequency",
            )
            submit = gr.Button("Analyze")

        with gr.TabItem("Results"):
            profile_output = gr.Textbox(label="Your Productivity Cluster")
            tip_output = gr.Textbox(label="AI-Powered Productivity Tip")

        with gr.TabItem("What-If Scenario"):
            gr.Markdown("Explore how changing your metrics could affect your productivity profile.")
            what_if_task_completion = gr.Slider(0, 100, label="Adjusted Task Completion Rate (%)")
            what_if_stress = gr.Slider(1, 10, label="Adjusted Stress Level (1-10)")
            what_if_satisfaction = gr.Slider(0, 100, label="Adjusted Job Satisfaction (0-100)")
            what_if_support = gr.Dropdown(
                choices=SUPPORT_LEVELS,
                label="Adjusted Manager Support Level",
            )
            what_if_collaboration = gr.Dropdown(
                choices=COLLABORATION_FREQUENCIES,
                label="Adjusted Team Collaboration Frequency",
            )
            what_if_submit = gr.Button("Analyze What-If Scenario")

        with gr.TabItem("What-If Results"):
            what_if_profile_output = gr.Textbox(label="What-If Productivity Cluster")
            what_if_tip_output = gr.Textbox(label="What-If AI-Powered Productivity Tip")

    # Wire each button to its analysis function; both return
    # (cluster text, tip text), matching the two output boxes.
    submit.click(
        fn=analyze_productivity,
        inputs=[task_completion, stress, satisfaction, support, collaboration],
        outputs=[profile_output, tip_output],
    )
    what_if_submit.click(
        fn=analyze_what_if,
        inputs=[what_if_task_completion, what_if_stress, what_if_satisfaction, what_if_support, what_if_collaboration],
        outputs=[what_if_profile_output, what_if_tip_output],
    )

demo.launch(share=True)
def save_feedback(tip, feedback):
    """Append a generated tip and the user's feedback to feedback.csv.

    The file is created in the current working directory if needed. The header
    row is written only when the file is empty at the time of the call.

    Args:
        tip: The tip text the feedback refers to.
        feedback: The feedback value to record (the UI passes an emoji).

    Returns:
        str: The confirmation message "Feedback submitted!".
    """
    feedback_data = pd.DataFrame({"tip": [tip], "feedback": [feedback]})
    # encoding="utf-8": feedback values are emoji, which the platform default
    # encoding cannot always represent.
    # newline="": pandas emits its own line terminators, so the text layer
    # must not translate them again (avoids blank rows on Windows).
    with open("feedback.csv", "a", encoding="utf-8", newline="") as f:
        # In append mode the stream is positioned at the end of the file, so a
        # position of 0 means the file is empty and still needs a header.
        feedback_data.to_csv(f, header=f.tell() == 0, index=False)
    return "Feedback submitted!"
# Choices shared by the profile tab and the what-if tab.
SUPPORT_LEVELS = ['Very Low', 'Low', 'Moderate', 'High', 'Very High']
COLLABORATION_FREQUENCIES = ['Monthly', 'Weekly', 'Few times per week', 'Daily']

# Gradio UI with feedback buttons: the same four tabs as the earlier UI, plus
# thumbs-up / thumbs-down buttons under each generated tip. The emoji in the
# title, the button labels and the recorded feedback values were mis-decoded
# (mojibake) in the original string literals and are restored here.
with gr.Blocks() as demo:
    gr.Markdown("# 🚀 Productivity Profiler")
    gr.Markdown("Enter your productivity metrics to get a personalized tip. This tool analyzes your input and provides an AI-generated tip to help improve your productivity.")

    with gr.Tabs():
        with gr.TabItem("Your Productivity Profile"):
            task_completion = gr.Slider(0, 100, label="Task Completion Rate (%)")
            stress = gr.Slider(1, 10, label="Stress Level (1-10)")
            satisfaction = gr.Slider(0, 100, label="Job Satisfaction (0-100)")
            support = gr.Dropdown(
                choices=SUPPORT_LEVELS,
                label="Manager Support Level",
            )
            collaboration = gr.Dropdown(
                choices=COLLABORATION_FREQUENCIES,
                label="Team Collaboration Frequency",
            )
            submit = gr.Button("Analyze")

        with gr.TabItem("Results"):
            profile_output = gr.Textbox(label="Your Productivity Cluster")
            tip_output = gr.Textbox(label="AI-Powered Productivity Tip")
            with gr.Row():
                upvote_button = gr.Button("👍")
                downvote_button = gr.Button("👎")
            feedback_message = gr.Textbox(label="Feedback Status")

        with gr.TabItem("What-If Scenario"):
            gr.Markdown("Explore how changing your metrics could affect your productivity profile.")
            what_if_task_completion = gr.Slider(0, 100, label="Adjusted Task Completion Rate (%)")
            what_if_stress = gr.Slider(1, 10, label="Adjusted Stress Level (1-10)")
            what_if_satisfaction = gr.Slider(0, 100, label="Adjusted Job Satisfaction (0-100)")
            what_if_support = gr.Dropdown(
                choices=SUPPORT_LEVELS,
                label="Adjusted Manager Support Level",
            )
            what_if_collaboration = gr.Dropdown(
                choices=COLLABORATION_FREQUENCIES,
                label="Adjusted Team Collaboration Frequency",
            )
            what_if_submit = gr.Button("Analyze What-If Scenario")

        with gr.TabItem("What-If Results"):
            what_if_profile_output = gr.Textbox(label="What-If Productivity Cluster")
            what_if_tip_output = gr.Textbox(label="What-If AI-Powered Productivity Tip")
            with gr.Row():
                what_if_upvote_button = gr.Button("👍")
                what_if_downvote_button = gr.Button("👎")
            what_if_feedback_message = gr.Textbox(label="Feedback Status")

    # Wire each analysis button to its function; both return
    # (cluster text, tip text), matching the two output boxes.
    submit.click(
        fn=analyze_productivity,
        inputs=[task_completion, stress, satisfaction, support, collaboration],
        outputs=[profile_output, tip_output],
    )
    what_if_submit.click(
        fn=analyze_what_if,
        inputs=[what_if_task_completion, what_if_stress, what_if_satisfaction, what_if_support, what_if_collaboration],
        outputs=[what_if_profile_output, what_if_tip_output],
    )

    # Feedback wiring: (button, tip box to record, status box, vote value).
    # The vote reaches save_feedback through a hidden textbox holding a fixed
    # value, as in the original four hand-written handlers.
    feedback_wiring = (
        (upvote_button, tip_output, feedback_message, "👍"),
        (downvote_button, tip_output, feedback_message, "👎"),
        (what_if_upvote_button, what_if_tip_output, what_if_feedback_message, "👍"),
        (what_if_downvote_button, what_if_tip_output, what_if_feedback_message, "👎"),
    )
    for vote_button, tip_box, status_box, vote in feedback_wiring:
        vote_button.click(
            fn=save_feedback,
            inputs=[tip_box, gr.Textbox(value=vote, visible=False)],
            outputs=[status_box],
        )

demo.launch(share=True)