Spaces:

liry1312
/

finale

Sleeping

App Files Files Community

finale / finalassignment.py

liry1312

Upload 2 files

1693924 verified 7 months ago

raw

history blame

21.8 kB

	# -*- coding: utf-8 -*-
	"""FinalAssignment.ipynb

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/1ZivSjhGy8jDJ0SDnaGdad5seVc0wMbQl
	"""

	from datasets import load_dataset

	# Fetch the remote-worker productivity dataset from the Hugging Face Hub
	# and materialise its training split as a pandas DataFrame once.
	dataset = load_dataset("nprak26/remote-worker-productivity")
	df = dataset["train"].to_pandas()

	# Quick look at the data: leading rows, column names, numeric summary.
	print(df.head())
	print(dataset["train"].column_names)
	print(df.describe())

	import pandas as pd

	# Columns of interest; the last two hold categorical labels.
	features = [
	    "Task_Completion_Rate",
	    "Productivity_Score",
	    "Stress_Level",
	    "Job_Satisfaction",
	    "Efficiency_Rating",
	    "Manager_Support_Level",
	    "Team_Collaboration_Frequency",
	]

	# One-hot encode the two categorical columns (first level of each is dropped).
	df_processed = pd.get_dummies(
	    df,
	    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
	    drop_first=True,
	)

	# Scaler inputs: the numeric metrics followed by every generated dummy column.
	features_to_scale = [
	    "Task_Completion_Rate",
	    "Productivity_Score",
	    "Stress_Level",
	    "Job_Satisfaction",
	    "Efficiency_Rating",
	]
	features_to_scale.extend(
	    col
	    for col in df_processed.columns
	    if any(
	        marker in col
	        for marker in ("Manager_Support_Level_", "Team_Collaboration_Frequency_")
	    )
	)

	# Rescale the selected columns for modeling.
	from sklearn.preprocessing import MinMaxScaler

	scaler = MinMaxScaler()
	scaled_features = scaler.fit_transform(df_processed[features_to_scale])

	print("Features scaled successfully!")

	from sklearn.cluster import KMeans
	import matplotlib.pyplot as plt
	import seaborn as sns
	import pandas as pd

	# Cluster the scaled rows into four groups with a fixed seed.
	kmeans = KMeans(n_clusters=4, random_state=42)
	kmeans.fit(scaled_features)
	clusters = kmeans.labels_

	# Record the labels on the raw frame, then mirror them onto the encoded frame.
	df["productivity_cluster"] = clusters
	df_processed["productivity_cluster"] = df["productivity_cluster"]

	# Plot the encoded columns in `features_to_scale`; the original `features`
	# list still names raw categorical columns, which cannot be plotted.
	sns.pairplot(
	    df_processed,
	    vars=features_to_scale,
	    hue="productivity_cluster",
	    palette="tab10",
	)
	plt.suptitle("Productivity Clusters", y=1.02)
	plt.show()

	from transformers import pipeline

	generator = pipeline(task="text2text-generation", model="google/flan-t5-base")


	def generate_tip(cluster_data):
	    """Return a generated productivity tip for one one-hot encoded row.

	    ``cluster_data`` is indexed by column name and must provide the numeric
	    metrics quoted in the prompt plus the ``Manager_Support_Level_*`` flags.
	    """
	    # "High" has no flag column of its own, so it is the answer when none of
	    # the other flags is set. Flags are tested in this fixed order.
	    flagged_levels = ("Very High", "Moderate", "Low", "Very Low")
	    manager_support = next(
	        (
	            level
	            for level in flagged_levels
	            if cluster_data[f"Manager_Support_Level_{level}"]
	        ),
	        "High",
	    )

	    prompt = f"""
	Based on the following productivity profile:
	- Task Completion Rate: {cluster_data['Task_Completion_Rate']}
	- Stress Level: {cluster_data['Stress_Level']}
	- Job Satisfaction: {cluster_data['Job_Satisfaction']}
	- Manager Support Level: {manager_support}

	Suggest 1 actionable tip to improve productivity.
	"""
	    generated = generator(prompt)
	    return generated[0]["generated_text"]


	# Try the helper on the first row assigned to cluster 2.
	in_cluster_two = df_processed["productivity_cluster"] == 2
	sample = df_processed.loc[in_cluster_two].iloc[0]
	tip = generate_tip(sample)
	print("💡 Productivity Tip:", tip)

	# Notebook shell command kept for reference only: it is not valid Python and
	# would make this file fail to parse. Install the dependency outside the
	# script (e.g. run it in a terminal or list gradio in requirements.txt).
	# pip install gradio

	import gradio as gr

	# Sample cluster profiles (you can expand this later)
	cluster_examples = {
	    0: "High task completion, low stress, strong manager support",
	    1: "Moderate productivity, high stress, weak support",
	    2: "Low satisfaction, high collaboration, low efficiency",
	    3: "Balanced workload, moderate stress, good support",
	}


	def get_tip(cluster_id):
	    """Return the example description and a generated tip for a cluster.

	    Args:
	        cluster_id: Key of ``cluster_examples`` and value of the
	            ``productivity_cluster`` column to sample from.

	    Returns:
	        tuple: (cluster description, generated tip text).

	    Raises:
	        IndexError: If no row is labelled with ``cluster_id``.
	        KeyError: If ``cluster_id`` has no entry in ``cluster_examples``.
	    """
	    # Sample from the one-hot encoded frame: the single-argument
	    # generate_tip reads the Manager_Support_Level_* flag columns, which the
	    # raw `df` does not have (sampling from `df` raised KeyError).
	    # Assumes df_processed carries the productivity_cluster column at call
	    # time, as it does right after the first clustering step.
	    in_cluster = df_processed["productivity_cluster"] == cluster_id
	    sample = df_processed[in_cluster].iloc[0]
	    tip = generate_tip(sample)
	    return cluster_examples[cluster_id], tip

	import gradio as gr
	import pandas as pd
	from sklearn.preprocessing import MinMaxScaler
	from sklearn.cluster import KMeans
	from transformers import pipeline
	from datasets import load_dataset

	# Reload the dataset and take the training split as a DataFrame.
	dataset = load_dataset("nprak26/remote-worker-productivity")
	df = dataset["train"].to_pandas()

	# Inputs exposed by the app: three numeric metrics and two categorical ones.
	features = [
	    "Task_Completion_Rate",
	    "Stress_Level",
	    "Job_Satisfaction",
	    "Manager_Support_Level",
	    "Team_Collaboration_Frequency",
	]

	# One-hot encode the categorical inputs (first level of each is dropped).
	df_processed = pd.get_dummies(
	    df[features],
	    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
	    drop_first=True,
	)

	# Fit the scaler on the encoded frame and keep the scaled matrix.
	scaler = MinMaxScaler()
	scaled_features = scaler.fit_transform(df_processed)

	# Four-cluster model with a fixed seed, fitted on the scaled matrix.
	kmeans = KMeans(n_clusters=4, random_state=42)
	kmeans.fit(scaled_features)

	# Text-to-text model used to write the tips.
	generator = pipeline(task="text2text-generation", model="google/flan-t5-base")

	# Human-readable label for each cluster id.
	cluster_descriptions = {
	    0: "High task completion, low stress, strong manager support",
	    1: "Moderate productivity, high stress, weak support",
	    2: "Low satisfaction, high collaboration, low efficiency",
	    3: "Balanced workload, moderate stress, good support",
	}

	# Function to process user input
	def analyze_productivity(task_completion, stress, satisfaction, support, collaboration):
	    """Assign the submitted metrics to a cluster and generate a tip.

	    Args:
	        task_completion: Task completion rate entered by the user.
	        stress: Stress level entered by the user.
	        satisfaction: Job satisfaction entered by the user.
	        support: Manager support level (categorical label).
	        collaboration: Team collaboration frequency (categorical label).

	    Returns:
	        tuple: (cluster description, generated tip text).
	    """
	    # Single-row frame with the same raw columns the training frame had.
	    user_data = pd.DataFrame([{
	        "Task_Completion_Rate": task_completion,
	        "Stress_Level": stress,
	        "Job_Satisfaction": satisfaction,
	        "Manager_Support_Level": support,
	        "Team_Collaboration_Frequency": collaboration,
	    }])

	    # Encode WITHOUT drop_first: a one-row frame has exactly one level per
	    # categorical column, and drop_first would remove that very level, so
	    # the user's selections would never reach the model.
	    user_processed = pd.get_dummies(
	        user_data,
	        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
	    )

	    # Reindexing to the training layout discards a dummy for the baseline
	    # level (absent from training) and zero-fills every unselected level.
	    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

	    # Scale with the already-fitted scaler, then predict the cluster.
	    scaled_input = scaler.transform(user_aligned)
	    cluster = kmeans.predict(scaled_input)[0]

	    prompt = f"""
	Based on the following productivity profile:
	- Task Completion Rate: {task_completion}
	- Stress Level: {stress}
	- Job Satisfaction: {satisfaction}
	- Manager Support Level: {support}
	- Collaboration Frequency: {collaboration}

	Suggest 1 actionable tip to improve productivity.
	"""
	    tip = generator(prompt)[0]["generated_text"]
	    return cluster_descriptions[cluster], tip

	# Engineered feature 1: weekly hours multiplied by the work-life balance rating.
	df['Work_Life_Balance_Score'] = df['Work_Hours_Per_Week'].mul(df['Work_Life_Balance'])

	# Engineered feature 2: meetings per week plus an ordinal code for how often
	# the team collaborates (labels missing from the mapping become NaN).
	collaboration_mapping = {
	    label: rank
	    for rank, label in enumerate(
	        ['Monthly', 'Weekly', 'Few times per week', 'Daily'], start=1
	    )
	}
	df['Team_Collaboration_Numerical'] = df['Team_Collaboration_Frequency'].map(collaboration_mapping)
	df['Communication_Overhead'] = df['Meetings_Per_Week'].add(df['Team_Collaboration_Numerical'])

	# Model inputs now include the two engineered columns.
	features = [
	    "Task_Completion_Rate",
	    "Stress_Level",
	    "Job_Satisfaction",
	    "Manager_Support_Level",
	    "Team_Collaboration_Frequency",
	    "Work_Life_Balance_Score",
	    "Communication_Overhead",
	]

	print("New features created and features list updated.")

	# Rebuild the encoded frame from the extended feature list.
	df_processed = pd.get_dummies(
	    df[features],
	    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
	    drop_first=True,
	)

	# Refit the scaler on the rebuilt frame.
	scaler = MinMaxScaler()
	scaled_features = scaler.fit_transform(df_processed)

	print("df_processed updated and features scaled.")

	# Handle missing values in 'Communication_Overhead' with the column median.
	# The filled column is assigned back explicitly: calling fillna(inplace=True)
	# on a column selection is chained assignment, which warns on recent pandas
	# and does not update `df` when Copy-on-Write is enabled.
	df['Communication_Overhead'] = df['Communication_Overhead'].fillna(
	    df['Communication_Overhead'].median()
	)

	# Redefine df_processed to include new features and one-hot encode
	df_processed = pd.get_dummies(
	    df[features],
	    columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
	    drop_first=True,
	)

	# Scale all features
	scaler = MinMaxScaler()
	scaled_features = scaler.fit_transform(df_processed)

	# Implement the elbow method: record the inertia for k = 1..10
	inertia = []
	for i in range(1, 11):
	    kmeans = KMeans(n_clusters=i, random_state=42, n_init=10)
	    kmeans.fit(scaled_features)
	    inertia.append(kmeans.inertia_)

	# Plot the elbow curve
	plt.figure(figsize=(10, 6))
	plt.plot(range(1, 11), inertia, marker='o')
	plt.title('Elbow Method For Optimal k')
	plt.xlabel('Number of clusters')
	plt.ylabel('Inertia')
	plt.show()

	# Fit the KMeans model with the chosen number of clusters (k=3)
	kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
	clusters = kmeans.fit_predict(scaled_features)

	# Add the new cluster labels to the DataFrame
	df['productivity_cluster'] = clusters

	print("KMeans model re-fitted with 3 clusters and labels added to the DataFrame.")

	def generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description):
	    """Ask the text model for one productivity tip tailored to a profile.

	    Args:
	        task_completion (float): Task completion rate of the user.
	        stress (int): Stress level of the user.
	        satisfaction (float): Job satisfaction of the user.
	        support (str): Manager support level (categorical).
	        collaboration (str): Team collaboration frequency (categorical).
	        cluster_description (str): Text describing the predicted cluster.

	    Returns:
	        str: The text generated for the prompt.
	    """
	    prompt = f"""
	Based on the following productivity profile:
	- Task Completion Rate: {task_completion}%
	- Stress Level: {stress}/10
	- Job Satisfaction: {satisfaction}%
	- Manager Support Level: {support}
	- Team Collaboration Frequency: {collaboration}

	This profile aligns with a cluster characterized by: {cluster_description}

	Suggest one highly actionable tip to improve productivity, specifically tailored to this profile and cluster characteristics. The tip should be concise and practical.
	"""
	    # Relies on the module-level `generator`; output length is capped at
	    # 50 new tokens to keep the tip short.
	    outputs = generator(prompt, max_new_tokens=50)
	    return outputs[0]["generated_text"]

	# Update the analyze_productivity function to pass the correct arguments to generate_tip
	def analyze_productivity(task_completion, stress, satisfaction, support, collaboration):
	    """Analyze user productivity metrics, predict the cluster, and generate a tip.

	    Args:
	        task_completion (float): User's task completion rate.
	        stress (int): User's stress level.
	        satisfaction (float): User's job satisfaction.
	        support (str): User's manager support level (categorical).
	        collaboration (str): User's team collaboration frequency (categorical).

	    Returns:
	        tuple: ("Cluster <id>: <description>", generated tip text).
	    """
	    # Single-row frame with the raw training columns.
	    user_data = pd.DataFrame([{
	        "Task_Completion_Rate": task_completion,
	        "Stress_Level": stress,
	        "Job_Satisfaction": satisfaction,
	        "Manager_Support_Level": support,
	        "Team_Collaboration_Frequency": collaboration,
	        # Placeholders for the engineered features: the form does not collect
	        # them, but the scaler and the model expect these columns.
	        # NOTE(review): 0 may lie outside the range seen in training - confirm
	        # whether a neutral value such as the training median is preferable.
	        "Work_Life_Balance_Score": 0,
	        "Communication_Overhead": 0,
	    }])

	    # Encode WITHOUT drop_first: a one-row frame has exactly one level per
	    # categorical column, and drop_first would remove that very level, so
	    # the user's selections would never reach the model.
	    user_processed = pd.get_dummies(
	        user_data,
	        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
	    )

	    # Align to the frame the scaler was fitted on. This replaces a per-call
	    # rebuild of the training dummies, which produced the same column layout.
	    # Reindexing discards a dummy for the baseline level (absent from
	    # training) and zero-fills every unselected level.
	    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

	    # Scale with the already-fitted scaler, then predict the cluster.
	    scaled_input = scaler.transform(user_aligned)
	    cluster = kmeans.predict(scaled_input)[0]

	    # Get the cluster description
	    cluster_description = cluster_descriptions[cluster]

	    # Generate the tip using the refined generate_tip function
	    tip = generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description)

	    return f"Cluster {cluster}: {cluster_description}", tip

	# Print a message indicating the functions have been updated
	print("generate_tip and analyze_productivity functions updated.")

	# Notebook magic kept for reference only: `!` shell commands are not valid
	# Python and would make this file fail to parse. Install the dependency
	# outside the script (e.g. in a terminal or via requirements.txt).
	# !pip install datasets

	def analyze_what_if(task_completion, stress, satisfaction, support, collaboration):
	    """Analyze a 'what-if' productivity scenario and generate a tip.

	    Mirrors analyze_productivity; it backs the what-if tab of the UI.

	    Args:
	        task_completion: Adjusted task completion rate.
	        stress: Adjusted stress level.
	        satisfaction: Adjusted job satisfaction.
	        support: Adjusted manager support level (categorical label).
	        collaboration: Adjusted team collaboration frequency (categorical label).

	    Returns:
	        tuple: ("Cluster <id>: <description>", generated tip text).
	    """
	    # Single-row frame with the raw columns collected by the form.
	    user_data = pd.DataFrame([{
	        "Task_Completion_Rate": task_completion,
	        "Stress_Level": stress,
	        "Job_Satisfaction": satisfaction,
	        "Manager_Support_Level": support,
	        "Team_Collaboration_Frequency": collaboration,
	    }])

	    # Encode WITHOUT drop_first: a one-row frame has exactly one level per
	    # categorical column, and drop_first would remove that very level, so
	    # the user's selections would never reach the model.
	    user_processed = pd.get_dummies(
	        user_data,
	        columns=["Manager_Support_Level", "Team_Collaboration_Frequency"],
	    )

	    # Align to the training layout: a dummy for the baseline level is
	    # discarded, and unselected levels plus any training column the form does
	    # not collect are filled with 0.
	    user_aligned = user_processed.reindex(columns=df_processed.columns, fill_value=0)

	    # Scale with the already-fitted scaler, then predict the cluster.
	    scaled_input = scaler.transform(user_aligned)
	    cluster = kmeans.predict(scaled_input)[0]

	    # Get the cluster description
	    cluster_description = cluster_descriptions[cluster]

	    # Generate the tip
	    tip = generate_tip(task_completion, stress, satisfaction, support, collaboration, cluster_description)

	    return f"Cluster {cluster}: {cluster_description}", tip


	# First version of the UI: an input tab and a result tab for the real
	# profile, and the same pair again for the what-if scenario.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🚀 Productivity Profiler")
	    gr.Markdown("Enter your productivity metrics to get a personalized tip. This tool analyzes your input and provides an AI-generated tip to help improve your productivity.")

	    with gr.Tabs():
	        # Inputs for the user's current profile
	        with gr.TabItem("Your Productivity Profile"):
	            task_completion = gr.Slider(minimum=0, maximum=100, label="Task Completion Rate (%)")
	            stress = gr.Slider(minimum=1, maximum=10, label="Stress Level (1-10)")
	            satisfaction = gr.Slider(minimum=0, maximum=100, label="Job Satisfaction (0-100)")
	            support = gr.Dropdown(label="Manager Support Level", choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'])
	            collaboration = gr.Dropdown(label="Team Collaboration Frequency", choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'])
	            submit = gr.Button("Analyze")

	        # Outputs for the current profile
	        with gr.TabItem("Results"):
	            profile_output = gr.Textbox(label="Your Productivity Cluster")
	            tip_output = gr.Textbox(label="AI-Powered Productivity Tip")

	        # Inputs for the hypothetical profile
	        with gr.TabItem("What-If Scenario"):
	            gr.Markdown("Explore how changing your metrics could affect your productivity profile.")
	            what_if_task_completion = gr.Slider(minimum=0, maximum=100, label="Adjusted Task Completion Rate (%)")
	            what_if_stress = gr.Slider(minimum=1, maximum=10, label="Adjusted Stress Level (1-10)")
	            what_if_satisfaction = gr.Slider(minimum=0, maximum=100, label="Adjusted Job Satisfaction (0-100)")
	            what_if_support = gr.Dropdown(label="Adjusted Manager Support Level", choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'])
	            what_if_collaboration = gr.Dropdown(label="Adjusted Team Collaboration Frequency", choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'])
	            what_if_submit = gr.Button("Analyze What-If Scenario")

	        # Outputs for the hypothetical profile
	        with gr.TabItem("What-If Results"):
	            what_if_profile_output = gr.Textbox(label="What-If Productivity Cluster")
	            what_if_tip_output = gr.Textbox(label="What-If AI-Powered Productivity Tip")

	    # Wire each button to its analysis function.
	    submit.click(
	        analyze_productivity,
	        [task_completion, stress, satisfaction, support, collaboration],
	        [profile_output, tip_output],
	    )
	    what_if_submit.click(
	        analyze_what_if,
	        [what_if_task_completion, what_if_stress, what_if_satisfaction, what_if_support, what_if_collaboration],
	        [what_if_profile_output, what_if_tip_output],
	    )

	# NOTE(review): a second `demo` is built and launched later in this file;
	# when run as a plain script this call presumably blocks until the first
	# server stops - confirm which launch is intended.
	demo.launch(share=True)

	def save_feedback(tip, feedback):
	    """Append a generated tip and the user's feedback to ``feedback.csv``.

	    The file is created in the current working directory on first use; the
	    ``tip,feedback`` header row is written only while the file is empty.

	    Args:
	        tip: Tip text the feedback refers to.
	        feedback: Feedback value to record (the UI sends an emoji).

	    Returns:
	        str: Confirmation message shown in the UI.
	    """
	    feedback_data = pd.DataFrame({"tip": [tip], "feedback": [feedback]})
	    # UTF-8 is requested explicitly because the feedback values are emoji,
	    # which a non-UTF-8 platform default encoding cannot write. newline=""
	    # leaves line endings to the CSV writer (avoids doubled line endings on
	    # Windows).
	    with open("feedback.csv", "a", encoding="utf-8", newline="") as f:
	        # In append mode the position starts at the end of the file, so a
	        # position of 0 means the file is empty and still needs its header.
	        feedback_data.to_csv(f, header=f.tell() == 0, index=False)
	    return "Feedback submitted!"


	# Second version of the UI: same tabs as before, plus thumbs-up/down buttons
	# under each result that record feedback through save_feedback.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🚀 Productivity Profiler")
	    gr.Markdown("Enter your productivity metrics to get a personalized tip. This tool analyzes your input and provides an AI-generated tip to help improve your productivity.")

	    with gr.Tabs():
	        # Inputs for the user's current profile
	        with gr.TabItem("Your Productivity Profile"):
	            task_completion = gr.Slider(minimum=0, maximum=100, label="Task Completion Rate (%)")
	            stress = gr.Slider(minimum=1, maximum=10, label="Stress Level (1-10)")
	            satisfaction = gr.Slider(minimum=0, maximum=100, label="Job Satisfaction (0-100)")
	            support = gr.Dropdown(label="Manager Support Level", choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'])
	            collaboration = gr.Dropdown(label="Team Collaboration Frequency", choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'])
	            submit = gr.Button("Analyze")

	        # Outputs and feedback controls for the current profile
	        with gr.TabItem("Results"):
	            profile_output = gr.Textbox(label="Your Productivity Cluster")
	            tip_output = gr.Textbox(label="AI-Powered Productivity Tip")
	            with gr.Row():
	                upvote_button = gr.Button("👍")
	                downvote_button = gr.Button("👎")
	            feedback_message = gr.Textbox(label="Feedback Status")

	        # Inputs for the hypothetical profile
	        with gr.TabItem("What-If Scenario"):
	            gr.Markdown("Explore how changing your metrics could affect your productivity profile.")
	            what_if_task_completion = gr.Slider(minimum=0, maximum=100, label="Adjusted Task Completion Rate (%)")
	            what_if_stress = gr.Slider(minimum=1, maximum=10, label="Adjusted Stress Level (1-10)")
	            what_if_satisfaction = gr.Slider(minimum=0, maximum=100, label="Adjusted Job Satisfaction (0-100)")
	            what_if_support = gr.Dropdown(label="Adjusted Manager Support Level", choices=['Very Low', 'Low', 'Moderate', 'High', 'Very High'])
	            what_if_collaboration = gr.Dropdown(label="Adjusted Team Collaboration Frequency", choices=['Monthly', 'Weekly', 'Few times per week', 'Daily'])
	            what_if_submit = gr.Button("Analyze What-If Scenario")

	        # Outputs and feedback controls for the hypothetical profile
	        with gr.TabItem("What-If Results"):
	            what_if_profile_output = gr.Textbox(label="What-If Productivity Cluster")
	            what_if_tip_output = gr.Textbox(label="What-If AI-Powered Productivity Tip")
	            with gr.Row():
	                what_if_upvote_button = gr.Button("👍")
	                what_if_downvote_button = gr.Button("👎")
	            what_if_feedback_message = gr.Textbox(label="Feedback Status")

	    # Wire each analysis button to its function.
	    submit.click(
	        analyze_productivity,
	        [task_completion, stress, satisfaction, support, collaboration],
	        [profile_output, tip_output],
	    )
	    what_if_submit.click(
	        analyze_what_if,
	        [what_if_task_completion, what_if_stress, what_if_satisfaction, what_if_support, what_if_collaboration],
	        [what_if_profile_output, what_if_tip_output],
	    )

	    # Feedback buttons: an invisible Textbox holding the vote symbol is
	    # passed as the second input of save_feedback.
	    upvote_button.click(
	        save_feedback,
	        [tip_output, gr.Textbox(value="👍", visible=False)],
	        [feedback_message],
	    )
	    downvote_button.click(
	        save_feedback,
	        [tip_output, gr.Textbox(value="👎", visible=False)],
	        [feedback_message],
	    )
	    what_if_upvote_button.click(
	        save_feedback,
	        [what_if_tip_output, gr.Textbox(value="👍", visible=False)],
	        [what_if_feedback_message],
	    )
	    what_if_downvote_button.click(
	        save_feedback,
	        [what_if_tip_output, gr.Textbox(value="👎", visible=False)],
	        [what_if_feedback_message],
	    )


	demo.launch(share=True)