import streamlit as st
import pandas as pd
import numpy as np
import io
import base64
import plotly.express as px
import datetime
import re
from collections import Counter

def process_genai_data(df):
    # Create a new dataframe with unique users
    unique_users = df['User'].drop_duplicates().reset_index(drop=True)
    result_df = pd.DataFrame(unique_users, columns=['User'])

    # For each unique user, find all their GenAI use case descriptions and join them
    def get_descriptions(user):
        # Filter descriptions for the current user and drop empty values
        descriptions = df[df['User'] == user]['GenAI use case description'].dropna().unique()
        # Join the descriptions with newlines, prefixing each line with "- "
        if len(descriptions) > 0:
            formatted_descriptions = ["- " + desc for desc in descriptions]
            return "\n".join(formatted_descriptions)
        return ""

    # Apply the function to get concatenated descriptions for each user
    result_df['GenAI_Descriptions'] = result_df['User'].apply(get_descriptions)

    # Get the GenAI Efficiency value for the first occurrence of each user
    def get_first_efficiency(user):
        efficiency_values = df[df['User'] == user]['GenAI Efficiency (Log time in hours)']
        if len(efficiency_values) > 0:
            return efficiency_values.iloc[0]
        return np.nan

    result_df['GenAI_Efficiency'] = result_df['User'].apply(get_first_efficiency)

    # Total logged and required hours per user; the optional columns fall back
    # to NaN when absent, so the app really can continue with available columns
    def column_sum(user, column):
        if column not in df.columns:
            return np.nan
        return df[df['User'] == user][column].sum()

    result_df['Total_Logged_Hours'] = result_df['User'].apply(lambda u: column_sum(u, 'Logged'))
    result_df['Total_Required_Hours'] = result_df['User'].apply(lambda u: column_sum(u, 'Required'))

    # Calculate the utilization percentage; replace a zero required total with
    # NaN so the division cannot produce inf
    required = result_df['Total_Required_Hours'].replace(0, np.nan)
    result_df['Utilization_Percentage'] = (result_df['Total_Logged_Hours'] / required * 100).round(2)
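    # Illustrative example: 152 logged hours against 160 required hours
    # yields 152 / 160 * 100 = 95.0.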
    # Get the date range for each user
    def get_date_range(user):
        user_logs = df[df['User'] == user]
        if 'Date' in user_logs.columns and not user_logs['Date'].empty:
            dates = user_logs['Date'].dropna()
            if len(dates) > 0:
                return f"{min(dates)} to {max(dates)}"
        return "N/A"

    result_df['Date_Range'] = result_df['User'].apply(get_date_range)

    # Add the description quality score
    result_df['Description_Quality_Score'] = calculate_description_quality(result_df)

    # Get project data if available
    if 'Project' in df.columns:
        # Get a list of projects for each user
        def get_projects(user):
            projects = df[df['User'] == user]['Project'].dropna().unique()
            return list(projects)

        result_df['Projects'] = result_df['User'].apply(get_projects)

    return result_df
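
# Illustrative output of process_genai_data (values invented for illustration):
#   User | GenAI_Descriptions              | GenAI_Efficiency | Total_Logged_Hours | Utilization_Percentage
#   ana  | "- Drafted tests with ChatGPT"  | 3.5              | 152.0              | 95.0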

def analyze_projects_by_genai_hours(df):
    """
    Analyzes which projects have the highest GenAI efficiency log hours.
    Returns a dataframe with projects and their total GenAI hours.
    Fix: avoid double-counting hours by first reducing to unique
    Project-User combinations.
    """
    if 'Project' not in df.columns:
        return None

    # First, get unique Project-User combinations with their GenAI hours.
    # This avoids double-counting hours for the same user on the same project.
    user_project_hours = df.groupby(['Project', 'User'])['GenAI Efficiency (Log time in hours)'].first().reset_index()
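    # Illustrative example: if user "ana" appears on project "Alpha" in three
    # log rows that each repeat the same 2.0 GenAI hours, .first() keeps a
    # single 2.0 for the (Alpha, ana) pair instead of contributing 6.0 below.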
    # Now sum up the hours by project
    project_hours = user_project_hours.groupby('Project')['GenAI Efficiency (Log time in hours)'].sum().reset_index()
    project_hours = project_hours.sort_values('GenAI Efficiency (Log time in hours)', ascending=False)
    project_hours.columns = ['Project', 'Total_GenAI_Hours']

    # Add the user count per project
    project_users = df.groupby('Project')['User'].nunique().reset_index()
    project_users.columns = ['Project', 'User_Count']

    # Merge the dataframes
    project_analysis = pd.merge(project_hours, project_users, on='Project')
    return project_analysis

def extract_ai_tools_from_descriptions(df):
    """
    Extracts and counts AI tools mentioned in GenAI descriptions.
    Returns a Counter mapping normalized tool names to frequencies.
    """
    # Common AI tools and platforms to look for
    ai_tools = [
        'chatgpt', 'gpt-4', 'gpt-3', 'gpt', 'openai',
        'claude', 'anthropic',
        'gemini', 'bard', 'google ai',
        'copilot', 'github copilot', 'microsoft copilot',
        'dall-e', 'midjourney', 'stable diffusion',
        'hugging face', 'transformers',
        'bert', 'llama', 'mistral',
        'tensorflow', 'pytorch', 'ml',
        'jupyter', 'colab',
        'langchain', 'llm', 'rag'
    ]
    # Mapping from raw mentions to normalized tool names
    tool_mapping = {
        'gpt': 'ChatGPT/GPT',
        'gpt-3': 'ChatGPT/GPT',
        'gpt-4': 'ChatGPT/GPT',
        'chatgpt': 'ChatGPT/GPT',
        'openai': 'OpenAI',
        'claude': 'Claude',
        'anthropic': 'Claude',
        'gemini': 'Google AI',
        'bard': 'Google AI',
        'google ai': 'Google AI',
        'copilot': 'GitHub Copilot',
        'github copilot': 'GitHub Copilot',
        'microsoft copilot': 'Microsoft Copilot',
        'dall-e': 'DALL-E',
        'midjourney': 'Midjourney',
        'stable diffusion': 'Stable Diffusion',
        'hugging face': 'Hugging Face',
        'transformers': 'Transformers',
        'bert': 'BERT',
        'llama': 'LLaMA',
        'mistral': 'Mistral AI',
        'tensorflow': 'TensorFlow',
        'pytorch': 'PyTorch',
        'ml': 'Machine Learning',
        'jupyter': 'Jupyter',
        'colab': 'Google Colab',
        'langchain': 'LangChain',
        'llm': 'Large Language Models',
        'rag': 'Retrieval Augmented Generation'
    }

    # Pool all GenAI descriptions into one lowercase string
    all_descriptions = " ".join(df['GenAI use case description'].dropna().astype(str).tolist()).lower()

    # Count occurrences of each tool. Longer names are matched first and then
    # masked out of the text so that, e.g., "github copilot" is not counted a
    # second time as a bare "copilot" mention.
    tool_counts = Counter()
    for tool in sorted(ai_tools, key=len, reverse=True):
        pattern = r'\b' + re.escape(tool) + r'\b'
        count = len(re.findall(pattern, all_descriptions))
        if count > 0:
            tool_counts[tool_mapping.get(tool, tool)] += count
            all_descriptions = re.sub(pattern, ' ', all_descriptions)
    return tool_counts
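
# Note on matching: the \b word boundaries in extract_ai_tools_from_descriptions
# keep substrings from matching, e.g. 'ml' matches "used ml daily" but not
# "html" or "xml".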

def extract_use_cases_from_descriptions(df):
    """
    Analyzes GenAI descriptions to identify common use cases.
    Returns a Counter mapping use case categories to frequencies.
    """
    # Common use case categories and the keywords that indicate them
    use_case_keywords = {
        'Code Generation': ['code', 'coding', 'programming', 'script', 'develop', 'algorithm'],
        'Content Creation': ['content', 'write', 'writing', 'draft', 'article', 'blog'],
        'Data Analysis': ['data', 'analysis', 'analyze', 'analytics', 'statistics', 'insights'],
        'Documentation': ['document', 'documentation', 'manual', 'guide', 'readme'],
        'Research': ['research', 'study', 'investigate', 'explore', 'literature'],
        'Summarization': ['summary', 'summarize', 'summarization', 'extract key points'],
        'Translation': ['translate', 'translation', 'language', 'localize'],
        'Image Generation': ['image', 'picture', 'graphic', 'design', 'draw', 'art'],
        'Chatbot': ['chatbot', 'chat', 'conversation', 'dialogue', 'assistant'],
        'Automation': ['automate', 'automation', 'workflow', 'process', 'routine'],
        'Training': ['train', 'training', 'learn', 'learning', 'education'],
        'Testing': ['test', 'testing', 'QA', 'quality assurance', 'debug']
    }

    # Extract all GenAI descriptions
    descriptions = df['GenAI use case description'].dropna().astype(str).tolist()

    # Count occurrences of each use case
    use_case_counts = Counter()
    for description in descriptions:
        description_lower = description.lower()
        for use_case, keywords in use_case_keywords.items():
            for keyword in keywords:
                # Lowercase the keyword too, so entries like 'QA' can match
                # against the lowercased description text
                if re.search(r'\b' + re.escape(keyword.lower()) + r'\b', description_lower):
                    use_case_counts[use_case] += 1
                    break  # Count each use case at most once per description
    return use_case_counts
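
# Illustrative example: "used a script to automate the weekly report draft"
# increments Code Generation ('script'), Automation ('automate'), and
# Content Creation ('draft') once each; the break keeps any single
# description from counting twice toward the same category.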

def calculate_description_quality(df):
    """
    Calculates a quality score for each user's GenAI description.
    The score is based on length, specificity, and uniqueness.
    Returns a Series with quality scores.
    """
    descriptions = df['GenAI_Descriptions']

    # Factor 1: Length score (longer descriptions get more points)
    char_counts = descriptions.str.len()
    max_char_count = char_counts.max() if not char_counts.empty and char_counts.max() > 0 else 1
    length_score = (char_counts / max_char_count) * 40  # 40% weight for length

    # Factor 2: Specificity score (mentions of specific tools or numbers)
    def specificity_score(desc):
        if not isinstance(desc, str) or desc.strip() == "":
            return 0
        score = 0
        # Check for specific AI tools
        ai_tools = ['gpt', 'chatgpt', 'claude', 'gemini', 'copilot', 'dall-e', 'midjourney']
        for tool in ai_tools:
            if re.search(r'\b' + re.escape(tool) + r'\b', desc.lower()):
                score += 5
        # Check for numbers (could indicate metrics or specific examples)
        if re.search(r'\d+', desc):
            score += 5
        # Check for detailed explanations
        if len(desc.split()) > 50:  # Long descriptions
            score += 10
        return min(score, 30)  # Cap at 30% weight

    specificity_scores = descriptions.apply(specificity_score)

    # Factor 3: Uniqueness score
    def uniqueness_score(desc):
        if not isinstance(desc, str) or desc.strip() == "":
            return 0
        # Simple word tokenization by splitting on whitespace
        words = desc.lower().split()
        # Remove common stop words and short words
        common_stopwords = {"a", "an", "the", "and", "or", "but", "is", "are", "was", "were",
                            "in", "on", "at", "to", "for", "with", "by", "about", "of", "this",
                            "that", "i", "we", "you", "he", "she", "they", "it", "have", "has"}
        filtered_words = [word for word in words if word not in common_stopwords and len(word) > 2]
        # Ratio of unique words to total (filtered) words
        if filtered_words:
            uniqueness = len(set(filtered_words)) / len(filtered_words)
            return uniqueness * 30  # 30% weight for uniqueness
        return 0

    uniqueness_scores = descriptions.apply(uniqueness_score)

    # Combine the scores and normalize to a 0-100 scale; guard against a zero
    # maximum when every description is empty
    total_scores = length_score + specificity_scores + uniqueness_scores
    max_score = total_scores.max() if not total_scores.empty and total_scores.max() > 0 else 1
    normalized_scores = (total_scores / max_score * 100).round(1)
    return normalized_scores
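
# Illustrative example: a 300-character description (longest in the set is
# 600) scores 20 length points; naming 'chatgpt' (+5) plus citing a number
# (+5) adds 10 specificity points; 20 distinct words out of 25 filtered
# words add 0.8 * 30 = 24 uniqueness points, for 54 raw points, which are
# then rescaled so the best description in the set shows 100.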

def get_download_link(df, filename):
    """Generate a download link for the dataframe as an Excel file."""
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name='Processed Data')
        # Add a summary sheet
        summary = pd.DataFrame({
            'Metric': ['Total Users', 'Average GenAI Efficiency (hours)', 'Average Utilization (%)',
                       'Top GenAI User', 'Top Quality Score'],
            'Value': [
                len(df),
                round(df['GenAI_Efficiency'].mean(), 2),
                round(df['Utilization_Percentage'].mean(), 2),
                df.loc[df['GenAI_Efficiency'].idxmax(), 'User'] if not df['GenAI_Efficiency'].isna().all() else 'N/A',
                df.loc[df['Description_Quality_Score'].idxmax(), 'User'] if not df['Description_Quality_Score'].isna().all() else 'N/A'
            ]
        })
        summary.to_excel(writer, index=False, sheet_name='Summary')
    binary_data = output.getvalue()
    b64 = base64.b64encode(binary_data).decode()
    href = (
        f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" '
        f'download="{filename}" class="download-button">Download Excel file</a>'
    )
    return href
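
# Note: the file is embedded in the page as a base64 data URI inside an <a>
# tag, which keeps styling control via the .download-button CSS class. For
# large exports, Streamlit's built-in st.download_button avoids inflating the
# rendered HTML with the encoded payload.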

# Custom CSS
def local_css():
    st.markdown("""
    <style>
    .download-button {
        display: inline-block;
        padding: 0.5em 1em;
        color: #ffffff;
        background-color: #4CAF50;
        border-radius: 4px;
        text-decoration: none;
        font-weight: bold;
        transition: background-color 0.3s;
    }
    .download-button:hover {
        background-color: #45a049;
    }
    .stMetric {
        background-color: #f0f2f6;
        padding: 15px;
        border-radius: 5px;
    }
    .highlight-box {
        background-color: #f8f9fa;
        border-left: 5px solid #4CAF50;
        padding: 15px;
        margin: 10px 0;
        border-radius: 0 5px 5px 0;
    }
    .quality-high {
        color: #4CAF50;
        font-weight: bold;
    }
    .quality-medium {
        color: #FFC107;
        font-weight: bold;
    }
    .quality-low {
        color: #F44336;
        font-weight: bold;
    }
    /* Team category styling */
    table {
        width: 100%;
        border-collapse: collapse;
        margin-bottom: 20px;
    }
    th {
        background-color: #f2f2f2;
        padding: 8px;
        text-align: left;
        border: 1px solid #ddd;
    }
    td {
        padding: 8px;
        border: 1px solid #ddd;
    }
    tr:nth-child(even) {
        background-color: #f9f9f9;
    }
    tr:hover {
        background-color: #f0f0f0;
    }
    </style>
    """, unsafe_allow_html=True)

# Main app
st.set_page_config(page_title="GenAI Worklog Processor", layout="wide")
local_css()

st.title("GenAI Worklog Data Processor")
st.markdown("""
This app processes worklog data to extract insights about GenAI usage:
1. Creates a list of unique users
2. Concatenates GenAI use case descriptions for each user with proper formatting
3. Captures GenAI efficiency values and other metrics
4. Identifies projects with the highest GenAI usage
5. Analyzes the most common AI tools and use cases
6. Identifies prompt champions based on quality metrics
""")

# File uploader
uploaded_file = st.file_uploader("Upload your worklog CSV or Excel file", type=["csv", "xlsx", "xls"])

if uploaded_file is not None:
    try:
        # Read the file
        if uploaded_file.name.endswith('.csv'):
            df = pd.read_csv(uploaded_file)
        else:
            df = pd.read_excel(uploaded_file)

        # Display the original data
        st.subheader("Original Data Preview")
        st.dataframe(df.head())

        # Check that the required columns exist
        required_columns = ['User', 'GenAI use case description', 'GenAI Efficiency (Log time in hours)']
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            st.warning(f"The following required columns are missing: {', '.join(missing_columns)}")
            st.markdown("""
            For full functionality, your file should contain these columns:
            - User
            - GenAI use case description
            - GenAI Efficiency (Log time in hours)
            - Required
            - Logged
            - Date
            - Project (optional but recommended for project analysis)
            """)
            # Stop if essential columns are missing
            if any(col in missing_columns for col in ['User', 'GenAI use case description']):
                st.error("Cannot continue without essential columns.")
                st.stop()
            # Continue with available columns
            st.info("Continuing with available columns...")
        # Process the data
        if st.button("Process Data"):
            with st.spinner("Processing data..."):
                result_df = process_genai_data(df)

                # Get the project analysis if available
                project_analysis = None
                if 'Project' in df.columns:
                    project_analysis = analyze_projects_by_genai_hours(df)

                # Get AI tools usage
                ai_tool_counts = extract_ai_tools_from_descriptions(df)
                # Get the use case analysis
                use_case_counts = extract_use_cases_from_descriptions(df)

            # Display the result
            st.subheader("Processed Data")
            st.dataframe(result_df)

            # Download link
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            st.subheader("Download Processed Data")
            st.markdown(get_download_link(result_df, f"genai_processed_data_{timestamp}.xlsx"), unsafe_allow_html=True)
            # NEW INSIGHTS SECTION
            st.header("🔍 Advanced GenAI Insights")

            # 1. Project with the highest GenAI efficiency log hours
            if project_analysis is not None and not project_analysis.empty:
                st.subheader("🏆 Project with Highest GenAI Efficiency Hours")
                top_project = project_analysis.iloc[0]
                col1, col2 = st.columns(2)
                with col1:
                    st.markdown(f"""
                    <div class="highlight-box">
                        <h3>{top_project['Project']}</h3>
                        <p>Total GenAI Hours: <b>{round(top_project['Total_GenAI_Hours'], 2)}</b></p>
                        <p>Number of Users: <b>{top_project['User_Count']}</b></p>
                        <p>Average Hours per User: <b>{round(top_project['Total_GenAI_Hours'] / top_project['User_Count'], 2)}</b></p>
                    </div>
                    """, unsafe_allow_html=True)
                with col2:
                    # Bar chart of the top 5 projects
                    top_projects = project_analysis.head(5)
                    fig = px.bar(
                        top_projects,
                        x='Project',
                        y='Total_GenAI_Hours',
                        title='Top 5 Projects by GenAI Hours',
                        color='Total_GenAI_Hours',
                        color_continuous_scale='Viridis'
                    )
                    fig.update_layout(xaxis_title="Project", yaxis_title="Total GenAI Hours")
                    st.plotly_chart(fig, use_container_width=True)

                # Full project analysis
                st.markdown("### All Projects Analysis")
                st.dataframe(project_analysis)
            # 2. Most prominent use cases of AI tools
            st.subheader("📊 Most Prominent AI Use Cases")
            col1, col2 = st.columns(2)
            with col1:
                # AI tools analysis
                st.markdown("### Top AI Tools Mentioned")
                if ai_tool_counts:
                    # Convert to a dataframe for visualization
                    ai_tools_df = pd.DataFrame({
                        'Tool': list(ai_tool_counts.keys()),
                        'Mentions': list(ai_tool_counts.values())
                    }).sort_values('Mentions', ascending=False)
                    fig = px.bar(
                        ai_tools_df.head(10),
                        x='Tool',
                        y='Mentions',
                        title='Top 10 AI Tools Mentioned',
                        color='Mentions',
                        color_continuous_scale='Blues'
                    )
                    fig.update_layout(xaxis_title="AI Tool", yaxis_title="Number of Mentions")
                    st.plotly_chart(fig, use_container_width=True)

                    # Top tool insight
                    if not ai_tools_df.empty:
                        top_tool = ai_tools_df.iloc[0]
                        st.markdown(f"""
                        <div class="highlight-box">
                            <p>Most used AI tool: <b>{top_tool['Tool']}</b> with {top_tool['Mentions']} mentions</p>
                        </div>
                        """, unsafe_allow_html=True)
                else:
                    st.info("No specific AI tools were identified in the descriptions.")
            with col2:
                # Use cases analysis
                st.markdown("### Top Use Cases")
                if use_case_counts:
                    # Convert to a dataframe for visualization
                    use_cases_df = pd.DataFrame({
                        'Use Case': list(use_case_counts.keys()),
                        'Count': list(use_case_counts.values())
                    }).sort_values('Count', ascending=False)
                    fig = px.pie(
                        use_cases_df.head(5),
                        names='Use Case',
                        values='Count',
                        title='Top 5 GenAI Use Cases',
                        hole=0.4
                    )
                    st.plotly_chart(fig, use_container_width=True)

                    # Top use case insight
                    if not use_cases_df.empty:
                        top_use_case = use_cases_df.iloc[0]
                        st.markdown(f"""
                        <div class="highlight-box">
                            <p>Most common use case: <b>{top_use_case['Use Case']}</b> mentioned in {top_use_case['Count']} descriptions</p>
                        </div>
                        """, unsafe_allow_html=True)
                else:
                    st.info("No specific use cases were identified in the descriptions.")
            # 3. Prompt champions with the highest-quality GenAI descriptions
            st.subheader("👑 GenAI Prompt Champions")
            if 'Description_Quality_Score' in result_df.columns:
                # Get the top 3 users by quality score
                top_quality_users = result_df.sort_values('Description_Quality_Score', ascending=False).head(3)

                # Display the top champion
                if not top_quality_users.empty:
                    champion = top_quality_users.iloc[0]
                    st.markdown(f"""
                    <div class="highlight-box">
                        <h3>🏆 Prompt Champion: {champion['User']}</h3>
                        <p>Quality Score: <span class="quality-high">{champion['Description_Quality_Score']}/100</span></p>
                        <p>GenAI Efficiency: {round(champion['GenAI_Efficiency'], 2)} hours</p>
                        <p><b>GenAI Descriptions:</b></p>
                        <pre>{champion['GenAI_Descriptions']}</pre>
                    </div>
                    """, unsafe_allow_html=True)

                # Quality score distribution
                st.markdown("### Quality Score Distribution")
                fig = px.histogram(
                    result_df,
                    x='Description_Quality_Score',
                    nbins=10,
                    title='Distribution of GenAI Description Quality Scores',
                    color_discrete_sequence=['#4CAF50']
                )
                fig.update_layout(xaxis_title="Quality Score", yaxis_title="Number of Users")
                st.plotly_chart(fig, use_container_width=True)

                # Quality scores by user with team categorization
                st.markdown("### Quality Scores by User & Team Categories")
                # Build a dataframe for team identification
                team_df = result_df[['User', 'Description_Quality_Score', 'GenAI_Efficiency', 'Total_Logged_Hours']].copy()
                # Ensure we have numeric values for the calculations
                team_df['Description_Quality_Score'] = pd.to_numeric(team_df['Description_Quality_Score'], errors='coerce').fillna(0)
                team_df['GenAI_Efficiency'] = pd.to_numeric(team_df['GenAI_Efficiency'], errors='coerce').fillna(0)
                team_df['Total_Logged_Hours'] = pd.to_numeric(team_df['Total_Logged_Hours'], errors='coerce').fillna(0)

                # Calculate a combined score: a weighted average of 60% quality
                # and 40% efficiency hours, each normalized to a 0-100 scale
                max_quality = team_df['Description_Quality_Score'].max() if not team_df.empty and team_df['Description_Quality_Score'].max() > 0 else 100
                max_hours = team_df['GenAI_Efficiency'].max() if not team_df.empty and team_df['GenAI_Efficiency'].max() > 0 else 1
                team_df['Quality_Normalized'] = team_df['Description_Quality_Score'] / max_quality * 100
                team_df['Hours_Normalized'] = team_df['GenAI_Efficiency'] / max_hours * 100
                team_df['Combined_Score'] = (team_df['Quality_Normalized'] * 0.6) + (team_df['Hours_Normalized'] * 0.4)
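                # Illustrative example: quality 75 (max 100) and 4.0 GenAI hours
                # (max 8.0) give a combined score of 0.6 * 75 + 0.4 * 50 = 65.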
                # Assign team categories based on the combined score and individual metrics
                def assign_team_category(row):
                    quality = row['Description_Quality_Score']
                    hours = row['GenAI_Efficiency']
                    combined = row['Combined_Score']
                    if quality >= 80 and hours >= (max_hours * 0.7):
                        return "🔥 GenAI Champion", "Masters of both quality and quantity"
                    elif quality >= 70:
                        return "✨ Prompt Expert", "High-quality prompt crafters"
                    elif hours >= (max_hours * 0.8):
                        return "⚡ Power User", "High-volume GenAI users"
                    elif combined >= 60:
                        return "🌟 Balanced Performer", "Good balance of quality and usage"
                    elif quality >= 50:
                        return "📝 Quality Focused", "Focuses on quality over quantity"
                    elif hours > 0:
                        return "🔍 Exploring User", "Beginning their GenAI journey"
                    else:
                        return "❓ Inactive", "Little to no GenAI usage"
                # Apply the team categorization
                team_df[['Team_Category', 'Category_Description']] = team_df.apply(assign_team_category, axis=1, result_type='expand')
                # Sort by combined score
                team_df = team_df.sort_values('Combined_Score', ascending=False)

                # Add color coding based on quality score
                def quality_color(score):
                    if score >= 70:
                        return 'quality-high'
                    elif score >= 40:
                        return 'quality-medium'
                    else:
                        return 'quality-low'

                team_df['Score_Display'] = team_df['Description_Quality_Score'].apply(
                    lambda x: f'<span class="{quality_color(x)}">{x}</span>'
                )

                # Create a display dataframe with the relevant columns
                display_df = team_df[['User', 'Score_Display', 'GenAI_Efficiency', 'Team_Category', 'Category_Description']].copy()
                display_df.columns = ['User', 'Quality Score', 'GenAI Hours', 'Team Category', 'Description']

                # Display as a styled HTML table
                st.write(display_df.to_html(escape=False), unsafe_allow_html=True)

                # Team distribution pie chart
                st.markdown("### Team Category Distribution")
                team_counts = team_df['Team_Category'].value_counts().reset_index()
                team_counts.columns = ['Team_Category', 'Count']
                fig = px.pie(
                    team_counts,
                    names='Team_Category',
                    values='Count',
                    title='Distribution of Team Categories',
                    color_discrete_sequence=px.colors.qualitative.Bold
                )
                st.plotly_chart(fig, use_container_width=True)

                # Quality factors explanation
                st.markdown("""
                ### How Quality Scores Are Calculated
                The quality score is based on these factors:
                1. **Length & Detail (40%)**: Longer, more detailed descriptions score higher
                2. **Specificity (30%)**: Mentions of specific AI tools, metrics, and technical details
                3. **Uniqueness (30%)**: Variety of terms and concepts used

                Scores range from 0-100, with higher scores indicating more comprehensive and useful GenAI descriptions.
                """)
            # Data visualization section (original visualizations)
            st.header("📈 Data Visualization")

            # Tab layout for the visualizations
            tab1, tab2, tab3, tab4 = st.tabs(["GenAI Efficiency", "Utilization", "User Analysis", "Tools & Use Cases"])

            with tab1:
                # GenAI efficiency by user
                if 'GenAI_Efficiency' in result_df.columns:
                    st.subheader("GenAI Efficiency by User")
                    sorted_df = result_df.sort_values('GenAI_Efficiency', ascending=False)
                    fig = px.bar(
                        sorted_df,
                        x='User',
                        y='GenAI_Efficiency',
                        title='GenAI Efficiency Hours by User',
                        color='GenAI_Efficiency',
                        color_continuous_scale='Viridis'
                    )
                    fig.update_layout(xaxis_title="User", yaxis_title="Hours")
                    st.plotly_chart(fig, use_container_width=True)

            with tab2:
                # Utilization percentage
                if 'Utilization_Percentage' in result_df.columns:
                    st.subheader("Utilization Percentage by User")
                    sorted_df = result_df.sort_values('Utilization_Percentage', ascending=False)
                    fig = px.bar(
                        sorted_df,
                        x='User',
                        y='Utilization_Percentage',
                        title='Utilization Percentage by User',
                        color='Utilization_Percentage',
                        color_continuous_scale='RdYlGn'
                    )
                    fig.update_layout(xaxis_title="User", yaxis_title="Utilization %")
                    st.plotly_chart(fig, use_container_width=True)

                # Required vs logged hours
                if 'Total_Required_Hours' in result_df.columns and 'Total_Logged_Hours' in result_df.columns:
                    st.subheader("Required vs Logged Hours by User")
                    fig = px.bar(
                        result_df,
                        x='User',
                        y=['Total_Required_Hours', 'Total_Logged_Hours'],
                        title='Required vs Logged Hours by User',
                        barmode='group'
                    )
                    fig.update_layout(xaxis_title="User", yaxis_title="Hours")
                    st.plotly_chart(fig, use_container_width=True)

            with tab3:
                # Users with GenAI descriptions
                st.subheader("Users with GenAI Use Cases")
                has_description = result_df['GenAI_Descriptions'] != ""
                fig = px.pie(
                    names=['Has GenAI Use Cases', 'No GenAI Use Cases'],
                    values=[result_df[has_description].shape[0], result_df[~has_description].shape[0]],
                    title='Users with GenAI Use Cases'
                )
                st.plotly_chart(fig, use_container_width=True)

            with tab4:
                # Combined tools and use cases view
                st.subheader("AI Tools and Use Cases")
                if ai_tool_counts and use_case_counts:
                    col1, col2 = st.columns(2)
                    with col1:
                        # Frequency-weighted text stand-in for a word cloud
                        st.markdown("### AI Tools Word Cloud")
                        ai_tools_text = " ".join([f"{tool} " * count for tool, count in ai_tool_counts.items()])
                        st.text_area("AI tools word cloud", ai_tools_text, height=200, label_visibility="collapsed")
                    with col2:
                        # Use cases bar chart
                        use_cases_df = pd.DataFrame({
                            'Use Case': list(use_case_counts.keys()),
                            'Count': list(use_case_counts.values())
                        }).sort_values('Count', ascending=False)
                        fig = px.bar(
                            use_cases_df,
                            x='Use Case',
                            y='Count',
                            title='All GenAI Use Cases',
                            color='Count',
                            color_continuous_scale='YlOrRd'
                        )
                        fig.update_layout(xaxis_title="Use Case", yaxis_title="Count")
                        st.plotly_chart(fig, use_container_width=True)
            # Summary statistics
            st.subheader("Summary Statistics")
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Users", len(result_df))
            with col2:
                # Average efficiency
                avg_efficiency = result_df['GenAI_Efficiency'].mean()
                if not pd.isna(avg_efficiency):
                    st.metric("Avg GenAI Efficiency (hours)", round(avg_efficiency, 2))
            with col3:
                # Average utilization (skipped when no utilization data exists)
                if 'Utilization_Percentage' in result_df.columns:
                    avg_util = result_df['Utilization_Percentage'].mean()
                    if not pd.isna(avg_util):
                        st.metric("Avg Utilization %", f"{round(avg_util, 2)}%")

            # Second row of metrics
            col1, col2, col3 = st.columns(3)
            with col1:
                if ai_tool_counts:
                    top_tool = max(ai_tool_counts.items(), key=lambda x: x[1])[0]
                    st.metric("Most Used AI Tool", top_tool)
            with col2:
                if use_case_counts:
                    top_use_case = max(use_case_counts.items(), key=lambda x: x[1])[0]
                    st.metric("Top Use Case", top_use_case)
            with col3:
                if 'Description_Quality_Score' in result_df.columns and not result_df.empty:
                    avg_quality = result_df['Description_Quality_Score'].mean()
                    st.metric("Avg Description Quality", f"{round(avg_quality, 1)}/100")
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
        st.markdown("Please check your file format and try again.")

# Footer
st.markdown("---")
st.markdown("**Enhanced GenAI Worklog Processor** • Built with Streamlit and Pandas")