# Pratik333's picture
# Update app.py (#1)
# 3075c3f verified
import streamlit as st
import pandas as pd
import numpy as np
import io
import base64
import plotly.express as px
import datetime
import re
from collections import Counter
def process_genai_data(df):
    """
    Build a one-row-per-user summary of GenAI usage from the raw worklog.

    For every unique user (kept in first-appearance order) the result has:
    - GenAI_Descriptions: unique non-null descriptions, "- " bulleted and
      newline-joined.
    - GenAI_Efficiency: the value from the user's first row (NaN kept, and
      NaN when the source column is absent, so the output schema is stable).
    - Total_Logged_Hours / Total_Required_Hours / Utilization_Percentage:
      only when the 'Logged' / 'Required' columns exist.
    - Date_Range: "min to max" of the user's non-null dates, else "N/A".
    - Description_Quality_Score: see calculate_description_quality().
    - Projects: list of the user's projects (only when 'Project' exists).

    Parameters
    ----------
    df : pd.DataFrame
        Raw worklog; must contain 'User' and 'GenAI use case description'.

    Returns
    -------
    pd.DataFrame
        One row per unique user.
    """
    # Unique users in order of first appearance in the file.
    unique_users = df['User'].drop_duplicates().reset_index(drop=True)
    result_df = pd.DataFrame(unique_users, columns=['User'])

    # Group once up front; the previous implementation re-filtered the whole
    # frame for every user and every column (O(users * rows) per column).
    grouped = df.groupby('User', sort=False)

    def _join_descriptions(series):
        # Unique non-null descriptions, each prefixed with "- ".
        descriptions = series.dropna().unique()
        if len(descriptions) > 0:
            return "\n".join("- " + desc for desc in descriptions)
        return ""

    result_df['GenAI_Descriptions'] = result_df['User'].map(
        grouped['GenAI use case description'].apply(_join_descriptions)
    ).fillna("")

    efficiency_col = 'GenAI Efficiency (Log time in hours)'
    if efficiency_col in df.columns:
        # First row's value per user, NaN included (matches .iloc[0]; note
        # groupby .first() would skip NaN, which is NOT what we want here).
        first_efficiency = grouped[efficiency_col].apply(
            lambda s: s.iloc[0] if len(s) > 0 else np.nan
        )
        result_df['GenAI_Efficiency'] = result_df['User'].map(first_efficiency)
    else:
        # Column missing: keep the output schema stable for downstream code.
        result_df['GenAI_Efficiency'] = np.nan

    # Hour totals and utilization, guarded so a file without these columns
    # no longer raises KeyError (the UI explicitly allows continuing
    # without the optional columns).
    if 'Logged' in df.columns:
        result_df['Total_Logged_Hours'] = result_df['User'].map(
            grouped['Logged'].sum()
        ).fillna(0)
    if 'Required' in df.columns:
        result_df['Total_Required_Hours'] = result_df['User'].map(
            grouped['Required'].sum()
        ).fillna(0)
    if 'Logged' in df.columns and 'Required' in df.columns:
        result_df['Utilization_Percentage'] = (
            result_df['Total_Logged_Hours'] / result_df['Total_Required_Hours'] * 100
        ).round(2)

    if 'Date' in df.columns:
        def _date_range(series):
            # "min to max" of a user's non-null dates, else "N/A".
            dates = series.dropna()
            if len(dates) > 0:
                return f"{min(dates)} to {max(dates)}"
            return "N/A"

        result_df['Date_Range'] = result_df['User'].map(
            grouped['Date'].apply(_date_range)
        ).fillna("N/A")
    else:
        result_df['Date_Range'] = "N/A"

    # Quality score computed over the concatenated descriptions built above.
    result_df['Description_Quality_Score'] = calculate_description_quality(result_df)

    if 'Project' in df.columns:
        # List of the user's distinct non-null projects.
        result_df['Projects'] = result_df['User'].map(
            grouped['Project'].apply(lambda s: list(s.dropna().unique()))
        )
    return result_df
def analyze_projects_by_genai_hours(df):
    """
    Rank projects by their total GenAI efficiency hours.

    Hours are de-duplicated per (Project, User) pair — only the first
    recorded value per pair is summed — so repeated log rows for the same
    user on the same project are not double-counted.

    Parameters
    ----------
    df : pd.DataFrame
        Raw worklog; needs 'Project', 'User' and the GenAI hours column.

    Returns
    -------
    pd.DataFrame or None
        Columns ['Project', 'Total_GenAI_Hours', 'User_Count'] sorted by
        hours descending, or None when no 'Project' column exists.
    """
    if 'Project' not in df.columns:
        return None
    hours_col = 'GenAI Efficiency (Log time in hours)'
    # One hours value per (Project, User) pair: first occurrence wins.
    per_pair = (
        df.groupby(['Project', 'User'])[hours_col]
        .first()
        .reset_index()
    )
    # Aggregate the de-duplicated pairs up to project level.
    totals = (
        per_pair.groupby('Project')[hours_col]
        .sum()
        .reset_index()
        .sort_values(hours_col, ascending=False)
        .rename(columns={hours_col: 'Total_GenAI_Hours'})
    )
    # Distinct contributors per project.
    contributors = (
        df.groupby('Project')['User']
        .nunique()
        .reset_index()
        .rename(columns={'User': 'User_Count'})
    )
    return totals.merge(contributors, on='Project')
def extract_ai_tools_from_descriptions(df):
    """
    Count mentions of known AI tools across all GenAI descriptions.

    Tool names are matched on word boundaries against the lower-cased,
    concatenated descriptions, then normalized to a display name
    (e.g. 'gpt-4' -> 'ChatGPT/GPT').

    Bug fix: longer names are matched first and the matched text is
    consumed before shorter names are tried, so 'gpt-4' is no longer also
    counted as 'gpt' and 'github copilot' is no longer also counted as
    'copilot' (previously this inflated the normalized counts).

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a 'GenAI use case description' column.

    Returns
    -------
    collections.Counter
        Normalized tool name -> number of mentions.
    """
    # Common AI tools and platforms to look for
    ai_tools = [
        'chatgpt', 'gpt-4', 'gpt-3', 'gpt', 'openai',
        'claude', 'anthropic',
        'gemini', 'bard', 'google ai',
        'copilot', 'github copilot', 'microsoft copilot',
        'dall-e', 'midjourney', 'stable diffusion',
        'hugging face', 'transformers',
        'bert', 'llama', 'mistral',
        'tensorflow', 'pytorch', 'ml',
        'jupyter', 'colab',
        'langchain', 'llm', 'rag'
    ]
    # Dictionary to normalize raw matches to display names
    tool_mapping = {
        'gpt': 'ChatGPT/GPT',
        'gpt-3': 'ChatGPT/GPT',
        'gpt-4': 'ChatGPT/GPT',
        'chatgpt': 'ChatGPT/GPT',
        'openai': 'OpenAI',
        'claude': 'Claude',
        'anthropic': 'Claude',
        'gemini': 'Google AI',
        'bard': 'Google AI',
        'google ai': 'Google AI',
        'copilot': 'GitHub Copilot',
        'github copilot': 'GitHub Copilot',
        'microsoft copilot': 'Microsoft Copilot',
        'dall-e': 'DALL-E',
        'midjourney': 'Midjourney',
        'stable diffusion': 'Stable Diffusion',
        'hugging face': 'Hugging Face',
        'transformers': 'Transformers',
        'bert': 'BERT',
        'llama': 'LLaMA',
        'mistral': 'Mistral AI',
        'tensorflow': 'TensorFlow',
        'pytorch': 'PyTorch',
        'ml': 'Machine Learning',
        'jupyter': 'Jupyter',
        'colab': 'Google Colab',
        'langchain': 'LangChain',
        'llm': 'Large Language Models',
        'rag': 'Retrieval Augmented Generation'
    }
    # All descriptions lower-cased into one searchable string.
    all_descriptions = " ".join(df['GenAI use case description'].dropna().astype(str).tolist()).lower()
    tool_counts = Counter()
    remaining = all_descriptions
    # Longest names first so compound names win over their substrings;
    # matched spans are blanked out to prevent double counting.
    for tool in sorted(ai_tools, key=len, reverse=True):
        pattern = r'\b' + re.escape(tool) + r'\b'
        count = len(re.findall(pattern, remaining))
        if count > 0:
            normalized_tool = tool_mapping.get(tool, tool)
            tool_counts[normalized_tool] += count
            remaining = re.sub(pattern, ' ', remaining)
    return tool_counts
def extract_use_cases_from_descriptions(df):
    """
    Categorize GenAI descriptions into common use-case buckets.

    Each description is lower-cased and tested against keyword lists; a
    category is counted at most once per description.

    Bug fix: keywords are lower-cased before matching — previously the
    mixed-case keyword 'QA' could never match the lower-cased text.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a 'GenAI use case description' column.

    Returns
    -------
    collections.Counter
        Use-case category -> number of descriptions mentioning it.
    """
    # Common use case categories and the keywords that signal them
    use_case_keywords = {
        'Code Generation': ['code', 'coding', 'programming', 'script', 'develop', 'algorithm'],
        'Content Creation': ['content', 'write', 'writing', 'draft', 'article', 'blog'],
        'Data Analysis': ['data', 'analysis', 'analyze', 'analytics', 'statistics', 'insights'],
        'Documentation': ['document', 'documentation', 'manual', 'guide', 'readme'],
        'Research': ['research', 'study', 'investigate', 'explore', 'literature'],
        'Summarization': ['summary', 'summarize', 'summarization', 'extract key points'],
        'Translation': ['translate', 'translation', 'language', 'localize'],
        'Image Generation': ['image', 'picture', 'graphic', 'design', 'draw', 'art'],
        'Chatbot': ['chatbot', 'chat', 'conversation', 'dialogue', 'assistant'],
        'Automation': ['automate', 'automation', 'workflow', 'process', 'routine'],
        'Training': ['train', 'training', 'learn', 'learning', 'education'],
        'Testing': ['test', 'testing', 'QA', 'quality assurance', 'debug']
    }
    # Extract all GenAI descriptions as plain strings
    descriptions = df['GenAI use case description'].dropna().astype(str).tolist()
    use_case_counts = Counter()
    for description in descriptions:
        description_lower = description.lower()
        for use_case, keywords in use_case_keywords.items():
            for keyword in keywords:
                # Lower-case the keyword so mixed-case entries (e.g. 'QA')
                # can match the lower-cased description text.
                if re.search(r'\b' + re.escape(keyword.lower()) + r'\b', description_lower):
                    use_case_counts[use_case] += 1
                    break  # Count each use case only once per description
    return use_case_counts
def calculate_description_quality(df):
    """
    Score each user's concatenated GenAI descriptions on a 0-100 scale.

    The raw score combines three weighted factors:
    - Length (40%): characters relative to the longest description.
    - Specificity (30%): mentions of known AI tools, digits, long text.
    - Uniqueness (30%): ratio of distinct non-stopword words.
    Raw totals are then normalized so the best description maps to 100.

    Bug fix: both normalization steps are guarded against a zero maximum,
    so an all-empty descriptions column now yields 0.0 instead of NaN
    (previously 0/0 produced NaN scores throughout).

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a string column 'GenAI_Descriptions'.

    Returns
    -------
    pd.Series
        Scores aligned with df.index, rounded to one decimal.
    """
    descriptions = df['GenAI_Descriptions']

    # Factor 1: length score (40% weight), relative to the longest entry.
    char_counts = descriptions.str.len()
    max_char_count = char_counts.max() if not char_counts.empty else 0
    if pd.isna(max_char_count) or max_char_count == 0:
        max_char_count = 1  # avoid 0/0 when every description is empty
    length_score = (char_counts / max_char_count) * 40

    # Factor 2: specificity score (mentions of specific tools or numbers).
    def specificity_score(desc):
        if not isinstance(desc, str) or desc.strip() == "":
            return 0
        score = 0
        # Check for specific AI tools
        ai_tools = ['gpt', 'chatgpt', 'claude', 'gemini', 'copilot', 'dall-e', 'midjourney']
        for tool in ai_tools:
            if re.search(r'\b' + re.escape(tool) + r'\b', desc.lower()):
                score += 5
        # Numbers may indicate metrics or concrete examples
        if re.search(r'\d+', desc):
            score += 5
        # Reward long, detailed explanations
        if len(desc.split()) > 50:
            score += 10
        return min(score, 30)  # cap at the 30% weight

    specificity_scores = descriptions.apply(specificity_score)

    # Factor 3: uniqueness score — variety of meaningful words used.
    def uniqueness_score(desc):
        if not isinstance(desc, str) or desc.strip() == "":
            return 0
        # Simple word tokenization by splitting on whitespace
        words = desc.lower().split()
        # Remove common stop words and short words before measuring variety
        common_stopwords = {"a", "an", "the", "and", "or", "but", "is", "are", "was", "were",
                            "in", "on", "at", "to", "for", "with", "by", "about", "of", "this",
                            "that", "i", "we", "you", "he", "she", "they", "it", "have", "has"}
        filtered_words = [word for word in words if word not in common_stopwords and len(word) > 2]
        if filtered_words:
            uniqueness = len(set(filtered_words)) / len(filtered_words)
            return uniqueness * 30  # 30% weight to uniqueness
        return 0

    uniqueness_scores = descriptions.apply(uniqueness_score)

    # Combine and normalize to a 0-100 scale, guarding a zero maximum.
    total_scores = length_score + specificity_scores + uniqueness_scores
    max_score = total_scores.max() if not total_scores.empty else 0
    if pd.isna(max_score) or max_score == 0:
        max_score = 1
    return (total_scores / max_score * 100).round(1)
def get_download_link(df, filename):
    """
    Generate an HTML download link for *df* as a two-sheet Excel workbook.

    Sheet 'Processed Data' holds the dataframe; sheet 'Summary' holds a
    few headline metrics. The workbook is base64-embedded in a data: URI.

    Bug fix: the anchor's download attribute previously ignored the
    *filename* argument (it was hard-coded), so the browser never saved
    the file under the timestamped name the caller built.

    Parameters
    ----------
    df : pd.DataFrame
        Processed per-user data (expects GenAI_Efficiency and
        Description_Quality_Score; Utilization_Percentage is optional).
    filename : str
        Name the browser should save the file as.

    Returns
    -------
    str
        An <a> tag styled with the 'download-button' CSS class.
    """
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df.to_excel(writer, index=False, sheet_name='Processed Data')
        # Add a summary sheet with headline metrics
        summary = pd.DataFrame({
            'Metric': ['Total Users', 'Average GenAI Efficiency (hours)', 'Average Utilization (%)',
                       'Top GenAI User', 'Top Quality Score'],
            'Value': [
                len(df),
                round(df['GenAI_Efficiency'].mean(), 2),
                # Utilization is absent when the source file lacked
                # Logged/Required columns — degrade gracefully.
                round(df['Utilization_Percentage'].mean(), 2) if 'Utilization_Percentage' in df.columns else 'N/A',
                df.loc[df['GenAI_Efficiency'].idxmax(), 'User'] if not df['GenAI_Efficiency'].isna().all() else 'N/A',
                df.loc[df['Description_Quality_Score'].idxmax(), 'User'] if not df['Description_Quality_Score'].isna().all() else 'N/A'
            ]
        })
        summary.to_excel(writer, index=False, sheet_name='Summary')
    binary_data = output.getvalue()
    b64 = base64.b64encode(binary_data).decode()
    # Fixed: use the caller-supplied filename in the download attribute.
    href = f'<a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{b64}" download="{filename}" class="download-button">Download Excel file</a>'
    return href
# Custom CSS
def local_css():
    """Inject the app-wide CSS: download-button styling, metric cards,
    highlight boxes, quality-score color classes (quality-high/medium/low),
    and the team-category table styles. Rendered via unsafe_allow_html."""
    st.markdown("""
<style>
.download-button {
display: inline-block;
padding: 0.5em 1em;
color: #ffffff;
background-color: #4CAF50;
border-radius: 4px;
text-decoration: none;
font-weight: bold;
transition: background-color 0.3s;
}
.download-button:hover {
background-color: #45a049;
}
.stMetric {
background-color: #f0f2f6;
padding: 15px;
border-radius: 5px;
}
.highlight-box {
background-color: #f8f9fa;
border-left: 5px solid #4CAF50;
padding: 15px;
margin: 10px 0;
border-radius: 0 5px 5px 0;
}
.quality-high {
color: #4CAF50;
font-weight: bold;
}
.quality-medium {
color: #FFC107;
font-weight: bold;
}
.quality-low {
color: #F44336;
font-weight: bold;
}
/* Team category styling */
table {
width: 100%;
border-collapse: collapse;
margin-bottom: 20px;
}
th {
background-color: #f2f2f2;
padding: 8px;
text-align: left;
border: 1px solid #ddd;
}
td {
padding: 8px;
border: 1px solid #ddd;
}
tr:nth-child(even) {
background-color: #f9f9f9;
}
tr:hover {
background-color: #f0f0f0;
}
</style>
""", unsafe_allow_html=True)
# Main app
# Streamlit requires set_page_config to be the first st.* call in the script.
st.set_page_config(page_title="GenAI Worklog Processor", layout="wide")
local_css()  # inject the custom CSS defined above
st.title("GenAI Worklog Data Processor")
# Intro copy describing what the processor produces.
st.markdown("""
This app processes worklog data to extract insights about GenAI usage:
1. Creates a list of unique users
2. Concatenates GenAI use case descriptions for each user with proper formatting
3. Captures GenAI efficiency values and other metrics
4. Identifies projects with highest GenAI usage
5. Analyzes most common AI tools and use cases
6. Identifies prompt champions based on quality metrics
""")
# File uploader
uploaded_file = st.file_uploader("Upload your worklog CSV or Excel file", type=["csv", "xlsx", "xls"])
# NOTE(review): this paste lost all indentation; the nesting below was
# reconstructed from the control flow — verify against the original app
# before relying on exact block boundaries.
if uploaded_file is not None:
    try:
        # Read the file (CSV by extension, otherwise let pandas parse Excel)
        if uploaded_file.name.endswith('.csv'):
            df = pd.read_csv(uploaded_file)
        else:
            df = pd.read_excel(uploaded_file)
        # Display the original data
        st.subheader("Original Data Preview")
        st.dataframe(df.head())
        # Check if required columns exist
        required_columns = ['User', 'GenAI use case description', 'GenAI Efficiency (Log time in hours)']
        for col in ['Required', 'Logged', 'Date', 'Project']:
            if col in df.columns:
                required_columns.append(col)
        # Only the first three entries are hard requirements; the loop above
        # appended optional extras, hence the [:3] slice here.
        missing_columns = [col for col in required_columns[:3] if col not in df.columns]
        if missing_columns:
            st.warning(f"The following required columns are missing: {', '.join(missing_columns)}")
            st.markdown("""
For full functionality, your file should contain these columns:
- User
- GenAI use case description
- GenAI Efficiency (Log time in hours)
- Required
- Logged
- Date
- Project (optional but recommended for project analysis)
""")
            # Stop if essential columns are missing
            if any(col in missing_columns for col in ['User', 'GenAI use case description']):
                st.error("Cannot continue without essential columns.")
                st.stop()
            # Continue with available columns
            st.info("Continuing with available columns...")
        # Process the data
        if st.button("Process Data"):
            with st.spinner("Processing data..."):
                result_df = process_genai_data(df)
                # Get project analysis if available
                project_analysis = None
                if 'Project' in df.columns:
                    project_analysis = analyze_projects_by_genai_hours(df)
                # Get AI tools usage
                ai_tool_counts = extract_ai_tools_from_descriptions(df)
                # Get use case analysis
                use_case_counts = extract_use_cases_from_descriptions(df)
            # Display the result
            st.subheader("Processed Data")
            st.dataframe(result_df)
            # Download link (filename carries a timestamp for uniqueness)
            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            st.subheader("Download Processed Data")
            st.markdown(get_download_link(result_df, f"genai_processed_data_{timestamp}.xlsx"), unsafe_allow_html=True)
            # NEW INSIGHTS SECTION
            st.header("🔍 Advanced GenAI Insights")
            # 1. Project with highest GenAI efficacy log hours
            if project_analysis is not None and not project_analysis.empty:
                st.subheader("🏆 Project with Highest GenAI Efficacy Hours")
                # project_analysis is sorted descending, so row 0 is the top project
                top_project = project_analysis.iloc[0]
                col1, col2 = st.columns(2)
                with col1:
                    st.markdown(f"""
<div class="highlight-box">
<h3>{top_project['Project']}</h3>
<p>Total GenAI Hours: <b>{round(top_project['Total_GenAI_Hours'], 2)}</b></p>
<p>Number of Users: <b>{top_project['User_Count']}</b></p>
<p>Average Hours per User: <b>{round(top_project['Total_GenAI_Hours'] / top_project['User_Count'], 2)}</b></p>
</div>
""", unsafe_allow_html=True)
                with col2:
                    # Bar chart of top 5 projects
                    top_projects = project_analysis.head(5)
                    fig = px.bar(
                        top_projects,
                        x='Project',
                        y='Total_GenAI_Hours',
                        title='Top 5 Projects by GenAI Hours',
                        color='Total_GenAI_Hours',
                        color_continuous_scale='Viridis'
                    )
                    fig.update_layout(xaxis_title="Project", yaxis_title="Total GenAI Hours")
                    st.plotly_chart(fig, use_container_width=True)
                # Full project analysis
                st.markdown("### All Projects Analysis")
                st.dataframe(project_analysis)
            # 2. Most prominent use cases of AI tools
            st.subheader("📊 Most Prominent AI Use Cases")
            col1, col2 = st.columns(2)
            with col1:
                # AI Tools Analysis
                st.markdown("### Top AI Tools Mentioned")
                if ai_tool_counts:
                    # Convert to dataframe for visualization
                    ai_tools_df = pd.DataFrame({
                        'Tool': list(ai_tool_counts.keys()),
                        'Mentions': list(ai_tool_counts.values())
                    }).sort_values('Mentions', ascending=False)
                    fig = px.bar(
                        ai_tools_df.head(10),
                        x='Tool',
                        y='Mentions',
                        title='Top 10 AI Tools Mentioned',
                        color='Mentions',
                        color_continuous_scale='Blues'
                    )
                    fig.update_layout(xaxis_title="AI Tool", yaxis_title="Number of Mentions")
                    st.plotly_chart(fig, use_container_width=True)
                    # Top tool insight
                    if not ai_tools_df.empty:
                        top_tool = ai_tools_df.iloc[0]
                        st.markdown(f"""
<div class="highlight-box">
<p>Most used AI tool: <b>{top_tool['Tool']}</b> with {top_tool['Mentions']} mentions</p>
</div>
""", unsafe_allow_html=True)
                else:
                    st.info("No specific AI tools were identified in the descriptions.")
            with col2:
                # Use Cases Analysis
                st.markdown("### Top Use Cases")
                if use_case_counts:
                    # Convert to dataframe for visualization
                    use_cases_df = pd.DataFrame({
                        'Use Case': list(use_case_counts.keys()),
                        'Count': list(use_case_counts.values())
                    }).sort_values('Count', ascending=False)
                    fig = px.pie(
                        use_cases_df.head(5),
                        names='Use Case',
                        values='Count',
                        title='Top 5 GenAI Use Cases',
                        hole=0.4
                    )
                    st.plotly_chart(fig, use_container_width=True)
                    # Top use case insight
                    if not use_cases_df.empty:
                        top_use_case = use_cases_df.iloc[0]
                        st.markdown(f"""
<div class="highlight-box">
<p>Most common use case: <b>{top_use_case['Use Case']}</b> mentioned in {top_use_case['Count']} descriptions</p>
</div>
""", unsafe_allow_html=True)
                else:
                    st.info("No specific use cases were identified in the descriptions.")
            # 3. Champion of the prompt with quality GenAI Description
            st.subheader("👑 GenAI Prompt Champions")
            if 'Description_Quality_Score' in result_df.columns:
                # Get top 3 users by quality score
                top_quality_users = result_df.sort_values('Description_Quality_Score', ascending=False).head(3)
                # Display top champion
                if not top_quality_users.empty:
                    champion = top_quality_users.iloc[0]
                    st.markdown(f"""
<div class="highlight-box">
<h3>🏆 Prompt Champion: {champion['User']}</h3>
<p>Quality Score: <span class="quality-high">{champion['Description_Quality_Score']}/100</span></p>
<p>GenAI Efficiency: {round(champion['GenAI_Efficiency'], 2)} hours</p>
<p><b>GenAI Descriptions:</b></p>
<pre>{champion['GenAI_Descriptions']}</pre>
</div>
""", unsafe_allow_html=True)
                # Quality score distribution
                st.markdown("### Quality Score Distribution")
                fig = px.histogram(
                    result_df,
                    x='Description_Quality_Score',
                    nbins=10,
                    title='Distribution of GenAI Description Quality Scores',
                    color_discrete_sequence=['#4CAF50']
                )
                fig.update_layout(xaxis_title="Quality Score", yaxis_title="Number of Users")
                st.plotly_chart(fig, use_container_width=True)
                # Quality score by user with team categorization
                st.markdown("### Quality Scores by User & Team Categories")
                # Create a more comprehensive dataframe for team identification
                team_df = result_df[['User', 'Description_Quality_Score', 'GenAI_Efficiency', 'Total_Logged_Hours']].copy()
                # Ensure we have numeric values for calculations
                team_df['Description_Quality_Score'] = pd.to_numeric(team_df['Description_Quality_Score'], errors='coerce').fillna(0)
                team_df['GenAI_Efficiency'] = pd.to_numeric(team_df['GenAI_Efficiency'], errors='coerce').fillna(0)
                team_df['Total_Logged_Hours'] = pd.to_numeric(team_df['Total_Logged_Hours'], errors='coerce').fillna(0)
                # Calculate a combined score (weighted average of quality and hours)
                # Weight: 60% quality, 40% efficiency hours
                max_quality = team_df['Description_Quality_Score'].max() if not team_df.empty and team_df['Description_Quality_Score'].max() > 0 else 100
                max_hours = team_df['GenAI_Efficiency'].max() if not team_df.empty and team_df['GenAI_Efficiency'].max() > 0 else 1
                team_df['Quality_Normalized'] = team_df['Description_Quality_Score'] / max_quality * 100
                team_df['Hours_Normalized'] = team_df['GenAI_Efficiency'] / max_hours * 100
                team_df['Combined_Score'] = (team_df['Quality_Normalized'] * 0.6) + (team_df['Hours_Normalized'] * 0.4)
                # Assign team categories based on combined score and individual metrics.
                # Hours thresholds are relative to the cohort's max (closure over
                # max_hours above); quality thresholds are absolute.
                def assign_team_category(row):
                    quality = row['Description_Quality_Score']
                    hours = row['GenAI_Efficiency']
                    combined = row['Combined_Score']
                    if quality >= 80 and hours >= (max_hours * 0.7):
                        return "🔥 GenAI Champion", "Masters of both quality and quantity"
                    elif quality >= 70:
                        return "✨ Prompt Expert", "High-quality prompt crafters"
                    elif hours >= (max_hours * 0.8):
                        return "⚡ Power User", "High volume GenAI users"
                    elif combined >= 60:
                        return "🌟 Balanced Performer", "Good balance of quality and usage"
                    elif quality >= 50:
                        return "📝 Quality Focused", "Focuses on quality over quantity"
                    elif hours > 0:
                        return "🔍 Exploring User", "Beginning GenAI journey"
                    else:
                        return "❓ Inactive", "Little to no GenAI usage"
                # Apply the team categorization (tuple return expands to two columns)
                team_df[['Team_Category', 'Category_Description']] = team_df.apply(assign_team_category, axis=1, result_type='expand')
                # Sort by combined score
                team_df = team_df.sort_values('Combined_Score', ascending=False)
                # Add color coding based on quality score (CSS classes from local_css)
                def quality_color(score):
                    if score >= 70:
                        return 'quality-high'
                    elif score >= 40:
                        return 'quality-medium'
                    else:
                        return 'quality-low'
                team_df['Score_Display'] = team_df['Description_Quality_Score'].apply(
                    lambda x: f'<span class="{quality_color(x)}">{x}</span>'
                )
                # Create a display dataframe with the relevant columns
                display_df = team_df[['User', 'Score_Display', 'GenAI_Efficiency', 'Team_Category', 'Category_Description']]
                display_df.columns = ['User', 'Quality Score', 'GenAI Hours', 'Team Category', 'Description']
                # Display as raw HTML so the colored <span> markup renders
                st.write(display_df.to_html(escape=False), unsafe_allow_html=True)
                # Team distribution pie chart
                st.markdown("### Team Category Distribution")
                team_counts = team_df['Team_Category'].value_counts().reset_index()
                team_counts.columns = ['Team_Category', 'Count']
                fig = px.pie(
                    team_counts,
                    names='Team_Category',
                    values='Count',
                    title='Distribution of Team Categories',
                    color_discrete_sequence=px.colors.qualitative.Bold
                )
                st.plotly_chart(fig, use_container_width=True)
                # Quality factors explanation
                st.markdown("""
### How Quality Scores Are Calculated
The quality score is based on these factors:
1. **Length & Detail (40%)**: Longer, more detailed descriptions score higher
2. **Specificity (30%)**: Mentions of specific AI tools, metrics, and technical details
3. **Uniqueness (30%)**: Variety of terms and concepts used
Scores range from 0-100, with higher scores indicating more comprehensive and useful GenAI descriptions.
""")
            # Data visualization section (original visualizations)
            st.header("📈 Data Visualization")
            # Tab layout for visualizations
            tab1, tab2, tab3, tab4 = st.tabs(["GenAI Efficiency", "Utilization", "User Analysis", "Tools & Use Cases"])
            with tab1:
                # GenAI Efficiency by User
                if 'GenAI_Efficiency' in result_df.columns:
                    st.subheader("GenAI Efficiency by User")
                    sorted_df = result_df.sort_values('GenAI_Efficiency', ascending=False)
                    fig = px.bar(
                        sorted_df,
                        x='User',
                        y='GenAI_Efficiency',
                        title='GenAI Efficiency Hours by User',
                        color='GenAI_Efficiency',
                        color_continuous_scale='Viridis'
                    )
                    fig.update_layout(xaxis_title="User", yaxis_title="Hours")
                    st.plotly_chart(fig, use_container_width=True)
            with tab2:
                # Utilization Percentage
                if 'Utilization_Percentage' in result_df.columns:
                    st.subheader("Utilization Percentage by User")
                    sorted_df = result_df.sort_values('Utilization_Percentage', ascending=False)
                    fig = px.bar(
                        sorted_df,
                        x='User',
                        y='Utilization_Percentage',
                        title='Utilization Percentage by User',
                        color='Utilization_Percentage',
                        color_continuous_scale='RdYlGn'
                    )
                    fig.update_layout(xaxis_title="User", yaxis_title="Utilization %")
                    st.plotly_chart(fig, use_container_width=True)
                # Required vs Logged Hours
                if 'Total_Required_Hours' in result_df.columns and 'Total_Logged_Hours' in result_df.columns:
                    st.subheader("Required vs Logged Hours by User")
                    fig = px.bar(
                        result_df,
                        x='User',
                        y=['Total_Required_Hours', 'Total_Logged_Hours'],
                        title='Required vs Logged Hours by User',
                        barmode='group'
                    )
                    fig.update_layout(xaxis_title="User", yaxis_title="Hours")
                    st.plotly_chart(fig, use_container_width=True)
            with tab3:
                # User with GenAI descriptions
                st.subheader("Users with GenAI Use Cases")
                has_description = result_df['GenAI_Descriptions'] != ""
                fig = px.pie(
                    names=['Has GenAI Use Cases', 'No GenAI Use Cases'],
                    values=[result_df[has_description].shape[0], result_df[~has_description].shape[0]],
                    title='Users with GenAI Use Cases'
                )
                st.plotly_chart(fig, use_container_width=True)
            with tab4:
                # Combined tools and use cases view
                st.subheader("AI Tools and Use Cases")
                if ai_tool_counts and use_case_counts:
                    col1, col2 = st.columns(2)
                    with col1:
                        # Word cloud of AI tools (text representation)
                        st.markdown("### AI Tools Word Cloud")
                        ai_tools_text = " ".join([f"{tool} " * count for tool, count in ai_tool_counts.items()])
                        st.text_area("", ai_tools_text, height=200)
                    with col2:
                        # Use cases bar chart
                        use_cases_df = pd.DataFrame({
                            'Use Case': list(use_case_counts.keys()),
                            'Count': list(use_case_counts.values())
                        }).sort_values('Count', ascending=False)
                        fig = px.bar(
                            use_cases_df,
                            x='Use Case',
                            y='Count',
                            title='All GenAI Use Cases',
                            color='Count',
                            color_continuous_scale='YlOrRd'
                        )
                        fig.update_layout(xaxis_title="Use Case", yaxis_title="Count")
                        st.plotly_chart(fig, use_container_width=True)
            # Summary statistics
            st.subheader("Summary Statistics")
            col1, col2, col3 = st.columns(3)
            with col1:
                st.metric("Total Users", len(result_df))
            with col2:
                # Average efficiency
                avg_efficiency = result_df['GenAI_Efficiency'].mean()
                if not pd.isna(avg_efficiency):
                    st.metric("Avg GenAI Efficiency (hours)", round(avg_efficiency, 2))
            with col3:
                # Average utilization
                if 'Utilization_Percentage' in result_df.columns:
                    avg_util = result_df['Utilization_Percentage'].mean()
                    st.metric("Avg Utilization %", f"{round(avg_util, 2)}%")
            # New row of metrics
            col1, col2, col3 = st.columns(3)
            with col1:
                if ai_tool_counts:
                    top_tool = max(ai_tool_counts.items(), key=lambda x: x[1])[0]
                    st.metric("Most Used AI Tool", top_tool)
            with col2:
                if use_case_counts:
                    top_use_case = max(use_case_counts.items(), key=lambda x: x[1])[0]
                    st.metric("Top Use Case", top_use_case)
            with col3:
                if 'Description_Quality_Score' in result_df.columns and not result_df.empty:
                    avg_quality = result_df['Description_Quality_Score'].mean()
                    st.metric("Avg Description Quality", f"{round(avg_quality, 1)}/100")
    except Exception as e:
        # Surface any processing error to the user instead of crashing the app.
        st.error(f"An error occurred: {str(e)}")
        st.markdown("Please check your file format and try again.")
# Footer
st.markdown("---")
st.markdown("**Enhanced GenAI Worklog Processor** • Built with Streamlit and Pandas")