Files changed (1) hide show
  1. app.py +573 -10
app.py CHANGED
@@ -5,6 +5,8 @@ import io
5
  import base64
6
  import plotly.express as px
7
  import datetime
 
 
8
 
9
  def process_genai_data(df):
10
  # Create a new dataframe with unique users
@@ -61,8 +63,224 @@ def process_genai_data(df):
61
 
62
  result_df['Date_Range'] = result_df['User'].apply(get_date_range)
63
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  return result_df
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def get_download_link(df, filename):
67
  """Generate a download link for the dataframe as an Excel file"""
68
  output = io.BytesIO()
@@ -71,11 +289,14 @@ def get_download_link(df, filename):
71
 
72
  # Add a summary sheet
73
  summary = pd.DataFrame({
74
- 'Metric': ['Total Users', 'Average GenAI Efficiency (hours)', 'Average Utilization (%)'],
 
75
  'Value': [
76
  len(df),
77
  round(df['GenAI_Efficiency'].mean(), 2),
78
- round(df['Utilization_Percentage'].mean(), 2)
 
 
79
  ]
80
  })
81
  summary.to_excel(writer, index=False, sheet_name='Summary')
@@ -107,6 +328,47 @@ def local_css():
107
  padding: 15px;
108
  border-radius: 5px;
109
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  </style>
111
  """, unsafe_allow_html=True)
112
 
@@ -120,7 +382,9 @@ This app processes worklog data to extract insights about GenAI usage:
120
  1. Creates a list of unique users
121
  2. Concatenates GenAI use case descriptions for each user with proper formatting
122
  3. Captures GenAI efficiency values and other metrics
123
- 4. Generates visualizations of the processed data
 
 
124
  """)
125
 
126
  # File uploader
@@ -140,14 +404,14 @@ if uploaded_file is not None:
140
 
141
  # Check if required columns exist
142
  required_columns = ['User', 'GenAI use case description', 'GenAI Efficiency (Log time in hours)']
143
- for col in ['Required', 'Logged', 'Date']:
144
  if col in df.columns:
145
  required_columns.append(col)
146
 
147
- missing_columns = [col for col in required_columns if col not in df.columns]
148
 
149
  if missing_columns:
150
- st.warning(f"The following columns are missing: {', '.join(missing_columns)}")
151
  st.markdown("""
152
  For full functionality, your file should contain these columns:
153
  - User
@@ -156,8 +420,14 @@ if uploaded_file is not None:
156
  - Required
157
  - Logged
158
  - Date
 
159
  """)
160
 
 
 
 
 
 
161
  # Continue with available columns
162
  st.info("Continuing with available columns...")
163
 
@@ -165,6 +435,17 @@ if uploaded_file is not None:
165
  if st.button("Process Data"):
166
  with st.spinner("Processing data..."):
167
  result_df = process_genai_data(df)
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  # Display the result
170
  st.subheader("Processed Data")
@@ -175,11 +456,244 @@ if uploaded_file is not None:
175
  st.subheader("Download Processed Data")
176
  st.markdown(get_download_link(result_df, f"genai_processed_data_{timestamp}.xlsx"), unsafe_allow_html=True)
177
 
178
- # Data visualization section
179
- st.subheader("Data Visualization")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  # Tab layout for visualizations
182
- tab1, tab2, tab3 = st.tabs(["GenAI Efficiency", "Utilization", "User Analysis"])
183
 
184
  with tab1:
185
  # GenAI Efficiency by User
@@ -237,6 +751,37 @@ if uploaded_file is not None:
237
  )
238
  st.plotly_chart(fig, use_container_width=True)
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  # Summary statistics
241
  st.subheader("Summary Statistics")
242
 
@@ -257,10 +802,28 @@ if uploaded_file is not None:
257
  avg_util = result_df['Utilization_Percentage'].mean()
258
  st.metric("Avg Utilization %", f"{round(avg_util, 2)}%")
259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  except Exception as e:
261
  st.error(f"An error occurred: {str(e)}")
262
  st.markdown("Please check your file format and try again.")
263
 
264
  # Footer
265
  st.markdown("---")
266
- st.markdown("**GenAI Worklog Processor** β€’ Built with Streamlit and Pandas")
 
5
  import base64
6
  import plotly.express as px
7
  import datetime
8
+ import re
9
+ from collections import Counter
10
 
11
  def process_genai_data(df):
12
  # Create a new dataframe with unique users
 
63
 
64
  result_df['Date_Range'] = result_df['User'].apply(get_date_range)
65
 
66
+ # Add description quality score
67
+ result_df['Description_Quality_Score'] = calculate_description_quality(result_df)
68
+
69
+ # Get project data if available
70
+ if 'Project' in df.columns:
71
+ # Get a list of projects for each user
72
+ def get_projects(user):
73
+ projects = df[df['User'] == user]['Project'].dropna().unique()
74
+ return list(projects)
75
+
76
+ result_df['Projects'] = result_df['User'].apply(get_projects)
77
+
78
  return result_df
79
 
80
def analyze_projects_by_genai_hours(df):
    """
    Analyze which projects have the highest GenAI efficiency log hours.

    To avoid double-counting, the first logged GenAI-hours value is taken for
    each unique (Project, User) pair before summing per project (the hours
    value appears to be repeated across a user's worklog rows — see the
    original "Fix" note).

    Args:
        df: Raw worklog dataframe. Needs 'Project', 'User' and
            'GenAI Efficiency (Log time in hours)' columns.

    Returns:
        DataFrame with columns ['Project', 'Total_GenAI_Hours', 'User_Count'],
        sorted by total hours descending, or None if any required column is
        missing.
    """
    hours_col = 'GenAI Efficiency (Log time in hours)'
    # Guard ALL required columns, not just 'Project', so a partial upload
    # degrades gracefully (None) instead of raising a KeyError below.
    if any(col not in df.columns for col in ('Project', 'User', hours_col)):
        return None

    # Unique (Project, User) combinations with their GenAI hours.
    # This avoids double-counting hours for the same user on the same project.
    user_project_hours = df.groupby(['Project', 'User'])[hours_col].first().reset_index()

    # Sum the de-duplicated hours by project, largest first.
    project_hours = user_project_hours.groupby('Project')[hours_col].sum().reset_index()
    project_hours = project_hours.sort_values(hours_col, ascending=False)
    project_hours.columns = ['Project', 'Total_GenAI_Hours']

    # Number of distinct users contributing to each project.
    project_users = df.groupby('Project')['User'].nunique().reset_index()
    project_users.columns = ['Project', 'User_Count']

    # Combine hours and user counts into one analysis frame.
    return pd.merge(project_hours, project_users, on='Project')
107
+
108
def extract_ai_tools_from_descriptions(df):
    """
    Extract and count AI tools mentioned in GenAI descriptions.

    Matching is longest-phrase-first: once a phrase such as 'github copilot'
    is counted, its occurrences are blanked out of the text so that shorter
    contained patterns ('copilot', or 'gpt' inside 'gpt-4') cannot count the
    same mention a second time.

    Args:
        df: Dataframe with a 'GenAI use case description' column.

    Returns:
        Counter mapping normalized tool names to mention counts.
    """
    # Common AI tools and platforms to look for
    ai_tools = [
        'chatgpt', 'gpt-4', 'gpt-3', 'gpt', 'openai',
        'claude', 'anthropic',
        'gemini', 'bard', 'google ai',
        'copilot', 'github copilot', 'microsoft copilot',
        'dall-e', 'midjourney', 'stable diffusion',
        'hugging face', 'transformers',
        'bert', 'llama', 'mistral',
        'tensorflow', 'pytorch', 'ml',
        'jupyter', 'colab',
        'langchain', 'llm', 'rag'
    ]

    # Dictionary to normalize raw matches to display names
    tool_mapping = {
        'gpt': 'ChatGPT/GPT',
        'gpt-3': 'ChatGPT/GPT',
        'gpt-4': 'ChatGPT/GPT',
        'chatgpt': 'ChatGPT/GPT',
        'openai': 'OpenAI',
        'claude': 'Claude',
        'anthropic': 'Claude',
        'gemini': 'Google AI',
        'bard': 'Google AI',
        'google ai': 'Google AI',
        'copilot': 'GitHub Copilot',
        'github copilot': 'GitHub Copilot',
        'microsoft copilot': 'Microsoft Copilot',
        'dall-e': 'DALL-E',
        'midjourney': 'Midjourney',
        'stable diffusion': 'Stable Diffusion',
        'hugging face': 'Hugging Face',
        'transformers': 'Transformers',
        'bert': 'BERT',
        'llama': 'LLaMA',
        'mistral': 'Mistral AI',
        'tensorflow': 'TensorFlow',
        'pytorch': 'PyTorch',
        'ml': 'Machine Learning',
        'jupyter': 'Jupyter',
        'colab': 'Google Colab',
        'langchain': 'LangChain',
        'llm': 'Large Language Models',
        'rag': 'Retrieval Augmented Generation'
    }

    # One lowercase haystack over all descriptions.
    all_descriptions = " ".join(df['GenAI use case description'].dropna().astype(str).tolist()).lower()

    tool_counts = Counter()
    # Longest patterns first so 'github copilot' wins over 'copilot' and
    # 'gpt-4' over 'gpt' — fixes double-counting of a single mention.
    for tool in sorted(ai_tools, key=len, reverse=True):
        pattern = r'\b' + re.escape(tool) + r'\b'
        count = len(re.findall(pattern, all_descriptions))
        if count > 0:
            tool_counts[tool_mapping.get(tool, tool)] += count
            # Blank out matched text so shorter patterns cannot re-match it.
            all_descriptions = re.sub(pattern, ' ', all_descriptions)

    return tool_counts
172
+
173
def extract_use_cases_from_descriptions(df):
    """
    Identify common use-case categories in GenAI descriptions.

    Each description contributes at most one count per category; a category is
    credited when any of its keywords appears as a whole word
    (case-insensitive).

    Args:
        df: Dataframe with a 'GenAI use case description' column.

    Returns:
        Counter mapping use-case category names to description counts.
    """
    # Common use case categories to look for
    use_case_keywords = {
        'Code Generation': ['code', 'coding', 'programming', 'script', 'develop', 'algorithm'],
        'Content Creation': ['content', 'write', 'writing', 'draft', 'article', 'blog'],
        'Data Analysis': ['data', 'analysis', 'analyze', 'analytics', 'statistics', 'insights'],
        'Documentation': ['document', 'documentation', 'manual', 'guide', 'readme'],
        'Research': ['research', 'study', 'investigate', 'explore', 'literature'],
        'Summarization': ['summary', 'summarize', 'summarization', 'extract key points'],
        'Translation': ['translate', 'translation', 'language', 'localize'],
        'Image Generation': ['image', 'picture', 'graphic', 'design', 'draw', 'art'],
        'Chatbot': ['chatbot', 'chat', 'conversation', 'dialogue', 'assistant'],
        'Automation': ['automate', 'automation', 'workflow', 'process', 'routine'],
        'Training': ['train', 'training', 'learn', 'learning', 'education'],
        'Testing': ['test', 'testing', 'QA', 'quality assurance', 'debug']
    }

    descriptions = df['GenAI use case description'].dropna().astype(str).tolist()

    use_case_counts = Counter()
    for description in descriptions:
        description_lower = description.lower()
        for use_case, keywords in use_case_keywords.items():
            for keyword in keywords:
                # Lowercase the keyword too: the text is lowercased, so a
                # mixed-case keyword like 'QA' would otherwise never match.
                if re.search(r'\b' + re.escape(keyword.lower()) + r'\b', description_lower):
                    use_case_counts[use_case] += 1
                    break  # Count each use case only once per description

    return use_case_counts
209
+
210
def calculate_description_quality(df):
    """
    Score each user's concatenated GenAI description on a 0-100 scale.

    The raw score combines three factors:
      1. Length (40%):      character count relative to the longest description.
      2. Specificity (30%): mentions of known AI tools, numbers, long detail.
      3. Uniqueness (30%):  ratio of distinct non-stopword terms.
    Raw scores are then normalized so the best description scores 100.

    Args:
        df: Processed dataframe with a 'GenAI_Descriptions' column.

    Returns:
        Series of quality scores rounded to 1 decimal, aligned to df.index.
    """
    # Treat missing descriptions as empty strings so NaN cannot poison the
    # arithmetic below (str.len() on NaN would yield NaN scores).
    descriptions = df['GenAI_Descriptions'].fillna("").astype(str)

    # Factor 1: length score (40% weight) — longer descriptions score higher.
    char_counts = descriptions.str.len()
    max_char_count = char_counts.max() if not char_counts.empty else 1
    if max_char_count <= 0:
        max_char_count = 1  # all descriptions empty; avoid division by zero
    length_score = (char_counts / max_char_count) * 40

    # Factor 2: specificity score (30% weight) — tools, metrics, detail.
    def specificity_score(desc):
        if not isinstance(desc, str) or desc.strip() == "":
            return 0

        score = 0
        # Check for specific AI tools
        ai_tools = ['gpt', 'chatgpt', 'claude', 'gemini', 'copilot', 'dall-e', 'midjourney']
        for tool in ai_tools:
            if re.search(r'\b' + re.escape(tool) + r'\b', desc.lower()):
                score += 5

        # Numbers could indicate metrics or specific examples.
        if re.search(r'\d+', desc):
            score += 5

        # Long descriptions suggest detailed explanations.
        if len(desc.split()) > 50:
            score += 10

        return min(score, 30)  # Cap at 30% weight

    specificity_scores = descriptions.apply(specificity_score)

    # Factor 3: uniqueness score (30% weight) — variety of meaningful words.
    def uniqueness_score(desc):
        if not isinstance(desc, str) or desc.strip() == "":
            return 0

        # Simple word tokenization by splitting on whitespace.
        words = desc.lower().split()

        common_stopwords = {"a", "an", "the", "and", "or", "but", "is", "are", "was", "were",
                            "in", "on", "at", "to", "for", "with", "by", "about", "of", "this",
                            "that", "i", "we", "you", "he", "she", "they", "it", "have", "has"}

        # Filter out stopwords and short words before measuring variety.
        filtered_words = [word for word in words if word not in common_stopwords and len(word) > 2]

        if filtered_words:
            uniqueness = len(set(filtered_words)) / len(filtered_words)
            return uniqueness * 30
        return 0

    uniqueness_scores = descriptions.apply(uniqueness_score)

    # Combine scores and normalize to a 0-100 scale relative to the best.
    total_scores = length_score + specificity_scores + uniqueness_scores
    max_score = total_scores.max() if not total_scores.empty else 1
    if max_score <= 0:
        max_score = 1  # every score is 0; avoid division by zero
    return (total_scores / max_score * 100).round(1)
283
+
284
  def get_download_link(df, filename):
285
  """Generate a download link for the dataframe as an Excel file"""
286
  output = io.BytesIO()
 
289
 
290
  # Add a summary sheet
291
  summary = pd.DataFrame({
292
+ 'Metric': ['Total Users', 'Average GenAI Efficiency (hours)', 'Average Utilization (%)',
293
+ 'Top GenAI User', 'Top Quality Score'],
294
  'Value': [
295
  len(df),
296
  round(df['GenAI_Efficiency'].mean(), 2),
297
+ round(df['Utilization_Percentage'].mean(), 2),
298
+ df.loc[df['GenAI_Efficiency'].idxmax(), 'User'] if not df['GenAI_Efficiency'].isna().all() else 'N/A',
299
+ df.loc[df['Description_Quality_Score'].idxmax(), 'User'] if not df['Description_Quality_Score'].isna().all() else 'N/A'
300
  ]
301
  })
302
  summary.to_excel(writer, index=False, sheet_name='Summary')
 
328
  padding: 15px;
329
  border-radius: 5px;
330
  }
331
+ .highlight-box {
332
+ background-color: #f8f9fa;
333
+ border-left: 5px solid #4CAF50;
334
+ padding: 15px;
335
+ margin: 10px 0;
336
+ border-radius: 0 5px 5px 0;
337
+ }
338
+ .quality-high {
339
+ color: #4CAF50;
340
+ font-weight: bold;
341
+ }
342
+ .quality-medium {
343
+ color: #FFC107;
344
+ font-weight: bold;
345
+ }
346
+ .quality-low {
347
+ color: #F44336;
348
+ font-weight: bold;
349
+ }
350
+ /* Team category styling */
351
+ table {
352
+ width: 100%;
353
+ border-collapse: collapse;
354
+ margin-bottom: 20px;
355
+ }
356
+ th {
357
+ background-color: #f2f2f2;
358
+ padding: 8px;
359
+ text-align: left;
360
+ border: 1px solid #ddd;
361
+ }
362
+ td {
363
+ padding: 8px;
364
+ border: 1px solid #ddd;
365
+ }
366
+ tr:nth-child(even) {
367
+ background-color: #f9f9f9;
368
+ }
369
+ tr:hover {
370
+ background-color: #f0f0f0;
371
+ }
372
  </style>
373
  """, unsafe_allow_html=True)
374
 
 
382
  1. Creates a list of unique users
383
  2. Concatenates GenAI use case descriptions for each user with proper formatting
384
  3. Captures GenAI efficiency values and other metrics
385
+ 4. Identifies projects with highest GenAI usage
386
+ 5. Analyzes most common AI tools and use cases
387
+ 6. Identifies prompt champions based on quality metrics
388
  """)
389
 
390
  # File uploader
 
404
 
405
  # Check if required columns exist
406
  required_columns = ['User', 'GenAI use case description', 'GenAI Efficiency (Log time in hours)']
407
+ for col in ['Required', 'Logged', 'Date', 'Project']:
408
  if col in df.columns:
409
  required_columns.append(col)
410
 
411
+ missing_columns = [col for col in required_columns[:3] if col not in df.columns]
412
 
413
  if missing_columns:
414
+ st.warning(f"The following required columns are missing: {', '.join(missing_columns)}")
415
  st.markdown("""
416
  For full functionality, your file should contain these columns:
417
  - User
 
420
  - Required
421
  - Logged
422
  - Date
423
+ - Project (optional but recommended for project analysis)
424
  """)
425
 
426
+ # Stop if essential columns are missing
427
+ if any(col in missing_columns for col in ['User', 'GenAI use case description']):
428
+ st.error("Cannot continue without essential columns.")
429
+ st.stop()
430
+
431
  # Continue with available columns
432
  st.info("Continuing with available columns...")
433
 
 
435
  if st.button("Process Data"):
436
  with st.spinner("Processing data..."):
437
  result_df = process_genai_data(df)
438
+
439
+ # Get project analysis if available
440
+ project_analysis = None
441
+ if 'Project' in df.columns:
442
+ project_analysis = analyze_projects_by_genai_hours(df)
443
+
444
+ # Get AI tools usage
445
+ ai_tool_counts = extract_ai_tools_from_descriptions(df)
446
+
447
+ # Get use case analysis
448
+ use_case_counts = extract_use_cases_from_descriptions(df)
449
 
450
  # Display the result
451
  st.subheader("Processed Data")
 
456
  st.subheader("Download Processed Data")
457
  st.markdown(get_download_link(result_df, f"genai_processed_data_{timestamp}.xlsx"), unsafe_allow_html=True)
458
 
459
+ # NEW INSIGHTS SECTION
460
+ st.header("πŸ” Advanced GenAI Insights")
461
+
462
+ # 1. Project with highest GenAI efficacy log hours
463
+ if project_analysis is not None and not project_analysis.empty:
464
+ st.subheader("πŸ† Project with Highest GenAI Efficacy Hours")
465
+
466
+ top_project = project_analysis.iloc[0]
467
+
468
+ col1, col2 = st.columns(2)
469
+ with col1:
470
+ st.markdown(f"""
471
+ <div class="highlight-box">
472
+ <h3>{top_project['Project']}</h3>
473
+ <p>Total GenAI Hours: <b>{round(top_project['Total_GenAI_Hours'], 2)}</b></p>
474
+ <p>Number of Users: <b>{top_project['User_Count']}</b></p>
475
+ <p>Average Hours per User: <b>{round(top_project['Total_GenAI_Hours'] / top_project['User_Count'], 2)}</b></p>
476
+ </div>
477
+ """, unsafe_allow_html=True)
478
+
479
+ with col2:
480
+ # Bar chart of top 5 projects
481
+ top_projects = project_analysis.head(5)
482
+ fig = px.bar(
483
+ top_projects,
484
+ x='Project',
485
+ y='Total_GenAI_Hours',
486
+ title='Top 5 Projects by GenAI Hours',
487
+ color='Total_GenAI_Hours',
488
+ color_continuous_scale='Viridis'
489
+ )
490
+ fig.update_layout(xaxis_title="Project", yaxis_title="Total GenAI Hours")
491
+ st.plotly_chart(fig, use_container_width=True)
492
+
493
+ # Full project analysis
494
+ st.markdown("### All Projects Analysis")
495
+ st.dataframe(project_analysis)
496
+
497
+ # 2. Most prominent use cases of AI tools
498
+ st.subheader("πŸ“Š Most Prominent AI Use Cases")
499
+
500
+ col1, col2 = st.columns(2)
501
+
502
+ with col1:
503
+ # AI Tools Analysis
504
+ st.markdown("### Top AI Tools Mentioned")
505
+
506
+ if ai_tool_counts:
507
+ # Convert to dataframe for visualization
508
+ ai_tools_df = pd.DataFrame({
509
+ 'Tool': list(ai_tool_counts.keys()),
510
+ 'Mentions': list(ai_tool_counts.values())
511
+ }).sort_values('Mentions', ascending=False)
512
+
513
+ fig = px.bar(
514
+ ai_tools_df.head(10),
515
+ x='Tool',
516
+ y='Mentions',
517
+ title='Top 10 AI Tools Mentioned',
518
+ color='Mentions',
519
+ color_continuous_scale='Blues'
520
+ )
521
+ fig.update_layout(xaxis_title="AI Tool", yaxis_title="Number of Mentions")
522
+ st.plotly_chart(fig, use_container_width=True)
523
+
524
+ # Top tool insight
525
+ if not ai_tools_df.empty:
526
+ top_tool = ai_tools_df.iloc[0]
527
+ st.markdown(f"""
528
+ <div class="highlight-box">
529
+ <p>Most used AI tool: <b>{top_tool['Tool']}</b> with {top_tool['Mentions']} mentions</p>
530
+ </div>
531
+ """, unsafe_allow_html=True)
532
+ else:
533
+ st.info("No specific AI tools were identified in the descriptions.")
534
+
535
+ with col2:
536
+ # Use Cases Analysis
537
+ st.markdown("### Top Use Cases")
538
+
539
+ if use_case_counts:
540
+ # Convert to dataframe for visualization
541
+ use_cases_df = pd.DataFrame({
542
+ 'Use Case': list(use_case_counts.keys()),
543
+ 'Count': list(use_case_counts.values())
544
+ }).sort_values('Count', ascending=False)
545
+
546
+ fig = px.pie(
547
+ use_cases_df.head(5),
548
+ names='Use Case',
549
+ values='Count',
550
+ title='Top 5 GenAI Use Cases',
551
+ hole=0.4
552
+ )
553
+ st.plotly_chart(fig, use_container_width=True)
554
+
555
+ # Top use case insight
556
+ if not use_cases_df.empty:
557
+ top_use_case = use_cases_df.iloc[0]
558
+ st.markdown(f"""
559
+ <div class="highlight-box">
560
+ <p>Most common use case: <b>{top_use_case['Use Case']}</b> mentioned in {top_use_case['Count']} descriptions</p>
561
+ </div>
562
+ """, unsafe_allow_html=True)
563
+ else:
564
+ st.info("No specific use cases were identified in the descriptions.")
565
+
566
+ # 3. Champion of the prompt with quality GenAI Description
567
+ st.subheader("πŸ‘‘ GenAI Prompt Champions")
568
+
569
+ if 'Description_Quality_Score' in result_df.columns:
570
+ # Get top 3 users by quality score
571
+ top_quality_users = result_df.sort_values('Description_Quality_Score', ascending=False).head(3)
572
+
573
+ # Display top champion
574
+ if not top_quality_users.empty:
575
+ champion = top_quality_users.iloc[0]
576
+ st.markdown(f"""
577
+ <div class="highlight-box">
578
+ <h3>πŸ‘‘ Prompt Champion: {champion['User']}</h3>
579
+ <p>Quality Score: <span class="quality-high">{champion['Description_Quality_Score']}/100</span></p>
580
+ <p>GenAI Efficiency: {round(champion['GenAI_Efficiency'], 2)} hours</p>
581
+ <p><b>GenAI Descriptions:</b></p>
582
+ <pre>{champion['GenAI_Descriptions']}</pre>
583
+ </div>
584
+ """, unsafe_allow_html=True)
585
+
586
+ # Quality score distribution
587
+ st.markdown("### Quality Score Distribution")
588
+ fig = px.histogram(
589
+ result_df,
590
+ x='Description_Quality_Score',
591
+ nbins=10,
592
+ title='Distribution of GenAI Description Quality Scores',
593
+ color_discrete_sequence=['#4CAF50']
594
+ )
595
+ fig.update_layout(xaxis_title="Quality Score", yaxis_title="Number of Users")
596
+ st.plotly_chart(fig, use_container_width=True)
597
+
598
+ # Quality score by user with team categorization
599
+ st.markdown("### Quality Scores by User & Team Categories")
600
+
601
+ # Create a more comprehensive dataframe for team identification
602
+ team_df = result_df[['User', 'Description_Quality_Score', 'GenAI_Efficiency', 'Total_Logged_Hours']].copy()
603
+
604
+ # Ensure we have numeric values for calculations
605
+ team_df['Description_Quality_Score'] = pd.to_numeric(team_df['Description_Quality_Score'], errors='coerce').fillna(0)
606
+ team_df['GenAI_Efficiency'] = pd.to_numeric(team_df['GenAI_Efficiency'], errors='coerce').fillna(0)
607
+ team_df['Total_Logged_Hours'] = pd.to_numeric(team_df['Total_Logged_Hours'], errors='coerce').fillna(0)
608
+
609
+ # Calculate a combined score (weighted average of quality and hours)
610
+ # Weight: 60% quality, 40% efficiency hours
611
+ max_quality = team_df['Description_Quality_Score'].max() if not team_df.empty and team_df['Description_Quality_Score'].max() > 0 else 100
612
+ max_hours = team_df['GenAI_Efficiency'].max() if not team_df.empty and team_df['GenAI_Efficiency'].max() > 0 else 1
613
+
614
+ team_df['Quality_Normalized'] = team_df['Description_Quality_Score'] / max_quality * 100
615
+ team_df['Hours_Normalized'] = team_df['GenAI_Efficiency'] / max_hours * 100
616
+ team_df['Combined_Score'] = (team_df['Quality_Normalized'] * 0.6) + (team_df['Hours_Normalized'] * 0.4)
617
+
618
+ # Assign team categories based on combined score and individual metrics
619
+ def assign_team_category(row):
620
+ quality = row['Description_Quality_Score']
621
+ hours = row['GenAI_Efficiency']
622
+ combined = row['Combined_Score']
623
+
624
+ if quality >= 80 and hours >= (max_hours * 0.7):
625
+ return "πŸ”₯ GenAI Champion", "Masters of both quality and quantity"
626
+ elif quality >= 70:
627
+ return "✨ Prompt Expert", "High-quality prompt crafters"
628
+ elif hours >= (max_hours * 0.8):
629
+ return "⚑ Power User", "High volume GenAI users"
630
+ elif combined >= 60:
631
+ return "🌟 Balanced Performer", "Good balance of quality and usage"
632
+ elif quality >= 50:
633
+ return "πŸ“ Quality Focused", "Focuses on quality over quantity"
634
+ elif hours > 0:
635
+ return "πŸ” Exploring User", "Beginning GenAI journey"
636
+ else:
637
+ return "❓ Inactive", "Little to no GenAI usage"
638
+
639
+ # Apply the team categorization
640
+ team_df[['Team_Category', 'Category_Description']] = team_df.apply(assign_team_category, axis=1, result_type='expand')
641
+
642
+ # Sort by combined score
643
+ team_df = team_df.sort_values('Combined_Score', ascending=False)
644
+
645
+ # Add color coding based on quality score
646
+ def quality_color(score):
647
+ if score >= 70:
648
+ return 'quality-high'
649
+ elif score >= 40:
650
+ return 'quality-medium'
651
+ else:
652
+ return 'quality-low'
653
+
654
+ team_df['Score_Display'] = team_df['Description_Quality_Score'].apply(
655
+ lambda x: f'<span class="{quality_color(x)}">{x}</span>'
656
+ )
657
+
658
+ # Create a display dataframe with the relevant columns
659
+ display_df = team_df[['User', 'Score_Display', 'GenAI_Efficiency', 'Team_Category', 'Category_Description']]
660
+ display_df.columns = ['User', 'Quality Score', 'GenAI Hours', 'Team Category', 'Description']
661
+
662
+ # Display as a styled dataframe
663
+ st.write(display_df.to_html(escape=False), unsafe_allow_html=True)
664
+
665
+ # Team distribution pie chart
666
+ st.markdown("### Team Category Distribution")
667
+ team_counts = team_df['Team_Category'].value_counts().reset_index()
668
+ team_counts.columns = ['Team_Category', 'Count']
669
+
670
+ fig = px.pie(
671
+ team_counts,
672
+ names='Team_Category',
673
+ values='Count',
674
+ title='Distribution of Team Categories',
675
+ color_discrete_sequence=px.colors.qualitative.Bold
676
+ )
677
+ st.plotly_chart(fig, use_container_width=True)
678
+
679
+ # Quality factors explanation
680
+ st.markdown("""
681
+ ### How Quality Scores Are Calculated
682
+
683
+ The quality score is based on these factors:
684
+
685
+ 1. **Length & Detail (40%)**: Longer, more detailed descriptions score higher
686
+ 2. **Specificity (30%)**: Mentions of specific AI tools, metrics, and technical details
687
+ 3. **Uniqueness (30%)**: Variety of terms and concepts used
688
+
689
+ Scores range from 0-100, with higher scores indicating more comprehensive and useful GenAI descriptions.
690
+ """)
691
+
692
+ # Data visualization section (original visualizations)
693
+ st.header("πŸ“ˆ Data Visualization")
694
 
695
  # Tab layout for visualizations
696
+ tab1, tab2, tab3, tab4 = st.tabs(["GenAI Efficiency", "Utilization", "User Analysis", "Tools & Use Cases"])
697
 
698
  with tab1:
699
  # GenAI Efficiency by User
 
751
  )
752
  st.plotly_chart(fig, use_container_width=True)
753
 
754
+ with tab4:
755
+ # Combined tools and use cases view
756
+ st.subheader("AI Tools and Use Cases")
757
+
758
+ if ai_tool_counts and use_case_counts:
759
+ col1, col2 = st.columns(2)
760
+
761
+ with col1:
762
+ # Word cloud of AI tools (text representation)
763
+ st.markdown("### AI Tools Word Cloud")
764
+ ai_tools_text = " ".join([f"{tool} " * count for tool, count in ai_tool_counts.items()])
765
+ st.text_area("", ai_tools_text, height=200)
766
+
767
+ with col2:
768
+ # Use cases bar chart
769
+ use_cases_df = pd.DataFrame({
770
+ 'Use Case': list(use_case_counts.keys()),
771
+ 'Count': list(use_case_counts.values())
772
+ }).sort_values('Count', ascending=False)
773
+
774
+ fig = px.bar(
775
+ use_cases_df,
776
+ x='Use Case',
777
+ y='Count',
778
+ title='All GenAI Use Cases',
779
+ color='Count',
780
+ color_continuous_scale='YlOrRd'
781
+ )
782
+ fig.update_layout(xaxis_title="Use Case", yaxis_title="Count")
783
+ st.plotly_chart(fig, use_container_width=True)
784
+
785
  # Summary statistics
786
  st.subheader("Summary Statistics")
787
 
 
802
  avg_util = result_df['Utilization_Percentage'].mean()
803
  st.metric("Avg Utilization %", f"{round(avg_util, 2)}%")
804
 
805
+ # New row of metrics
806
+ col1, col2, col3 = st.columns(3)
807
+
808
+ with col1:
809
+ if ai_tool_counts:
810
+ top_tool = max(ai_tool_counts.items(), key=lambda x: x[1])[0]
811
+ st.metric("Most Used AI Tool", top_tool)
812
+
813
+ with col2:
814
+ if use_case_counts:
815
+ top_use_case = max(use_case_counts.items(), key=lambda x: x[1])[0]
816
+ st.metric("Top Use Case", top_use_case)
817
+
818
+ with col3:
819
+ if 'Description_Quality_Score' in result_df.columns and not result_df.empty:
820
+ avg_quality = result_df['Description_Quality_Score'].mean()
821
+ st.metric("Avg Description Quality", f"{round(avg_quality, 1)}/100")
822
+
823
  except Exception as e:
824
  st.error(f"An error occurred: {str(e)}")
825
  st.markdown("Please check your file format and try again.")
826
 
827
  # Footer
828
  st.markdown("---")
829
+ st.markdown("**Enhanced GenAI Worklog Processor** β€’ Built with Streamlit and Pandas")