Files changed (1) hide show
  1. app.py +573 -10
app.py CHANGED
@@ -5,6 +5,8 @@ import io
5
  import base64
6
  import plotly.express as px
7
  import datetime
 
 
8
 
9
  def process_genai_data(df):
10
  # Create a new dataframe with unique users
@@ -61,8 +63,224 @@ def process_genai_data(df):
61
 
62
  result_df['Date_Range'] = result_df['User'].apply(get_date_range)
63
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  return result_df
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  def get_download_link(df, filename):
67
  """Generate a download link for the dataframe as an Excel file"""
68
  output = io.BytesIO()
@@ -71,11 +289,14 @@ def get_download_link(df, filename):
71
 
72
  # Add a summary sheet
73
  summary = pd.DataFrame({
74
- 'Metric': ['Total Users', 'Average GenAI Efficiency (hours)', 'Average Utilization (%)'],
 
75
  'Value': [
76
  len(df),
77
  round(df['GenAI_Efficiency'].mean(), 2),
78
- round(df['Utilization_Percentage'].mean(), 2)
 
 
79
  ]
80
  })
81
  summary.to_excel(writer, index=False, sheet_name='Summary')
@@ -107,6 +328,47 @@ def local_css():
107
  padding: 15px;
108
  border-radius: 5px;
109
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  </style>
111
  """, unsafe_allow_html=True)
112
 
@@ -120,7 +382,9 @@ This app processes worklog data to extract insights about GenAI usage:
120
  1. Creates a list of unique users
121
  2. Concatenates GenAI use case descriptions for each user with proper formatting
122
  3. Captures GenAI efficiency values and other metrics
123
- 4. Generates visualizations of the processed data
 
 
124
  """)
125
 
126
  # File uploader
@@ -140,14 +404,14 @@ if uploaded_file is not None:
140
 
141
  # Check if required columns exist
142
  required_columns = ['User', 'GenAI use case description', 'GenAI Efficiency (Log time in hours)']
143
- for col in ['Required', 'Logged', 'Date']:
144
  if col in df.columns:
145
  required_columns.append(col)
146
 
147
- missing_columns = [col for col in required_columns if col not in df.columns]
148
 
149
  if missing_columns:
150
- st.warning(f"The following columns are missing: {', '.join(missing_columns)}")
151
  st.markdown("""
152
  For full functionality, your file should contain these columns:
153
  - User
@@ -156,8 +420,14 @@ if uploaded_file is not None:
156
  - Required
157
  - Logged
158
  - Date
 
159
  """)
160
 
 
 
 
 
 
161
  # Continue with available columns
162
  st.info("Continuing with available columns...")
163
 
@@ -165,6 +435,17 @@ if uploaded_file is not None:
165
  if st.button("Process Data"):
166
  with st.spinner("Processing data..."):
167
  result_df = process_genai_data(df)
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  # Display the result
170
  st.subheader("Processed Data")
@@ -175,11 +456,244 @@ if uploaded_file is not None:
175
  st.subheader("Download Processed Data")
176
  st.markdown(get_download_link(result_df, f"genai_processed_data_{timestamp}.xlsx"), unsafe_allow_html=True)
177
 
178
- # Data visualization section
179
- st.subheader("Data Visualization")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  # Tab layout for visualizations
182
- tab1, tab2, tab3 = st.tabs(["GenAI Efficiency", "Utilization", "User Analysis"])
183
 
184
  with tab1:
185
  # GenAI Efficiency by User
@@ -237,6 +751,37 @@ if uploaded_file is not None:
237
  )
238
  st.plotly_chart(fig, use_container_width=True)
239
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  # Summary statistics
241
  st.subheader("Summary Statistics")
242
 
@@ -257,10 +802,28 @@ if uploaded_file is not None:
257
  avg_util = result_df['Utilization_Percentage'].mean()
258
  st.metric("Avg Utilization %", f"{round(avg_util, 2)}%")
259
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
  except Exception as e:
261
  st.error(f"An error occurred: {str(e)}")
262
  st.markdown("Please check your file format and try again.")
263
 
264
  # Footer
265
  st.markdown("---")
266
- st.markdown("**GenAI Worklog Processor** β€’ Built with Streamlit and Pandas")
 
5
  import base64
6
  import plotly.express as px
7
  import datetime
8
+ import re
9
+ from collections import Counter
10
 
11
  def process_genai_data(df):
12
  # Create a new dataframe with unique users
 
63
 
64
  result_df['Date_Range'] = result_df['User'].apply(get_date_range)
65
 
66
+ # Add description quality score
67
+ result_df['Description_Quality_Score'] = calculate_description_quality(result_df)
68
+
69
+ # Get project data if available
70
+ if 'Project' in df.columns:
71
+ # Get a list of projects for each user
72
+ def get_projects(user):
73
+ projects = df[df['User'] == user]['Project'].dropna().unique()
74
+ return list(projects)
75
+
76
+ result_df['Projects'] = result_df['User'].apply(get_projects)
77
+
78
  return result_df
79
 
80
def analyze_projects_by_genai_hours(df):
    """
    Analyze which projects have the highest GenAI efficiency log hours.

    To avoid double-counting, the first logged GenAI-hours value is taken for
    each unique (Project, User) pair before summing per project (the hours
    value appears to be repeated across a user's worklog rows — see the
    original "Fix" note).

    Args:
        df: Raw worklog dataframe. Needs 'Project', 'User' and
            'GenAI Efficiency (Log time in hours)' columns.

    Returns:
        DataFrame with columns ['Project', 'Total_GenAI_Hours', 'User_Count'],
        sorted by total hours descending, or None if any required column is
        missing.
    """
    hours_col = 'GenAI Efficiency (Log time in hours)'
    # Guard ALL required columns, not just 'Project', so a partial upload
    # degrades gracefully (None) instead of raising a KeyError below.
    if any(col not in df.columns for col in ('Project', 'User', hours_col)):
        return None

    # Unique (Project, User) combinations with their GenAI hours.
    # This avoids double-counting hours for the same user on the same project.
    user_project_hours = df.groupby(['Project', 'User'])[hours_col].first().reset_index()

    # Sum the de-duplicated hours by project, largest first.
    project_hours = user_project_hours.groupby('Project')[hours_col].sum().reset_index()
    project_hours = project_hours.sort_values(hours_col, ascending=False)
    project_hours.columns = ['Project', 'Total_GenAI_Hours']

    # Number of distinct users contributing to each project.
    project_users = df.groupby('Project')['User'].nunique().reset_index()
    project_users.columns = ['Project', 'User_Count']

    # Combine hours and user counts into one analysis frame.
    return pd.merge(project_hours, project_users, on='Project')
107
+
108
def extract_ai_tools_from_descriptions(df):
    """
    Extract and count AI tools mentioned in GenAI descriptions.

    Matching is longest-phrase-first: once a phrase such as 'github copilot'
    is counted, its occurrences are blanked out of the text so that shorter
    contained patterns ('copilot', or 'gpt' inside 'gpt-4') cannot count the
    same mention a second time.

    Args:
        df: Dataframe with a 'GenAI use case description' column.

    Returns:
        Counter mapping normalized tool names to mention counts.
    """
    # Common AI tools and platforms to look for
    ai_tools = [
        'chatgpt', 'gpt-4', 'gpt-3', 'gpt', 'openai',
        'claude', 'anthropic',
        'gemini', 'bard', 'google ai',
        'copilot', 'github copilot', 'microsoft copilot',
        'dall-e', 'midjourney', 'stable diffusion',
        'hugging face', 'transformers',
        'bert', 'llama', 'mistral',
        'tensorflow', 'pytorch', 'ml',
        'jupyter', 'colab',
        'langchain', 'llm', 'rag'
    ]

    # Dictionary to normalize raw matches to display names
    tool_mapping = {
        'gpt': 'ChatGPT/GPT',
        'gpt-3': 'ChatGPT/GPT',
        'gpt-4': 'ChatGPT/GPT',
        'chatgpt': 'ChatGPT/GPT',
        'openai': 'OpenAI',
        'claude': 'Claude',
        'anthropic': 'Claude',
        'gemini': 'Google AI',
        'bard': 'Google AI',
        'google ai': 'Google AI',
        'copilot': 'GitHub Copilot',
        'github copilot': 'GitHub Copilot',
        'microsoft copilot': 'Microsoft Copilot',
        'dall-e': 'DALL-E',
        'midjourney': 'Midjourney',
        'stable diffusion': 'Stable Diffusion',
        'hugging face': 'Hugging Face',
        'transformers': 'Transformers',
        'bert': 'BERT',
        'llama': 'LLaMA',
        'mistral': 'Mistral AI',
        'tensorflow': 'TensorFlow',
        'pytorch': 'PyTorch',
        'ml': 'Machine Learning',
        'jupyter': 'Jupyter',
        'colab': 'Google Colab',
        'langchain': 'LangChain',
        'llm': 'Large Language Models',
        'rag': 'Retrieval Augmented Generation'
    }

    # One lowercase haystack over all descriptions.
    all_descriptions = " ".join(df['GenAI use case description'].dropna().astype(str).tolist()).lower()

    tool_counts = Counter()
    # Longest patterns first so 'github copilot' wins over 'copilot' and
    # 'gpt-4' over 'gpt' — fixes double-counting of a single mention.
    for tool in sorted(ai_tools, key=len, reverse=True):
        pattern = r'\b' + re.escape(tool) + r'\b'
        count = len(re.findall(pattern, all_descriptions))
        if count > 0:
            tool_counts[tool_mapping.get(tool, tool)] += count
            # Blank out matched text so shorter patterns cannot re-match it.
            all_descriptions = re.sub(pattern, ' ', all_descriptions)

    return tool_counts
172
+
173
def extract_use_cases_from_descriptions(df):
    """
    Identify common use-case categories in GenAI descriptions.

    Each description contributes at most one count per category; a category is
    credited when any of its keywords appears as a whole word
    (case-insensitive).

    Args:
        df: Dataframe with a 'GenAI use case description' column.

    Returns:
        Counter mapping use-case category names to description counts.
    """
    # Common use case categories to look for
    use_case_keywords = {
        'Code Generation': ['code', 'coding', 'programming', 'script', 'develop', 'algorithm'],
        'Content Creation': ['content', 'write', 'writing', 'draft', 'article', 'blog'],
        'Data Analysis': ['data', 'analysis', 'analyze', 'analytics', 'statistics', 'insights'],
        'Documentation': ['document', 'documentation', 'manual', 'guide', 'readme'],
        'Research': ['research', 'study', 'investigate', 'explore', 'literature'],
        'Summarization': ['summary', 'summarize', 'summarization', 'extract key points'],
        'Translation': ['translate', 'translation', 'language', 'localize'],
        'Image Generation': ['image', 'picture', 'graphic', 'design', 'draw', 'art'],
        'Chatbot': ['chatbot', 'chat', 'conversation', 'dialogue', 'assistant'],
        'Automation': ['automate', 'automation', 'workflow', 'process', 'routine'],
        'Training': ['train', 'training', 'learn', 'learning', 'education'],
        'Testing': ['test', 'testing', 'QA', 'quality assurance', 'debug']
    }

    descriptions = df['GenAI use case description'].dropna().astype(str).tolist()

    use_case_counts = Counter()
    for description in descriptions:
        description_lower = description.lower()
        for use_case, keywords in use_case_keywords.items():
            for keyword in keywords:
                # Lowercase the keyword too: the text is lowercased, so a
                # mixed-case keyword like 'QA' would otherwise never match.
                if re.search(r'\b' + re.escape(keyword.lower()) + r'\b', description_lower):
                    use_case_counts[use_case] += 1
                    break  # Count each use case only once per description

    return use_case_counts
209
+
210
def calculate_description_quality(df):
    """
    Score each user's concatenated GenAI description on a 0-100 scale.

    The raw score combines three factors:
      1. Length (40%):      character count relative to the longest description.
      2. Specificity (30%): mentions of known AI tools, numbers, long detail.
      3. Uniqueness (30%):  ratio of distinct non-stopword terms.
    Raw scores are then normalized so the best description scores 100.

    Args:
        df: Processed dataframe with a 'GenAI_Descriptions' column.

    Returns:
        Series of quality scores rounded to 1 decimal, aligned to df.index.
    """
    # Treat missing descriptions as empty strings so NaN cannot poison the
    # arithmetic below (str.len() on NaN would yield NaN scores).
    descriptions = df['GenAI_Descriptions'].fillna("").astype(str)

    # Factor 1: length score (40% weight) — longer descriptions score higher.
    char_counts = descriptions.str.len()
    max_char_count = char_counts.max() if not char_counts.empty else 1
    if max_char_count <= 0:
        max_char_count = 1  # all descriptions empty; avoid division by zero
    length_score = (char_counts / max_char_count) * 40

    # Factor 2: specificity score (30% weight) — tools, metrics, detail.
    def specificity_score(desc):
        if not isinstance(desc, str) or desc.strip() == "":
            return 0

        score = 0
        # Check for specific AI tools
        ai_tools = ['gpt', 'chatgpt', 'claude', 'gemini', 'copilot', 'dall-e', 'midjourney']
        for tool in ai_tools:
            if re.search(r'\b' + re.escape(tool) + r'\b', desc.lower()):
                score += 5

        # Numbers could indicate metrics or specific examples.
        if re.search(r'\d+', desc):
            score += 5

        # Long descriptions suggest detailed explanations.
        if len(desc.split()) > 50:
            score += 10

        return min(score, 30)  # Cap at 30% weight

    specificity_scores = descriptions.apply(specificity_score)

    # Factor 3: uniqueness score (30% weight) — variety of meaningful words.
    def uniqueness_score(desc):
        if not isinstance(desc, str) or desc.strip() == "":
            return 0

        # Simple word tokenization by splitting on whitespace.
        words = desc.lower().split()

        common_stopwords = {"a", "an", "the", "and", "or", "but", "is", "are", "was", "were",
                            "in", "on", "at", "to", "for", "with", "by", "about", "of", "this",
                            "that", "i", "we", "you", "he", "she", "they", "it", "have", "has"}

        # Filter out stopwords and short words before measuring variety.
        filtered_words = [word for word in words if word not in common_stopwords and len(word) > 2]

        if filtered_words:
            uniqueness = len(set(filtered_words)) / len(filtered_words)
            return uniqueness * 30
        return 0

    uniqueness_scores = descriptions.apply(uniqueness_score)

    # Combine scores and normalize to a 0-100 scale relative to the best.
    total_scores = length_score + specificity_scores + uniqueness_scores
    max_score = total_scores.max() if not total_scores.empty else 1
    if max_score <= 0:
        max_score = 1  # every score is 0; avoid division by zero
    return (total_scores / max_score * 100).round(1)
283
+
284
  def get_download_link(df, filename):
285
  """Generate a download link for the dataframe as an Excel file"""
286
  output = io.BytesIO()
 
289
 
290
  # Add a summary sheet
291
  summary = pd.DataFrame({
292
+ 'Metric': ['Total Users', 'Average GenAI Efficiency (hours)', 'Average Utilization (%)',
293
+ 'Top GenAI User', 'Top Quality Score'],
294
  'Value': [
295
  len(df),
296
  round(df['GenAI_Efficiency'].mean(), 2),
297
+ round(df['Utilization_Percentage'].mean(), 2),
298
+ df.loc[df['GenAI_Efficiency'].idxmax(), 'User'] if not df['GenAI_Efficiency'].isna().all() else 'N/A',
299
+ df.loc[df['Description_Quality_Score'].idxmax(), 'User'] if not df['Description_Quality_Score'].isna().all() else 'N/A'
300
  ]
301
  })
302
  summary.to_excel(writer, index=False, sheet_name='Summary')
 
328
  padding: 15px;
329
  border-radius: 5px;
330
  }
331
+ .highlight-box {
332
+ background-color: #f8f9fa;
333
+ border-left: 5px solid #4CAF50;
334
+ padding: 15px;
335
+ margin: 10px 0;
336
+ border-radius: 0 5px 5px 0;
337
+ }
338
+ .quality-high {
339
+ color: #4CAF50;
340
+ font-weight: bold;
341
+ }
342
+ .quality-medium {
343
+ color: #FFC107;
344
+ font-weight: bold;
345
+ }
346
+ .quality-low {
347
+ color: #F44336;
348
+ font-weight: bold;
349
+ }
350
+ /* Team category styling */
351
+ table {
352
+ width: 100%;
353
+ border-collapse: collapse;
354
+ margin-bottom: 20px;
355
+ }
356
+ th {
357
+ background-color: #f2f2f2;
358
+ padding: 8px;
359
+ text-align: left;
360
+ border: 1px solid #ddd;
361
+ }
362
+ td {
363
+ padding: 8px;
364
+ border: 1px solid #ddd;
365
+ }
366
+ tr:nth-child(even) {
367
+ background-color: #f9f9f9;
368
+ }
369
+ tr:hover {
370
+ background-color: #f0f0f0;
371
+ }
372
  </style>
373
  """, unsafe_allow_html=True)
374
 
 
382
  1. Creates a list of unique users
383
  2. Concatenates GenAI use case descriptions for each user with proper formatting
384
  3. Captures GenAI efficiency values and other metrics
385
+ 4. Identifies projects with highest GenAI usage
386
+ 5. Analyzes most common AI tools and use cases
387
+ 6. Identifies prompt champions based on quality metrics
388
  """)
389
 
390
  # File uploader
 
404
 
405
  # Check if required columns exist
406
  required_columns = ['User', 'GenAI use case description', 'GenAI Efficiency (Log time in hours)']
407
+ for col in ['Required', 'Logged', 'Date', 'Project']:
408
  if col in df.columns:
409
  required_columns.append(col)
410
 
411
+ missing_columns = [col for col in required_columns[:3] if col not in df.columns]
412
 
413
  if missing_columns:
414
+ st.warning(f"The following required columns are missing: {', '.join(missing_columns)}")
415
  st.markdown("""
416
  For full functionality, your file should contain these columns:
417
  - User
 
420
  - Required
421
  - Logged
422
  - Date
423
+ - Project (optional but recommended for project analysis)
424
  """)
425
 
426
+ # Stop if essential columns are missing
427
+ if any(col in missing_columns for col in ['User', 'GenAI use case description']):
428
+ st.error("Cannot continue without essential columns.")
429
+ st.stop()
430
+
431
  # Continue with available columns
432
  st.info("Continuing with available columns...")
433
 
 
435
  if st.button("Process Data"):
436
  with st.spinner("Processing data..."):
437
  result_df = process_genai_data(df)
438
+
439
+ # Get project analysis if available
440
+ project_analysis = None
441
+ if 'Project' in df.columns:
442
+ project_analysis = analyze_projects_by_genai_hours(df)
443
+
444
+ # Get AI tools usage
445
+ ai_tool_counts = extract_ai_tools_from_descriptions(df)
446
+
447
+ # Get use case analysis
448
+ use_case_counts = extract_use_cases_from_descriptions(df)
449
 
450
  # Display the result
451
  st.subheader("Processed Data")
 
456
  st.subheader("Download Processed Data")
457
  st.markdown(get_download_link(result_df, f"genai_processed_data_{timestamp}.xlsx"), unsafe_allow_html=True)
458
 
459
+ # NEW INSIGHTS SECTION
460
+ st.header("πŸ” Advanced GenAI Insights")
461
+
462
+ # 1. Project with highest GenAI efficacy log hours
463
+ if project_analysis is not None and not project_analysis.empty:
464
+ st.subheader("πŸ† Project with Highest GenAI Efficacy Hours")
465
+
466
+ top_project = project_analysis.iloc[0]
467
+
468
+ col1, col2 = st.columns(2)
469
+ with col1:
470
+ st.markdown(f"""
471
+ <div class="highlight-box">
472
+ <h3>{top_project['Project']}</h3>
473
+ <p>Total GenAI Hours: <b>{round(top_project['Total_GenAI_Hours'], 2)}</b></p>
474
+ <p>Number of Users: <b>{top_project['User_Count']}</b></p>
475
+ <p>Average Hours per User: <b>{round(top_project['Total_GenAI_Hours'] / top_project['User_Count'], 2)}</b></p>
476
+ </div>
477
+ """, unsafe_allow_html=True)
478
+
479
+ with col2:
480
+ # Bar chart of top 5 projects
481
+ top_projects = project_analysis.head(5)
482
+ fig = px.bar(
483
+ top_projects,
484
+ x='Project',
485
+ y='Total_GenAI_Hours',
486
+ title='Top 5 Projects by GenAI Hours',
487
+ color='Total_GenAI_Hours',
488
+ color_continuous_scale='Viridis'
489
+ )
490
+ fig.update_layout(xaxis_title="Project", yaxis_title="Total GenAI Hours")
491
+ st.plotly_chart(fig, use_container_width=True)
492
+
493
+ # Full project analysis
494
+ st.markdown("### All Projects Analysis")
495
+ st.dataframe(project_analysis)
496
+
497
+ # 2. Most prominent use cases of AI tools
498
+ st.subheader("πŸ“Š Most Prominent AI Use Cases")
499
+
500
+ col1, col2 = st.columns(2)
501
+
502
+ with col1:
503
+ # AI Tools Analysis
504
+ st.markdown("### Top AI Tools Mentioned")
505
+
506
+ if ai_tool_counts:
507
+ # Convert to dataframe for visualization
508
+ ai_tools_df = pd.DataFrame({
509
+ 'Tool': list(ai_tool_counts.keys()),
510
+ 'Mentions': list(ai_tool_counts.values())
511
+ }).sort_values('Mentions', ascending=False)
512
+
513
+ fig = px.bar(
514
+ ai_tools_df.head(10),
515
+ x='Tool',
516
+ y='Mentions',
517
+ title='Top 10 AI Tools Mentioned',
518
+ color='Mentions',
519
+ color_continuous_scale='Blues'
520
+ )
521
+ fig.update_layout(xaxis_title="AI Tool", yaxis_title="Number of Mentions")
522
+ st.plotly_chart(fig, use_container_width=True)
523
+
524
+ # Top tool insight
525
+ if not ai_tools_df.empty:
526
+ top_tool = ai_tools_df.iloc[0]
527
+ st.markdown(f"""
528
+ <div class="highlight-box">
529
+ <p>Most used AI tool: <b>{top_tool['Tool']}</b> with {top_tool['Mentions']} mentions</p>
530
+ </div>
531
+ """, unsafe_allow_html=True)
532
+ else:
533
+ st.info("No specific AI tools were identified in the descriptions.")
534
+
535
+ with col2:
536
+ # Use Cases Analysis
537
+ st.markdown("### Top Use Cases")
538
+
539
+ if use_case_counts:
540
+ # Convert to dataframe for visualization
541
+ use_cases_df = pd.DataFrame({
542
+ 'Use Case': list(use_case_counts.keys()),
543
+ 'Count': list(use_case_counts.values())
544
+ }).sort_values('Count', ascending=False)
545
+
546
+ fig = px.pie(
547
+ use_cases_df.head(5),
548
+ names='Use Case',
549
+ values='Count',
550
+ title='Top 5 GenAI Use Cases',
551
+ hole=0.4
552
+ )
553
+ st.plotly_chart(fig, use_container_width=True)
554
+
555
+ # Top use case insight
556
+ if not use_cases_df.empty:
557
+ top_use_case = use_cases_df.iloc[0]
558
+ st.markdown(f"""
559
+ <div class="highlight-box">
560
+ <p>Most common use case: <b>{top_use_case['Use Case']}</b> mentioned in {top_use_case['Count']} descriptions</p>
561
+ </div>
562
+ """, unsafe_allow_html=True)
563
+ else:
564
+ st.info("No specific use cases were identified in the descriptions.")
565
+
566
+ # 3. Champion of the prompt with quality GenAI Description
567
+ st.subheader("πŸ‘‘ GenAI Prompt Champions")
568
+
569
+ if 'Description_Quality_Score' in result_df.columns:
570
+ # Get top 3 users by quality score
571
+ top_quality_users = result_df.sort_values('Description_Quality_Score', ascending=False).head(3)
572
+
573
+ # Display top champion
574
+ if not top_quality_users.empty:
575
+ champion = top_quality_users.iloc[0]
576
+ st.markdown(f"""
577
+ <div class="highlight-box">
578
+ <h3>πŸ‘‘ Prompt Champion: {champion['User']}</h3>
579
+ <p>Quality Score: <span class="quality-high">{champion['Description_Quality_Score']}/100</span></p>
580
+ <p>GenAI Efficiency: {round(champion['GenAI_Efficiency'], 2)} hours</p>
581
+ <p><b>GenAI Descriptions:</b></p>
582
+ <pre>{champion['GenAI_Descriptions']}</pre>
583
+ </div>
584
+ """, unsafe_allow_html=True)
585
+
586
+ # Quality score distribution
587
+ st.markdown("### Quality Score Distribution")
588
+ fig = px.histogram(
589
+ result_df,
590
+ x='Description_Quality_Score',
591
+ nbins=10,
592
+ title='Distribution of GenAI Description Quality Scores',
593
+ color_discrete_sequence=['#4CAF50']
594
+ )
595
+ fig.update_layout(xaxis_title="Quality Score", yaxis_title="Number of Users")
596
+ st.plotly_chart(fig, use_container_width=True)
597
+
598
+ # Quality score by user with team categorization
599
+ st.markdown("### Quality Scores by User & Team Categories")
600
+
601
+ # Create a more comprehensive dataframe for team identification
602
+ team_df = result_df[['User', 'Description_Quality_Score', 'GenAI_Efficiency', 'Total_Logged_Hours']].copy()
603
+
604
+ # Ensure we have numeric values for calculations
605
+ team_df['Description_Quality_Score'] = pd.to_numeric(team_df['Description_Quality_Score'], errors='coerce').fillna(0)
606
+ team_df['GenAI_Efficiency'] = pd.to_numeric(team_df['GenAI_Efficiency'], errors='coerce').fillna(0)
607
+ team_df['Total_Logged_Hours'] = pd.to_numeric(team_df['Total_Logged_Hours'], errors='coerce').fillna(0)
608
+
609
+ # Calculate a combined score (weighted average of quality and hours)
610
+ # Weight: 60% quality, 40% efficiency hours
611
+ max_quality = team_df['Description_Quality_Score'].max() if not team_df.empty and team_df['Description_Quality_Score'].max() > 0 else 100
612
+ max_hours = team_df['GenAI_Efficiency'].max() if not team_df.empty and team_df['GenAI_Efficiency'].max() > 0 else 1
613
+
614
+ team_df['Quality_Normalized'] = team_df['Description_Quality_Score'] / max_quality * 100
615
+ team_df['Hours_Normalized'] = team_df['GenAI_Efficiency'] / max_hours * 100
616
+ team_df['Combined_Score'] = (team_df['Quality_Normalized'] * 0.6) + (team_df['Hours_Normalized'] * 0.4)
617
+
618
+ # Assign team categories based on combined score and individual metrics
619
+ def assign_team_category(row):
620
+ quality = row['Description_Quality_Score']
621
+ hours = row['GenAI_Efficiency']
622
+ combined = row['Combined_Score']
623
+
624
+ if quality >= 80 and hours >= (max_hours * 0.7):
625
+ return "πŸ”₯ GenAI Champion", "Masters of both quality and quantity"
626
+ elif quality >= 70:
627
+ return "✨ Prompt Expert", "High-quality prompt crafters"
628
+ elif hours >= (max_hours * 0.8):
629
+ return "⚑ Power User", "High volume GenAI users"
630
+ elif combined >= 60:
631
+ return "🌟 Balanced Performer", "Good balance of quality and usage"
632
+ elif quality >= 50:
633
+ return "πŸ“ Quality Focused", "Focuses on quality over quantity"
634
+ elif hours > 0:
635
+ return "πŸ” Exploring User", "Beginning GenAI journey"
636
+ else:
637
+ return "❓ Inactive", "Little to no GenAI usage"
638
+
639
+ # Apply the team categorization
640
+ team_df[['Team_Category', 'Category_Description']] = team_df.apply(assign_team_category, axis=1, result_type='expand')
641
+
642
+ # Sort by combined score
643
+ team_df = team_df.sort_values('Combined_Score', ascending=False)
644
+
645
+ # Add color coding based on quality score
646
+ def quality_color(score):
647
+ if score >= 70:
648
+ return 'quality-high'
649
+ elif score >= 40:
650
+ return 'quality-medium'
651
+ else:
652
+ return 'quality-low'
653
+
654
+ team_df['Score_Display'] = team_df['Description_Quality_Score'].apply(
655
+ lambda x: f'<span class="{quality_color(x)}">{x}</span>'
656
+ )
657
+
658
+ # Create a display dataframe with the relevant columns
659
+ display_df = team_df[['User', 'Score_Display', 'GenAI_Efficiency', 'Team_Category', 'Category_Description']]
660
+ display_df.columns = ['User', 'Quality Score', 'GenAI Hours', 'Team Category', 'Description']
661
+
662
+ # Display as a styled dataframe
663
+ st.write(display_df.to_html(escape=False), unsafe_allow_html=True)
664
+
665
+ # Team distribution pie chart
666
+ st.markdown("### Team Category Distribution")
667
+ team_counts = team_df['Team_Category'].value_counts().reset_index()
668
+ team_counts.columns = ['Team_Category', 'Count']
669
+
670
+ fig = px.pie(
671
+ team_counts,
672
+ names='Team_Category',
673
+ values='Count',
674
+ title='Distribution of Team Categories',
675
+ color_discrete_sequence=px.colors.qualitative.Bold
676
+ )
677
+ st.plotly_chart(fig, use_container_width=True)
678
+
679
+ # Quality factors explanation
680
+ st.markdown("""
681
+ ### How Quality Scores Are Calculated
682
+
683
+ The quality score is based on these factors:
684
+
685
+ 1. **Length & Detail (40%)**: Longer, more detailed descriptions score higher
686
+ 2. **Specificity (30%)**: Mentions of specific AI tools, metrics, and technical details
687
+ 3. **Uniqueness (30%)**: Variety of terms and concepts used
688
+
689
+ Scores range from 0-100, with higher scores indicating more comprehensive and useful GenAI descriptions.
690
+ """)
691
+
692
+ # Data visualization section (original visualizations)
693
+ st.header("πŸ“ˆ Data Visualization")
694
 
695
  # Tab layout for visualizations
696
+ tab1, tab2, tab3, tab4 = st.tabs(["GenAI Efficiency", "Utilization", "User Analysis", "Tools & Use Cases"])
697
 
698
  with tab1:
699
  # GenAI Efficiency by User
 
751
  )
752
  st.plotly_chart(fig, use_container_width=True)
753
 
754
+ with tab4:
755
+ # Combined tools and use cases view
756
+ st.subheader("AI Tools and Use Cases")
757
+
758
+ if ai_tool_counts and use_case_counts:
759
+ col1, col2 = st.columns(2)
760
+
761
+ with col1:
762
+ # Word cloud of AI tools (text representation)
763
+ st.markdown("### AI Tools Word Cloud")
764
+ ai_tools_text = " ".join([f"{tool} " * count for tool, count in ai_tool_counts.items()])
765
+ st.text_area("", ai_tools_text, height=200)
766
+
767
+ with col2:
768
+ # Use cases bar chart
769
+ use_cases_df = pd.DataFrame({
770
+ 'Use Case': list(use_case_counts.keys()),
771
+ 'Count': list(use_case_counts.values())
772
+ }).sort_values('Count', ascending=False)
773
+
774
+ fig = px.bar(
775
+ use_cases_df,
776
+ x='Use Case',
777
+ y='Count',
778
+ title='All GenAI Use Cases',
779
+ color='Count',
780
+ color_continuous_scale='YlOrRd'
781
+ )
782
+ fig.update_layout(xaxis_title="Use Case", yaxis_title="Count")
783
+ st.plotly_chart(fig, use_container_width=True)
784
+
785
  # Summary statistics
786
  st.subheader("Summary Statistics")
787
 
 
802
  avg_util = result_df['Utilization_Percentage'].mean()
803
  st.metric("Avg Utilization %", f"{round(avg_util, 2)}%")
804
 
805
+ # New row of metrics
806
+ col1, col2, col3 = st.columns(3)
807
+
808
+ with col1:
809
+ if ai_tool_counts:
810
+ top_tool = max(ai_tool_counts.items(), key=lambda x: x[1])[0]
811
+ st.metric("Most Used AI Tool", top_tool)
812
+
813
+ with col2:
814
+ if use_case_counts:
815
+ top_use_case = max(use_case_counts.items(), key=lambda x: x[1])[0]
816
+ st.metric("Top Use Case", top_use_case)
817
+
818
+ with col3:
819
+ if 'Description_Quality_Score' in result_df.columns and not result_df.empty:
820
+ avg_quality = result_df['Description_Quality_Score'].mean()
821
+ st.metric("Avg Description Quality", f"{round(avg_quality, 1)}/100")
822
+
823
  except Exception as e:
824
  st.error(f"An error occurred: {str(e)}")
825
  st.markdown("Please check your file format and try again.")
826
 
827
  # Footer
828
  st.markdown("---")
829
+ st.markdown("**Enhanced GenAI Worklog Processor** β€’ Built with Streamlit and Pandas")