Pratik333 committed on
Commit
e43744a
·
verified ·
1 Parent(s): 398a5d8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +802 -0
app.py ADDED
@@ -0,0 +1,802 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import io, base64, datetime, re
5
+ from collections import Counter
6
+ import plotly.express as px
7
+ import plotly.graph_objects as go
8
+ from plotly.subplots import make_subplots
9
+
10
def get_first_row_totals(df, group_column):
    """Map each group to the GenAI efficiency hours found on its first row.

    Args:
        df: Frame containing ``group_column`` and the
            'GenAI Efficiency (Log time in hours)' column.
        group_column: Name of the column whose distinct values form groups.

    Returns:
        dict keyed by group value, in order of first appearance. A missing
        group value (NaN/None) matches no row under ``==`` and therefore
        gets no entry.
    """
    hours_column = 'GenAI Efficiency (Log time in hours)'
    group_values = df[group_column]

    totals = {}
    for value in group_values.unique():
        members = df[group_values == value]
        if members.empty:
            # Only happens for null-like keys, which never compare equal.
            continue
        totals[value] = members.iloc[0][hours_column]
    return totals
18
+
19
def create_unique_work_items(df):
    """Return a copy of *df* with a ``UniqueWorkID`` column when derivable.

    The identifier is the underscore-joined text of:

    * ``Project`` and ``Key`` when both columns exist; otherwise
    * ``Date``, ``Worklog`` and ``User`` (prefixed by ``Project`` when that
      column is present).

    When neither column set is available the copy is returned unchanged.

    Args:
        df: Worklog frame. It is never modified.

    Returns:
        A new DataFrame, possibly with the extra ``UniqueWorkID`` column.
    """
    analysis_df = df.copy()
    columns = analysis_df.columns

    if 'Key' in columns and 'Project' in columns:
        id_columns = ['Project', 'Key']
    elif all(col in columns for col in ('Date', 'Worklog', 'User')):
        # This branch is reached when Project (or Key) is missing, so Project
        # must be treated as optional; reading it unconditionally raised
        # KeyError for frames without a Project column.
        id_columns = ['Date', 'Worklog', 'User']
        if 'Project' in columns:
            id_columns.insert(0, 'Project')
    else:
        return analysis_df

    # Column-wise zip instead of DataFrame.apply(axis=1): apply returns a
    # DataFrame (not a Series) for an empty frame, which cannot be assigned
    # to a single column.
    analysis_df['UniqueWorkID'] = [
        "_".join(f"{value}" for value in values)
        for values in zip(*(analysis_df[col] for col in id_columns))
    ]
    return analysis_df
27
+
28
def _count_whole_word_hits(terms, text):
    """Return how many of *terms* occur in *text* as whole words/phrases."""
    return sum(
        1 for term in terms
        if re.search(r'\b' + re.escape(term) + r'\b', text)
    )


def calculate_champion_score(descriptions, project_data=None):
    """Score GenAI usage descriptions on a 0-100 scale.

    The score is the sum of four capped components computed on the
    newline-joined, lower-cased descriptions:

    * Tools (max 20): 10 for one known tool, 15 for two or more, +5 when a
      specific model version is named.
    * Use case (max 30): 5 per matched category (capped at 15), +5 for a
      "for a/an/the ..." or "to <verb> the ..." phrase, +5 for a domain
      term, +5 for a project/task/ticket/issue/story reference.
    * Prompt quality (max 30): 5 for more than 200 characters or 10 for
      more than 500, +10 for quotes or prompt-related verbs, +2 per named
      prompting technique (capped at 10).
    * Outcome (max 20): +2 per outcome keyword (capped at 10), +2 per
      iteration keyword (capped at 5), +5 for a quantified impact.

    Args:
        descriptions: Iterable of description values (list, tuple, pandas
            Series, ...). Null and blank entries are ignored. ``None``
            yields a score of 0.
        project_data: Unused; kept for interface compatibility.

    Returns:
        int score between 0 and 100.
    """
    # Explicit None check instead of `not descriptions`, so that a pandas
    # Series (whose truth value is ambiguous) is accepted as well.
    if descriptions is None:
        return 0

    valid_descriptions = [
        str(desc) for desc in descriptions
        if pd.notnull(desc) and str(desc).strip()
    ]
    if not valid_descriptions:
        return 0

    combined_desc = "\n".join(valid_descriptions)
    combined_desc_lower = combined_desc.lower()

    # Tools score (20%)
    ai_tools = ['gpt', 'chatgpt', 'claude', 'gemini', 'copilot', 'dall-e', 'midjourney', 'stable diffusion',
                'hugging face', 'llama', 'mistral', 'bard', 'anthropic']
    tools_mentioned = _count_whole_word_hits(ai_tools, combined_desc_lower)

    if tools_mentioned >= 2:
        tools_score = 15
    elif tools_mentioned == 1:
        tools_score = 10
    else:
        tools_score = 0
    # The dot in "gpt-3.5" is escaped so it only matches a literal period.
    if re.search(r'\b(gpt-4|gpt-3\.5|claude-2|claude-instant|gemini pro)\b', combined_desc_lower):
        tools_score += 5
    tools_score = min(tools_score, 20)

    # Use-case score (30%)
    use_case_keywords = {
        'code generation': ['code', 'coding', 'script', 'programming', 'develop'],
        'content creation': ['content', 'write', 'writing', 'draft', 'article'],
        'data analysis': ['data', 'analysis', 'analyze', 'metrics', 'statistics'],
        'problem solving': ['problem', 'solution', 'solve', 'issue', 'challenge'],
        'summarization': ['summary', 'summarize', 'summarization', 'extract'],
        'research': ['research', 'study', 'investigate', 'literature', 'information'],
        'automation': ['automate', 'automation', 'workflow', 'process']
    }
    # A category counts once no matter how many of its keywords appear.
    use_cases_found = sum(
        1 for keywords in use_case_keywords.values()
        if _count_whole_word_hits(keywords, combined_desc_lower) > 0
    )
    use_case_score = min(use_cases_found * 5, 15)

    if (re.search(r'\bfor\s+(a|an|the)\s+\w+', combined_desc_lower)
            or re.search(r'\bto\s+\w+\s+the\s+\w+', combined_desc_lower)):
        use_case_score += 5

    domain_terms = ['frontend', 'backend', 'api', 'database', 'ui', 'ux', 'algorithm', 'component', 'feature']
    if _count_whole_word_hits(domain_terms, combined_desc_lower) > 0:
        use_case_score += 5

    if re.search(r'\bproject\b|\btask\b|\bticket\b|\bissue\b|\bstory\b', combined_desc_lower):
        use_case_score += 5

    use_case_score = min(use_case_score, 30)

    # Prompt quality score (30%)
    prompt_score = 0
    if len(combined_desc) > 500:
        prompt_score += 10
    elif len(combined_desc) > 200:
        prompt_score += 5

    if re.search(r'".*?"|\bprompt\b|\'.*?\'|\bassist\b|\bcreate\b|\bgenerate\b', combined_desc_lower):
        prompt_score += 10

    prompt_techniques = ['step by step', 'chain of thought', 'few-shot', 'zero-shot', 'example']
    techniques_found = _count_whole_word_hits(prompt_techniques, combined_desc_lower)
    prompt_score += min(techniques_found * 2, 10)
    prompt_score = min(prompt_score, 30)

    # Outcome/iteration score (20%)
    outcome_keywords = ['result', 'output', 'generated', 'created', 'produced', 'improved']
    outcomes_found = _count_whole_word_hits(outcome_keywords, combined_desc_lower)
    outcome_score = min(outcomes_found * 2, 10)

    iteration_keywords = ['iteration', 'refine', 'revise', 'update', 'modify', 'enhance', 'feedback']
    iterations_found = _count_whole_word_hits(iteration_keywords, combined_desc_lower)
    outcome_score += min(iterations_found * 2, 5)

    if re.search(r'\d+%|\d+\s*hours|\d+\s*minutes|reduced by|increased by', combined_desc_lower):
        outcome_score += 5

    outcome_score = min(outcome_score, 20)

    return tools_score + use_case_score + prompt_score + outcome_score
117
+
118
def process_genai_data(df):
    """Aggregate worklog rows into one summary row per user.

    Args:
        df: Worklog frame with at least 'User',
            'GenAI use case description' and
            'GenAI Efficiency (Log time in hours)'. The optional columns
            'Logged', 'Required', 'Date', 'Project', 'Project Category' and
            'Key' / 'Worklog' enable the corresponding output columns and
            the de-duplication of hours.

    Returns:
        DataFrame with one row per distinct user and the columns 'User',
        'GenAI_Descriptions', 'GenAI_Efficiency',
        'Description_Quality_Score', plus (when the inputs allow)
        'Total_Logged_Hours', 'Total_Required_Hours',
        'Utilization_Percentage', 'Date_Range', 'Projects' and
        'Project_Categories'.
    """
    efficiency_col = 'GenAI Efficiency (Log time in hours)'
    description_col = 'GenAI use case description'

    # Create unique users DataFrame
    unique_users = df['User'].drop_duplicates().reset_index(drop=True)
    result_df = pd.DataFrame(unique_users, columns=['User'])

    def rows_for(user):
        """Return the worklog rows that belong to *user*."""
        return df[df['User'] == user]

    def bullet_descriptions(user):
        """Join the user's distinct descriptions as a '- ' bullet list."""
        descriptions = rows_for(user)[description_col].dropna().unique()
        return "\n".join("- " + str(desc) for desc in descriptions)

    result_df['GenAI_Descriptions'] = result_df['User'].apply(bullet_descriptions)

    # Columns identifying one unit of work; hours repeated on several rows
    # of the same unit are counted once.
    if all(col in df.columns for col in ('Project', 'Key')):
        dedupe_keys = ['Project', 'Key']
    elif all(col in df.columns for col in ('Date', 'Project', 'Worklog')):
        dedupe_keys = ['Project', 'Date', 'Worklog']
    else:
        dedupe_keys = None

    def get_unique_metric_sum(user, metric_col):
        """Sum *metric_col* over the user's de-duplicated work items."""
        user_rows = rows_for(user)
        if dedupe_keys is not None:
            # drop_duplicates(subset=...) replaces the row-wise string ID
            # built with apply(axis=1), which fails on an empty slice
            # (e.g. a missing user name that matches no row).
            user_rows = user_rows.drop_duplicates(subset=dedupe_keys)
        return user_rows[metric_col].sum()

    result_df['GenAI_Efficiency'] = result_df['User'].apply(
        lambda user: get_unique_metric_sum(user, efficiency_col)
    )

    if 'Logged' in df.columns:
        result_df['Total_Logged_Hours'] = result_df['User'].apply(
            lambda user: get_unique_metric_sum(user, 'Logged')
        )
    if 'Required' in df.columns:
        result_df['Total_Required_Hours'] = result_df['User'].apply(
            lambda user: get_unique_metric_sum(user, 'Required')
        )

    # Calculate utilization percentage
    if 'Total_Logged_Hours' in result_df.columns and 'Total_Required_Hours' in result_df.columns:
        required = result_df['Total_Required_Hours']
        # Zero required hours become NaN so the ratio is "unknown" rather
        # than +/-inf, which would otherwise distort averages and rankings.
        result_df['Utilization_Percentage'] = (
            result_df['Total_Logged_Hours'] / required.where(required != 0) * 100
        ).round(2)

    # Get date range for each user
    if 'Date' in df.columns:
        def date_range(user):
            dates = rows_for(user)['Date'].dropna()
            return f"{min(dates)} to {max(dates)}" if len(dates) > 0 else "N/A"

        result_df['Date_Range'] = result_df['User'].apply(date_range)

    # Add champion score for each user
    result_df['Description_Quality_Score'] = result_df['GenAI_Descriptions'].apply(
        lambda desc: calculate_champion_score([desc]) if isinstance(desc, str) and desc.strip() else 0
    )

    # Get project and category data if available
    if 'Project' in df.columns:
        result_df['Projects'] = result_df['User'].apply(
            lambda user: list(rows_for(user)['Project'].dropna().unique())
        )

    if 'Project Category' in df.columns:
        result_df['Project_Categories'] = result_df['User'].apply(
            lambda user: list(rows_for(user)['Project Category'].dropna().unique())
        )

    return result_df
177
+
178
def analyze_projects_by_genai_hours(df, exclude_qed42_global=False):
    """Rank projects by GenAI hours combined with description quality.

    Args:
        df: Worklog frame. Must contain 'User',
            'GenAI use case description' and
            'GenAI Efficiency (Log time in hours)'; 'Project' is required
            for any analysis to happen.
        exclude_qed42_global: When true, drop every project whose name
            contains "QED42 Global" (case-insensitive).

    Returns:
        ``None`` when *df* has no 'Project' column. Otherwise a DataFrame
        (empty when no project qualifies) with one row per project:
        'Project', 'Total_GenAI_Hours' (taken from the project's first
        row), 'User_Count', 'Project Category', 'Best_Description' (the
        longest one), 'Champion_Score', and the derived 'Hours_Score',
        'Quality_Score_Normalized' and 'Combined_Score' (60% hours, 40%
        quality), sorted by 'Combined_Score' descending.
    """
    if 'Project' not in df.columns:
        return None

    # Get first row totals for each project
    project_totals = get_first_row_totals(df, 'Project')

    # Calculate project data using unique work items
    analysis_df = create_unique_work_items(df)

    # Filter out QED42 Global projects if requested
    if exclude_qed42_global:
        # Project labels are cast to text first so numeric or otherwise
        # non-string labels do not break the .str accessor / .lower().
        is_global = analysis_df['Project'].astype(str).str.contains('QED42 Global', case=False, na=False)
        analysis_df = analysis_df[~is_global]
        project_totals = {
            k: v for k, v in project_totals.items()
            if 'qed42 global' not in str(k).lower()
        }

    has_category = 'Project Category' in analysis_df.columns

    projects_data = []
    for project in analysis_df['Project'].unique():
        if project not in project_totals:
            continue

        # Slice once per project and reuse it for every metric below.
        project_rows = analysis_df[analysis_df['Project'] == project]

        # Get project category if available
        project_category = 'Unknown'
        if has_category:
            categories = project_rows['Project Category'].dropna()
            if not categories.empty:
                project_category = categories.iloc[0]

        # Get best description for this project
        project_descriptions = project_rows['GenAI use case description'].dropna().tolist()
        best_description = (
            max(project_descriptions, key=lambda x: len(str(x)))
            if project_descriptions else ""
        )

        projects_data.append({
            'Project': project,
            'Total_GenAI_Hours': project_totals[project],
            'User_Count': len(project_rows['User'].unique()),
            'Project Category': project_category,
            'Best_Description': best_description,
            'Champion_Score': calculate_champion_score(project_descriptions)
        })

    # Create DataFrame from projects data
    project_hours = pd.DataFrame(projects_data) if projects_data else pd.DataFrame()

    # Add combined scores
    if not project_hours.empty:
        # "or 1" keeps the normalisation defined when the maximum is 0.
        max_hours = project_hours['Total_GenAI_Hours'].max() or 1
        max_quality = project_hours['Champion_Score'].max() or 1

        project_hours['Hours_Score'] = (project_hours['Total_GenAI_Hours'] / max_hours) * 100
        project_hours['Quality_Score_Normalized'] = (project_hours['Champion_Score'] / max_quality) * 100
        project_hours['Combined_Score'] = (
            (project_hours['Hours_Score'] * 0.6)
            + (project_hours['Quality_Score_Normalized'] * 0.4)
        )

        project_hours = project_hours.sort_values('Combined_Score', ascending=False)

    return project_hours
236
+
237
def extract_ai_tools_from_descriptions(df):
    """Count AI tool mentions in the 'GenAI use case description' column.

    Each textual mention is counted exactly once. Tool names are matched as
    whole words, longest name first, so "gpt-4" is one ChatGPT/GPT mention
    (not one for "gpt-4" plus one for "gpt") and "github copilot" is one
    GitHub Copilot mention. Descriptions are scanned individually, so a
    tool name cannot be formed across the boundary of two descriptions.

    Args:
        df: Frame containing the 'GenAI use case description' column.

    Returns:
        collections.Counter mapping the display name of a tool (or the raw
        lower-case name when it has no display name) to its mention count.
        Empty when there is no non-null description.
    """
    ai_tools = [
        'chatgpt', 'gpt-4', 'gpt-3', 'gpt', 'openai', 'claude', 'anthropic',
        'gemini', 'bard', 'google ai', 'copilot', 'github copilot', 'microsoft copilot',
        'dall-e', 'midjourney', 'stable diffusion', 'hugging face', 'transformers',
        'bert', 'llama', 'mistral', 'tensorflow', 'pytorch', 'ml',
        'jupyter', 'colab', 'langchain', 'llm', 'rag'
    ]

    tool_mapping = {
        'gpt': 'ChatGPT/GPT', 'gpt-3': 'ChatGPT/GPT', 'gpt-4': 'ChatGPT/GPT', 'chatgpt': 'ChatGPT/GPT',
        'openai': 'OpenAI', 'claude': 'Claude', 'anthropic': 'Claude',
        'gemini': 'Google AI', 'bard': 'Google AI', 'google ai': 'Google AI',
        'copilot': 'GitHub Copilot', 'github copilot': 'GitHub Copilot'
    }

    all_descriptions = df['GenAI use case description'].dropna()
    if all_descriptions.empty:
        return Counter()

    # Regex alternation is ordered: listing longer names first makes the
    # engine prefer the most specific tool name at every position.
    alternatives = "|".join(
        re.escape(tool) for tool in sorted(ai_tools, key=len, reverse=True)
    )
    tool_pattern = re.compile(r'\b(?:' + alternatives + r')\b')

    tool_counts = Counter()
    for description in all_descriptions.astype(str):
        for tool in tool_pattern.findall(description.lower()):
            tool_counts[tool_mapping.get(tool, tool)] += 1

    return tool_counts
268
+
269
def extract_use_cases_from_descriptions(df):
    """Count how many descriptions touch each GenAI use-case category.

    A description contributes at most one count per category: it counts as
    soon as any of the category's keywords appears in it as a whole word
    (case-insensitive).

    Args:
        df: Frame containing the 'GenAI use case description' column.

    Returns:
        collections.Counter mapping category name to the number of
        matching descriptions; empty when there is no non-null description.
    """
    use_case_keywords = {
        'Code Generation': ['code', 'coding', 'programming', 'script', 'develop', 'algorithm'],
        'Content Creation': ['content', 'write', 'writing', 'draft', 'article', 'blog'],
        'Data Analysis': ['data', 'analysis', 'analyze', 'analytics', 'statistics', 'insights'],
        'Documentation': ['document', 'documentation', 'manual', 'guide', 'readme'],
        'Research': ['research', 'study', 'investigate', 'explore', 'literature'],
        'Summarization': ['summary', 'summarize', 'summarization', 'extract'],
        'Translation': ['translate', 'translation', 'language', 'localize']
    }

    non_null = df['GenAI use case description'].dropna()
    if non_null.empty:
        return Counter()

    # One compiled whole-word alternation per category.
    category_matchers = {
        category: re.compile(r'\b(?:' + '|'.join(map(re.escape, words)) + r')\b')
        for category, words in use_case_keywords.items()
    }

    tally = Counter()
    for raw_text in non_null.astype(str):
        lowered = raw_text.lower()
        tally.update(
            category
            for category, matcher in category_matchers.items()
            if matcher.search(lowered)
        )
    return tally
295
+
296
def create_download_excel(df):
    """Serialize the processed user table to an in-memory Excel workbook.

    The workbook has a 'Processed Data' sheet holding *df* and, when *df*
    is non-empty, a 'Summary' sheet with headline metrics.

    Args:
        df: Per-user result frame. 'GenAI_Efficiency',
            'Utilization_Percentage' and 'Description_Quality_Score' are
            optional; metrics that depend on a missing column are reported
            as 'N/A'. The frame itself is not modified.

    Returns:
        bytes of the .xlsx file.
    """
    def excel_safe(value):
        """Flatten list-like cells into comma-separated text."""
        if isinstance(value, (list, tuple, set)):
            return ", ".join(str(item) for item in value)
        return value

    # List-valued cells (e.g. per-user project lists) are not a cell type
    # the openpyxl writer accepts, so they are flattened in an export copy.
    export_df = df.copy()
    for column in export_df.select_dtypes(include='object').columns:
        export_df[column] = export_df[column].map(excel_safe)

    def column_mean(column):
        """Return the rounded mean of *column*, or 'N/A' when absent."""
        return round(df[column].mean(), 2) if column in df.columns else 'N/A'

    def top_user(column):
        """Return the user with the highest *column* value, or 'N/A'."""
        if column in df.columns and not df[column].isna().all():
            return df.loc[df[column].idxmax(), 'User']
        return 'N/A'

    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        export_df.to_excel(writer, index=False, sheet_name='Processed Data')

        # Add summary sheet
        if not df.empty:
            summary = pd.DataFrame({
                'Metric': ['Total Users', 'Average GenAI Efficiency (hours)', 'Average Utilization (%)',
                           'Top GenAI User', 'Top Quality Score'],
                'Value': [
                    len(df),
                    column_mean('GenAI_Efficiency'),
                    column_mean('Utilization_Percentage'),
                    top_user('GenAI_Efficiency'),
                    top_user('Description_Quality_Score'),
                ]
            })
            summary.to_excel(writer, index=False, sheet_name='Summary')

    return output.getvalue()
318
+
319
def create_visualizations(result_df, project_analysis, ai_tool_counts, use_case_counts):
    """Build the dashboard figures that the available data supports.

    Figures are appended in a fixed order and a figure is simply omitted
    when its input is missing or empty:

    1. top 10 users by GenAI efficiency hours,
    2. top 8 projects by GenAI hours (as ordered in *project_analysis*),
    3. the 8 most mentioned AI tools,
    4. use-case distribution pie chart,
    5. champion score histogram,
    6. top 10 users by utilization percentage.

    Args:
        result_df: Per-user result frame.
        project_analysis: Per-project frame, or ``None``.
        ai_tool_counts: Mapping of tool name to mention count.
        use_case_counts: Mapping of use case to count.

    Returns:
        list of plotly figures (possibly empty).
    """
    figures = []
    have_users = not result_df.empty

    def add_bar(frame, x, y, title, color, scale, tilt_labels=True):
        """Append a coloured bar chart, optionally tilting the x labels."""
        figure = px.bar(
            frame,
            x=x,
            y=y,
            title=title,
            color=color,
            color_continuous_scale=scale
        )
        if tilt_labels:
            figure.update_layout(xaxis_tickangle=-45)
        figures.append(figure)

    # 1. GenAI Efficiency by User
    if 'GenAI_Efficiency' in result_df.columns and have_users:
        add_bar(
            result_df.sort_values('GenAI_Efficiency', ascending=False).head(10),
            'User', 'GenAI_Efficiency',
            'Top 10 Users by GenAI Efficiency Hours',
            'GenAI_Efficiency', 'Viridis',
        )

    # 2. Project Analysis
    if project_analysis is not None and not project_analysis.empty:
        add_bar(
            project_analysis.head(8),
            'Project', 'Total_GenAI_Hours',
            'Top Projects by GenAI Hours',
            'Champion_Score', 'RdYlGn',
        )

    # 3. AI Tools Usage
    if ai_tool_counts:
        tool_frame = pd.DataFrame({
            'Tool': list(ai_tool_counts.keys()),
            'Mentions': list(ai_tool_counts.values())
        })
        add_bar(
            tool_frame.sort_values('Mentions', ascending=False).head(8),
            'Tool', 'Mentions',
            'Most Mentioned AI Tools',
            'Mentions', 'Blues',
            tilt_labels=False,
        )

    # 4. Use Cases Distribution
    if use_case_counts:
        case_frame = pd.DataFrame({
            'Use Case': list(use_case_counts.keys()),
            'Count': list(use_case_counts.values())
        })
        figures.append(px.pie(
            case_frame.sort_values('Count', ascending=False),
            names='Use Case',
            values='Count',
            title='GenAI Use Cases Distribution'
        ))

    # 5. Quality Score Distribution
    if 'Description_Quality_Score' in result_df.columns and have_users:
        figures.append(px.histogram(
            result_df,
            x='Description_Quality_Score',
            title='Distribution of Champion Scores',
            nbins=20,
            color_discrete_sequence=['#2E86AB']
        ))

    # 6. Utilization Analysis
    if 'Utilization_Percentage' in result_df.columns and have_users:
        add_bar(
            result_df.sort_values('Utilization_Percentage', ascending=False).head(10),
            'User', 'Utilization_Percentage',
            'Top 10 Users by Utilization Percentage',
            'Utilization_Percentage', 'RdYlGn',
        )

    return figures
409
+
410
def process_file(file):
    """Run the full worklog analysis for one uploaded file.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            upload object exposing the path as ``.name``. A path ending in
            ".csv" (any letter case) is read as CSV, anything else as
            Excel. ``None`` means nothing was uploaded.

    Returns:
        An 8-tuple ``(result_df, status_message, excel_path, summary_stats,
        insights, first_plot, second_plot, remaining_plots)``. On any
        failure every element except the status message is ``None``.
    """
    # Local imports keep this function self-contained.
    import os
    import tempfile

    if file is None:
        return None, "Please upload a file", None, None, None, None, None, None

    try:
        # Accept both plain paths and upload objects carrying `.name`.
        path = os.fspath(file) if isinstance(file, (str, os.PathLike)) else file.name

        # Read the file (extension check is case-insensitive: ".CSV" is CSV)
        if path.lower().endswith('.csv'):
            df = pd.read_csv(path)
        else:
            df = pd.read_excel(path)

        # Check required columns
        required_columns = ['User', 'GenAI use case description', 'GenAI Efficiency (Log time in hours)']
        missing_columns = [col for col in required_columns if col not in df.columns]

        if missing_columns:
            return None, f"Missing required columns: {', '.join(missing_columns)}", None, None, None, None, None, None

        # Process the data
        result_df = process_genai_data(df)
        project_analysis = analyze_projects_by_genai_hours(df)
        ai_tool_counts = extract_ai_tools_from_descriptions(df)
        use_case_counts = extract_use_cases_from_descriptions(df)

        # Create Excel download in a fresh temporary directory, so reports
        # generated within the same second cannot overwrite each other and
        # the working directory is not filled with old reports.
        excel_data = create_download_excel(result_df)
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        excel_filename = os.path.join(
            tempfile.mkdtemp(prefix="genai_report_"),
            f"genai_processed_data_{timestamp}.xlsx",
        )
        with open(excel_filename, 'wb') as f:
            f.write(excel_data)

        # Create visualizations
        plots = create_visualizations(result_df, project_analysis, ai_tool_counts, use_case_counts)

        # Create summary statistics
        summary_stats = create_summary_stats(result_df, project_analysis, ai_tool_counts, use_case_counts)

        # Create insights text
        insights = create_insights_text(result_df, project_analysis, ai_tool_counts, use_case_counts)

        return (
            result_df,
            "Processing completed successfully!",
            excel_filename,
            summary_stats,
            insights,
            plots[0] if len(plots) > 0 else None,
            plots[1] if len(plots) > 1 else None,
            plots[2:] if len(plots) > 2 else []
        )

    except Exception as e:
        # UI boundary: report the failure as a status message instead of
        # letting the exception escape to the caller.
        return None, f"Error processing file: {str(e)}", None, None, None, None, None, None
466
+
467
def create_summary_stats(result_df, project_analysis, ai_tool_counts, use_case_counts):
    """Render headline statistics as a newline-separated Markdown string.

    Args:
        result_df: Per-user result frame (``None`` or empty yields the
            "No data to analyze" message).
        project_analysis: Per-project frame whose first row is reported as
            the top project, or ``None``.
        ai_tool_counts: Mapping of tool name to mention count.
        use_case_counts: Mapping of use case to count.

    Returns:
        str with one statistic per line; lines whose source data is
        unavailable are left out.
    """
    if result_df is None or result_df.empty:
        return "No data to analyze"

    # NOTE(review): the emoji/bullet literals below look mis-encoded in this
    # view of the file; they are kept verbatim - confirm the real encoding.
    available = result_df.columns
    lines = [
        "**πŸ“Š Summary Statistics**",
        f"β€’ Total Users: {len(result_df)}",
    ]

    if 'GenAI_Efficiency' in available:
        efficiency = result_df['GenAI_Efficiency']
        lines.append(f"β€’ Total GenAI Hours: {round(efficiency.sum(), 2)}")
        lines.append(f"β€’ Average GenAI Efficiency: {round(efficiency.mean(), 2)} hours")

    if 'Utilization_Percentage' in available:
        mean_utilization = result_df['Utilization_Percentage'].mean()
        lines.append(f"β€’ Average Utilization: {round(mean_utilization, 2)}%")

    if 'Description_Quality_Score' in available:
        mean_quality = result_df['Description_Quality_Score'].mean()
        lines.append(f"β€’ Average Champion Score: {round(mean_quality, 1)}/100")

    if ai_tool_counts:
        # First key with the highest count wins, as with max() over items.
        leading_tool = max(ai_tool_counts, key=ai_tool_counts.get)
        lines.append(f"β€’ Most Used AI Tool: {leading_tool}")

    if use_case_counts:
        leading_case = max(use_case_counts, key=use_case_counts.get)
        lines.append(f"β€’ Top Use Case: {leading_case}")

    if project_analysis is not None and not project_analysis.empty:
        best = project_analysis.iloc[0]
        lines.append(f"β€’ Top Project: {best['Project']} ({round(best['Total_GenAI_Hours'], 2)} hours)")

    return "\n".join(lines)
503
+
504
def create_insights_text(result_df, project_analysis, ai_tool_counts, use_case_counts):
    """Render narrative insights as a newline-separated Markdown string.

    Sections (each emitted only when its data is available): champion user
    (60% normalised hours + 40% normalised quality), top project, usage
    analysis, and - only when both counters are non-empty - the three most
    mentioned tools and the three most frequent use cases.

    Args:
        result_df: Per-user result frame. It is read only; no columns are
            added to it.
        project_analysis: Per-project frame whose first row is the top
            project, or ``None``.
        ai_tool_counts: Mapping of tool name to mention count.
        use_case_counts: Mapping of use case to count.

    Returns:
        str, or "No insights available" when *result_df* is ``None`` or
        empty.
    """
    if result_df is None or result_df.empty:
        return "No insights available"

    # NOTE(review): the emoji literals below look mis-encoded in this view
    # of the file; they are kept verbatim - confirm the real encoding.
    insights = ["**πŸ” Key Insights**"]
    columns = result_df.columns

    # Champion user
    if 'GenAI_Efficiency' in columns and 'Description_Quality_Score' in columns:
        hours = result_df['GenAI_Efficiency']
        quality = result_df['Description_Quality_Score']
        max_hours = hours.max() or 1
        max_quality = quality.max() or 1

        # The combined score lives in a local Series: writing helper
        # columns into result_df would leak them into the caller's table.
        combined_score = ((hours / max_hours) * 100 * 0.6) + ((quality / max_quality) * 100 * 0.4)

        # idxmax is undefined when every score is NaN; skip the section.
        if combined_score.notna().any():
            champion_user = result_df.loc[combined_score.idxmax()]
            insights.append(f"πŸ† **Champion User:** {champion_user['User']}")
            insights.append(f" - GenAI Hours: {round(champion_user['GenAI_Efficiency'], 2)}")
            insights.append(f" - Champion Score: {round(champion_user['Description_Quality_Score'], 1)}/100")
            insights.append("")

    # Project insights
    if project_analysis is not None and not project_analysis.empty:
        top_project = project_analysis.iloc[0]
        insights.append(f"πŸš€ **Top Project:** {top_project['Project']}")
        insights.append(f" - Total Hours: {round(top_project['Total_GenAI_Hours'], 2)}")
        insights.append(f" - Users Involved: {top_project['User_Count']}")
        if 'Champion_Score' in top_project:
            insights.append(f" - Champion Score: {round(top_project['Champion_Score'], 1)}/100")
        insights.append("")

    # Usage patterns
    if 'GenAI_Efficiency' in columns:
        total_users = len(result_df)
        active_users = len(result_df[result_df['GenAI_Efficiency'] > 0])
        usage_rate = (active_users / total_users) * 100
        insights.append("πŸ“ˆ **Usage Analysis:**")
        insights.append(f" - Users with GenAI activity: {active_users}/{total_users} ({round(usage_rate, 1)}%)")

        if active_users > 0:
            high_users = len(result_df[result_df['GenAI_Efficiency'] >= 10])
            insights.append(f" - High-usage users (β‰₯10 hours): {high_users}")
        insights.append("")

    # Tool and use case insights
    if ai_tool_counts and use_case_counts:
        insights.append("πŸ› οΈ **Technology Adoption:**")
        for tool, count in sorted(ai_tool_counts.items(), key=lambda x: x[1], reverse=True)[:3]:
            insights.append(f" - {tool}: {count} mentions")

        insights.append("")
        insights.append("πŸ’‘ **Primary Use Cases:**")
        for case, count in sorted(use_case_counts.items(), key=lambda x: x[1], reverse=True)[:3]:
            insights.append(f" - {case}: {count} instances")

    return "\n".join(insights)
564
+
565
+ # Create Gradio interface
566
def create_gradio_app():
    """Build the Gradio Blocks UI for the GenAI worklog processor.

    The layout consists of a file upload with a process button, a status
    box, and four tabs (processed data + download, summary & insights,
    six plots, and an explanation of the champion score). Clicking the
    button runs the complete analysis and fills every output component.

    Returns:
        The configured ``gr.Blocks`` application (not yet launched).
    """
    # Local imports keep this function self-contained.
    import os
    import tempfile

    # NOTE(review): several emoji/bullet literals in the UI strings below
    # look mis-encoded in this view of the file; they are kept verbatim -
    # confirm against the file's real encoding.
    with gr.Blocks(title="GenAI Worklog Processor", theme=gr.themes.Soft()) as app:
        gr.Markdown("""
        # πŸ€– GenAI Worklog Data Processor v1.1

        This application processes worklog data to extract insights about GenAI usage:

        βœ… Creates a list of unique users
        βœ… Concatenates GenAI use case descriptions for each user
        βœ… Captures GenAI efficiency values and metrics
        βœ… Identifies projects with highest GenAI usage
        βœ… Analyzes AI tools and use cases
        βœ… Identifies prompt champions based on quality metrics

        **Required columns:** User, GenAI use case description, GenAI Efficiency (Log time in hours)
        **Optional columns:** Required, Logged, Date, Project, Project Category, Epic, Key
        """)

        with gr.Row():
            with gr.Column(scale=1):
                file_input = gr.File(
                    label="πŸ“ Upload CSV or Excel File",
                    file_types=[".csv", ".xlsx", ".xls"],
                    type="filepath"
                )
                process_btn = gr.Button("πŸš€ Process Data", variant="primary", size="lg")

            with gr.Column(scale=1):
                status_output = gr.Textbox(
                    label="πŸ“‹ Processing Status",
                    interactive=False,
                    lines=3
                )

        with gr.Tabs():
            with gr.TabItem("πŸ“Š Processed Data"):
                processed_data = gr.Dataframe(
                    label="Processed Results",
                    interactive=False,
                    wrap=True
                )
                download_file = gr.File(
                    label="πŸ’Ύ Download Excel Report",
                    interactive=False
                )

            with gr.TabItem("πŸ“ˆ Summary & Insights"):
                with gr.Row():
                    with gr.Column():
                        summary_stats = gr.Markdown(label="Summary Statistics")
                    with gr.Column():
                        insights_text = gr.Markdown(label="Key Insights")

            with gr.TabItem("πŸ“Š Visualizations"):
                with gr.Row():
                    plot1 = gr.Plot(label="GenAI Efficiency by User")
                    plot2 = gr.Plot(label="Project Analysis")

                with gr.Row():
                    plot3 = gr.Plot(label="AI Tools Usage")
                    plot4 = gr.Plot(label="Use Cases Distribution")

                with gr.Row():
                    plot5 = gr.Plot(label="Quality Score Distribution")
                    plot6 = gr.Plot(label="Utilization Analysis")

            with gr.TabItem("ℹ️ How Champion Scores Work"):
                gr.Markdown("""
                ## πŸ† Champion Score Calculation

                The Champion Score evaluates the quality and comprehensiveness of GenAI usage descriptions on a scale of 0-100:

                ### πŸ› οΈ Tools (20 points)
                - **Basic mention** (10 pts): References one AI tool (GPT, Claude, etc.)
                - **Multiple tools** (15 pts): Uses 2+ different AI tools
                - **Specific versions** (+5 pts): Mentions specific models (GPT-4, Claude-2, etc.)

                ### πŸ’‘ Use Case (30 points)
                - **Category identification** (5 pts each): Code generation, content creation, data analysis, etc.
                - **Context specificity** (+5 pts): Clear "for/to" statements showing purpose
                - **Domain expertise** (+5 pts): Technical terms (API, database, algorithm, etc.)
                - **Work integration** (+5 pts): References projects, tasks, tickets, stories

                ### ✍️ Prompt Quality (30 points)
                - **Length bonus**: 200+ chars (5 pts), 500+ chars (10 pts)
                - **Prompt indicators** (10 pts): Quotes, mentions "prompt", "assist", "create", "generate"
                - **Advanced techniques** (2 pts each): Step-by-step, chain of thought, few-shot, examples

                ### 🎯 Outcomes & Iteration (20 points)
                - **Results mentioned** (2 pts each): "result", "output", "generated", "created", "improved"
                - **Iteration indicators** (2 pts each): "refine", "revise", "update", "feedback"
                - **Quantified impact** (+5 pts): Percentages, time saved, metrics

                ### πŸ… Score Interpretation
                - **πŸ₯‡ 90-100**: Exceptional - Comprehensive usage with advanced techniques
                - **πŸ₯ˆ 70-89**: Strong - Good tool usage with clear outcomes
                - **πŸ₯‰ 50-69**: Moderate - Basic usage with some detail
                - **πŸ“ 30-49**: Basic - Simple usage descriptions
                - **⚠️ 0-29**: Minimal - Very basic or unclear usage

                Higher scores indicate more sophisticated and effective GenAI adoption!
                """)

        # Event handlers
        def failure(status, summary, insight):
            """Build the 11-value output tuple for a run with no results."""
            return (
                None, status, None,
                summary, insight,
                None, None, None, None, None, None
            )

        def process_and_update(file):
            """Analyse the uploaded file and return values for all outputs."""
            if file is None:
                return failure("Please upload a file first", "No data to display", "No insights available")

            try:
                # The File component is configured with type="filepath", but
                # upload objects exposing `.name` are tolerated as well.
                path = os.fspath(file) if isinstance(file, (str, os.PathLike)) else file.name

                # Read the file (extension check is case-insensitive)
                if path.lower().endswith('.csv'):
                    df = pd.read_csv(path)
                else:
                    df = pd.read_excel(path)

                # Check required columns
                required_columns = ['User', 'GenAI use case description', 'GenAI Efficiency (Log time in hours)']
                missing_columns = [col for col in required_columns if col not in df.columns]

                if missing_columns:
                    return failure(
                        f"❌ Missing required columns: {', '.join(missing_columns)}",
                        "Cannot process data", "Missing required columns"
                    )

                # Process the data
                result_df = process_genai_data(df)
                project_analysis = analyze_projects_by_genai_hours(df)
                ai_tool_counts = extract_ai_tools_from_descriptions(df)
                use_case_counts = extract_use_cases_from_descriptions(df)

                # Create Excel download in a fresh temporary directory, so
                # reports produced within the same second by different
                # sessions cannot overwrite each other and the working
                # directory does not accumulate old reports.
                excel_data = create_download_excel(result_df)
                timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
                excel_filename = os.path.join(
                    tempfile.mkdtemp(prefix="genai_report_"),
                    f"genai_processed_data_{timestamp}.xlsx",
                )
                with open(excel_filename, 'wb') as f:
                    f.write(excel_data)

                # Create visualizations
                plots = create_visualizations(result_df, project_analysis, ai_tool_counts, use_case_counts)
                # Pad to the six plot slots so every output receives a value.
                plot_slots = list(plots[:6]) + [None] * (6 - len(plots[:6]))

                # Create summary statistics and insights
                summary_text = create_summary_stats(result_df, project_analysis, ai_tool_counts, use_case_counts)
                insights = create_insights_text(result_df, project_analysis, ai_tool_counts, use_case_counts)

                return (
                    result_df,
                    "βœ… Processing completed successfully!",
                    excel_filename,
                    summary_text,
                    insights,
                    *plot_slots
                )

            except Exception as e:
                # UI boundary: surface the failure in the status box instead
                # of letting the exception escape the event handler.
                error_msg = f"❌ Error processing file: {str(e)}"
                return failure(error_msg, "Error occurred", error_msg)

        process_btn.click(
            fn=process_and_update,
            inputs=[file_input],
            outputs=[
                processed_data, status_output, download_file,
                summary_stats, insights_text,
                plot1, plot2, plot3, plot4, plot5, plot6
            ]
        )

        # Add examples only when the sample file really exists; the label
        # promises "if available" and a missing file must not prevent the
        # app from being built.
        example_path = "sample_worklog.csv"
        if os.path.exists(example_path):
            gr.Examples(
                examples=[
                    [example_path],
                ],
                inputs=file_input,
                label="πŸ“‹ Example Files (if available)"
            )

        gr.Markdown("""
        ---
        **Enhanced GenAI Worklog Processor** β€’ Built with Gradio and Pandas

        πŸ’‘ **Tips for best results:**
        - Ensure your CSV/Excel file has the required columns
        - GenAI descriptions should be detailed for better Champion Scores
        - Include project information for comprehensive analysis
        """)

    return app
770
+
771
+ # Helper function to assign team categories (referenced in original code)
772
def assign_team_category(row, max_quality, max_hours):
    """Classify a user into a team category from quality and hours.

    Both inputs are normalised to a 0-100 scale against the supplied
    maxima (a non-positive maximum normalises to 0) and then checked
    against fixed thresholds, most demanding category first.

    Args:
        row: Mapping-like record providing 'Champion_Score' and
            'GenAI_Efficiency'.
        max_quality: Highest champion score in the population.
        max_hours: Highest GenAI hours in the population.

    Returns:
        ``(category_label, explanation)`` tuple of strings.
    """
    # NOTE(review): some emoji in the labels below look mis-encoded in this
    # view of the file; they are kept verbatim - confirm the real encoding.
    quality_score = row['Champion_Score']
    hours = row['GenAI_Efficiency']

    # Normalize scores
    quality_norm = 0
    if max_quality > 0:
        quality_norm = (quality_score / max_quality) * 100
    hours_norm = 0
    if max_hours > 0:
        hours_norm = (hours / max_hours) * 100

    if quality_norm >= 70:
        if hours_norm >= 50:
            return "πŸš€ Power Users", "High quality usage with significant hours"
        return "🎯 Quality Champions", "Excellent usage quality, moderate hours"
    if hours_norm >= 70:
        return "⚑ High Volume", "Heavy usage, opportunity for quality improvement"
    if quality_norm >= 40 or hours_norm >= 30:
        return "πŸ“ˆ Growing Users", "Developing GenAI skills and usage"
    if hours > 0:
        return "🌱 Beginners", "Starting GenAI journey"
    return "πŸ’€ Inactive", "No recorded GenAI usage"
793
+
794
+ # Launch the app
795
if __name__ == "__main__":
    # Script entry point: build the UI and serve it on all interfaces at
    # port 7860, requesting a share link and showing errors in the UI.
    launch_options = {
        "share": True,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
    }
    app = create_gradio_app()
    app.launch(**launch_options)