rohangbs committed on
Commit
cc8c18c
·
verified ·
1 Parent(s): f9f07b6

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +541 -0
app.py ADDED
@@ -0,0 +1,541 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import plotly.express as px
4
+ from groq import Groq
5
+ import json
6
+ import time
7
+ import re
8
+ from concurrent.futures import ThreadPoolExecutor
9
+ from io import StringIO
10
+
11
class CustomConversationIntentClassifier:
    """Rule-based intent classifier for HR/employee conversation messages.

    Intents are organised as ``{main_category: {subcategory: [patterns]}}``.
    Each pattern is a regular-expression fragment that is searched for as a
    whole word/phrase in the normalised message text. The hierarchy is kept
    in ``st.session_state['custom_intents']``.

    NOTE(review): the source this was recovered from had lost its
    indentation; the nesting below was reconstructed from statement order.
    """

    def __init__(self):
        """Load the hierarchy from session state, seeding the defaults on first use."""
        if 'custom_intents' not in st.session_state:
            self.intent_hierarchy = self._default_intent_hierarchy()
            st.session_state['custom_intents'] = self.intent_hierarchy
        else:
            self.intent_hierarchy = st.session_state['custom_intents']

    @staticmethod
    def _default_intent_hierarchy():
        """Return a fresh copy of the built-in intent hierarchy."""
        return {
            "A. Communication & Response Intent": {
                "Information-Seeking": [
                    r"what", r"how", r"why", r"when", r"where", r"who",
                    r"want to know", r"tell me about", r"can you explain"
                ],
                "Clarification": [
                    r"explain", r"clarify", r"what do you mean", r"repeat",
                    r"didn't understand", r"could you elaborate"
                ],
                "Agreement": [
                    r"yes", r"agree", r"makes sense", r"exactly",
                    r"that's right", r"correct"
                ],
                "Disagreement": [
                    r"no", r"don't agree", r"incorrect", r"that's wrong",
                    r"i disagree", r"not correct"
                ],
                "Acknowledgment": [
                    r"got it", r"i see", r"understood", r"noted",
                    r"alright", r"okay"
                ],
                "Apology": [
                    r"sorry", r"apologize", r"my mistake", r"my fault",
                    r"i apologize", r"regret"
                ],
                "Appreciation": [
                    r"thank you", r"thanks", r"appreciate", r"grateful",
                    r"thank you for your help"
                ],
                "Urgency": [
                    r"asap", r"urgent", r"immediately", r"right away",
                    r"emergency", r"as soon as possible"
                ]
            },
            "B. Decision-Making Intent": {
                "Exploration": [
                    r"consider", r"explore", r"what if", r"options",
                    r"alternatives", r"possibilities"
                ],
                "Commitment": [
                    r"decided", r"will do", r"i've made my decision",
                    r"going to", r"i will", r"definitely"
                ],
                "Indecision": [
                    r"not sure", r"unsure", r"undecided", r"can't decide",
                    r"torn between", r"haven't decided"
                ],
                "Delegation": [
                    r"can you handle", r"take care of", r"assign",
                    r"please handle", r"can you manage"
                ],
                "Evaluation": [
                    r"compare", r"evaluate", r"assess", r"weigh",
                    r"pros and cons", r"better option"
                ]
            },
            "C. Emotional & Psychological Intent": {
                "Seeking Validation": [
                    r"am i right", r"is this correct", r"does this make sense",
                    r"what do you think", r"how did i do"
                ],
                "Seeking Support": [
                    r"need help", r"support", r"assist", r"guide",
                    r"can you help", r"struggling with"
                ],
                "Expressing Frustration": [
                    r"annoying", r"frustrated", r"irritating", r"fed up",
                    r"this is difficult", r"getting nowhere"
                ],
                "Venting": [
                    r"just need to", r"off my chest", r"let me tell you",
                    r"you won't believe", r"so tired of"
                ],
                "Seeking Comfort": [
                    r"feeling down", r"upset", r"worried", r"anxious",
                    r"stressed", r"not feeling great"
                ]
            },
            "D. Social & Relationship Intent": {
                "Social Bonding": [
                    r"coffee", r"lunch", r"catch up", r"get together",
                    r"hang out", r"meet up"
                ],
                "Networking": [
                    r"connect", r"network", r"introduction", r"link up",
                    r"get in touch", r"reach out"
                ],
                "Collaboration": [
                    r"work together", r"collaborate", r"team up",
                    r"join forces", r"partner"
                ],
                "Teaching": [
                    r"let me show", r"teach", r"explain how",
                    r"guide you through", r"help you understand"
                ],
                "Testing Boundaries": [
                    r"be honest", r"frank", r"between us",
                    r"confidential", r"keep this private"
                ]
            },
            "E. Action-Oriented Intent": {
                "Requesting Action": [
                    r"can you", r"please", r"would you", r"need you to",
                    r"send", r"do this"
                ],
                "Offering Help": [
                    r"can i help", r"let me help", r"assistance",
                    r"i can do", r"happy to help"
                ],
                "Providing Feedback": [
                    r"feedback", r"suggestion", r"think about",
                    r"my opinion", r"recommend"
                ],
                "Expressing Intent to Quit": [
                    r"quit", r"give up", r"stop", r"abandon",
                    r"no longer want", r"discontinue"
                ],
                "Confirming Action": [
                    r"is this done", r"completed", r"finished",
                    r"status", r"update"
                ]
            }
        }

    def add_intent_category(self, main_category, subcategory, patterns):
        """Add (or replace) a subcategory and persist the hierarchy in session state.

        Args:
            main_category: Top-level category name; created if missing.
            subcategory: Subcategory name; an existing one is overwritten.
            patterns: List of regex fragments that trigger this subcategory.
        """
        if main_category not in self.intent_hierarchy:
            self.intent_hierarchy[main_category] = {}

        self.intent_hierarchy[main_category][subcategory] = patterns
        st.session_state['custom_intents'] = self.intent_hierarchy

    def preprocess_text(self, text):
        """Normalise a message for matching.

        Missing values become ``""``. Otherwise the text is lower-cased and
        every punctuation character except the apostrophe is replaced by a
        space. Apostrophes are kept (typographic ones are mapped to ``'``)
        because many patterns are contractions such as ``didn't understand``;
        stripping them made those patterns unmatchable.
        """
        if pd.isna(text):
            return ""
        text = str(text).lower()
        text = text.replace("\u2019", "'").replace("\u2018", "'")
        return re.sub(r"[^\w\s']", ' ', text)

    @staticmethod
    def _pattern_matches(pattern, text):
        """Return True when *pattern* occurs in *text* on word boundaries.

        Matching is case-insensitive so custom patterns typed with capitals
        still match the lower-cased text. A pattern that is not a valid
        regular expression is retried as a literal phrase instead of raising.
        """
        pattern = str(pattern).replace("\u2019", "'").replace("\u2018", "'")
        if not pattern.strip():
            # An empty pattern would match every message.
            return False
        try:
            return re.search(r'\b' + pattern + r'\b', text, re.IGNORECASE) is not None
        except re.error:
            literal = re.escape(pattern)
            return re.search(r'\b' + literal + r'\b', text, re.IGNORECASE) is not None

    def classify_intent(self, text):
        """Classify one message.

        Returns:
            A list of ``{'main_category': ..., 'subcategory': ...}`` dicts,
            at most one per main category (the first subcategory, in
            hierarchy order, with a matching pattern). If nothing matches,
            a single ``Unclassified`` / ``Other`` entry is returned.
        """
        text = self.preprocess_text(text)
        results = []

        for main_category, subcategories in self.intent_hierarchy.items():
            for subcategory, patterns in subcategories.items():
                if any(self._pattern_matches(pattern, text) for pattern in patterns):
                    results.append({
                        'main_category': main_category,
                        'subcategory': subcategory
                    })
                    # Only the first matching subcategory per main category is kept.
                    break

        if not results:
            return [{'main_category': 'Unclassified', 'subcategory': 'Other'}]
        return results

    def process_conversation(self, df):
        """Classify the ``HR`` and ``Employee`` columns of *df*.

        Only the first intent returned by ``classify_intent`` is reported for
        each message.

        Returns:
            DataFrame with the original messages plus main category and
            subcategory columns for each role.
        """
        hr_intents = [self.classify_intent(msg) for msg in df['HR']]
        employee_intents = [self.classify_intent(msg) for msg in df['Employee']]

        return pd.DataFrame({
            'HR_Message': df['HR'],
            'HR_Main_Category': [intent[0]['main_category'] for intent in hr_intents],
            'HR_Subcategory': [intent[0]['subcategory'] for intent in hr_intents],
            'Employee_Message': df['Employee'],
            'Employee_Main_Category': [intent[0]['main_category'] for intent in employee_intents],
            'Employee_Subcategory': [intent[0]['subcategory'] for intent in employee_intents]
        })
192
+
193
class EnhancedConversationAnalyzer:
    """Score conversation messages (sentiment, psychology, satisfaction) with a Groq LLM.

    NOTE(review): the source this was recovered from had lost its
    indentation, so any leading whitespace inside the original prompt
    strings could not be recovered; the prompt lines are emitted flush-left.
    """

    MODEL_NAME = "llama-3.3-70b-versatile"
    # The prompt asks the model for "max 50 words"; the limit is enforced in words.
    MAX_EXPLANATION_WORDS = 50

    _PROMPT_TEMPLATE = (
        'Analyze this {role} message and respond ONLY with a JSON object:\n'
        '\n'
        'Message: "{message}"\n'
        '\n'
        'Required JSON format:\n'
        '{{\n'
        '"sentiment": {{\n'
        '"compound": <float between -1 and 1>,\n'
        '"positive": <float between 0 and 1>,\n'
        '"negative": <float between 0 and 1>\n'
        '}},\n'
        '"psychological": {{\n'
        '"stress": <integer between 0 and 10>,\n'
        '"confidence": <integer between 0 and 10>,\n'
        '"frustration": <integer between 0 and 10>\n'
        '}},\n'
        '"satisfaction": <integer between 0 and 100>,\n'
        '"explanation": "<brief analysis, max 50 words>"\n'
        '}}\n'
        '\n'
        'Ensure the response is ONLY the JSON object with no additional text.'
    )

    def __init__(self, groq_api_key):
        """Create the Groq client and the system prompt shared by all requests."""
        self.client = Groq(api_key=groq_api_key)

        # System prompt for consistent analysis
        self.system_prompt = (
            "You are an expert conversation analyzer focusing on workplace communications.\n"
            "Analyze conversations for sentiment, psychological aspects, and satisfaction levels.\n"
            "Always respond with valid JSON containing numerical scores and brief explanations."
        )

    def clean_json_response(self, response_text):
        """Extract the JSON object embedded in an LLM reply.

        The span from the first ``{`` to the last ``}`` is parsed. If the
        reply is not a string, has no such span, or the span is not a valid
        JSON object, the empty analysis structure is returned instead.
        """
        if isinstance(response_text, str):
            start = response_text.find('{')
            end = response_text.rfind('}') + 1
            if start != -1 and end > start:
                try:
                    parsed = json.loads(response_text[start:end])
                except ValueError:  # json.JSONDecodeError is a ValueError
                    parsed = None
                if isinstance(parsed, dict):
                    return parsed
        return self.get_empty_analysis()

    def _request_analysis(self, message, role):
        """Run one LLM request and return the validated analysis.

        Contains no Streamlit calls, so it can run in a worker thread.
        Exceptions from the API call propagate to the caller.
        """
        if pd.isna(message) or not str(message).strip():
            # Nothing to analyse; skip the API call.
            return self.get_empty_analysis()

        prompt = self._PROMPT_TEMPLATE.format(role=role, message=message)
        completion = self.client.chat.completions.create(
            messages=[
                {"role": "system", "content": self.system_prompt},
                {"role": "user", "content": prompt}
            ],
            model=self.MODEL_NAME,
            temperature=0.1,
        )

        response_text = completion.choices[0].message.content
        return self.validate_analysis(self.clean_json_response(response_text))

    def analyze_message(self, message, role):
        """Analyze a single message using Groq LLM.

        Args:
            message: Message text; missing or blank values are not sent to the API.
            role: Speaker label inserted into the prompt (e.g. ``'HR'``).

        Returns:
            The validated analysis dict. On any request error the error is
            shown with ``st.error`` and the empty analysis is returned.
        """
        try:
            return self._request_analysis(message, role)
        except Exception as e:
            st.error(f"Error analyzing message: {str(e)}")
            return self.get_empty_analysis()

    @staticmethod
    def _bounded(value, low, high, as_int=False):
        """Coerce *value* to a number clamped to [low, high]; unusable values become 0."""
        fallback = 0 if as_int else 0.0
        try:
            number = float(value)
        except (TypeError, ValueError, OverflowError):
            return fallback
        if number != number:  # NaN
            return fallback
        number = max(low, min(high, number))
        return int(number) if as_int else float(number)

    def validate_analysis(self, analysis):
        """Validate and sanitize analysis values.

        Every field is coerced and clamped independently, so one malformed
        value does not discard the remaining valid ones. Missing or unusable
        numeric fields become 0. The explanation is limited to
        ``MAX_EXPLANATION_WORDS`` words.
        """
        template = self.get_empty_analysis()
        if not isinstance(analysis, dict):
            return template

        sentiment = analysis.get('sentiment')
        if not isinstance(sentiment, dict):
            sentiment = {}
        template['sentiment']['compound'] = self._bounded(sentiment.get('compound', 0), -1, 1)
        template['sentiment']['positive'] = self._bounded(sentiment.get('positive', 0), 0, 1)
        template['sentiment']['negative'] = self._bounded(sentiment.get('negative', 0), 0, 1)

        psychological = analysis.get('psychological')
        if not isinstance(psychological, dict):
            psychological = {}
        for field in ('stress', 'confidence', 'frustration'):
            template['psychological'][field] = self._bounded(
                psychological.get(field, 0), 0, 10, as_int=True
            )

        template['satisfaction'] = self._bounded(
            analysis.get('satisfaction', 0), 0, 100, as_int=True
        )
        words = str(analysis.get('explanation', '')).split()
        template['explanation'] = ' '.join(words[:self.MAX_EXPLANATION_WORDS])
        return template

    def get_empty_analysis(self):
        """Return empty analysis structure"""
        return {
            "sentiment": {"compound": 0.0, "positive": 0.0, "negative": 0.0},
            "psychological": {"stress": 0, "confidence": 0, "frustration": 0},
            "satisfaction": 0,
            "explanation": "No message to analyze"
        }

    @staticmethod
    def _flatten(prefix, message, analysis):
        """Flatten one analysis dict into ``<prefix>_*`` result columns."""
        return {
            f'{prefix}_Message': message,
            f'{prefix}_Sentiment_Compound': analysis['sentiment']['compound'],
            f'{prefix}_Sentiment_Positive': analysis['sentiment']['positive'],
            f'{prefix}_Sentiment_Negative': analysis['sentiment']['negative'],
            f'{prefix}_Satisfaction_Score': analysis['satisfaction'],
            f'{prefix}_Stress_Level': analysis['psychological']['stress'],
            f'{prefix}_Confidence_Level': analysis['psychological']['confidence'],
            f'{prefix}_Frustration_Level': analysis['psychological']['frustration'],
            f'{prefix}_Analysis': analysis['explanation'],
        }

    def _collect(self, future):
        """Wait for a worker result; report failures from the calling thread."""
        try:
            return future.result()
        except Exception as e:
            st.error(f"Error analyzing message: {str(e)}")
            return self.get_empty_analysis()

    def process_conversation(self, df):
        """Process conversation with LLM analysis.

        The ``HR`` and ``Employee`` messages of each row are analysed
        concurrently; rows are processed one after another with a short
        pause between them.

        Returns:
            DataFrame with one row per input row and the flattened HR and
            Employee analysis columns. The columns are present even when
            *df* has no rows.
        """
        empty = self.get_empty_analysis()
        columns = list(self._flatten('HR', None, empty)) + list(self._flatten('Employee', None, empty))

        results = []
        total_rows = len(df)
        progress_bar = st.progress(0)

        try:
            with ThreadPoolExecutor(max_workers=4) as executor:
                # Count rows by position: index labels need not be 0..n-1.
                for position, (_, row) in enumerate(df.iterrows(), start=1):
                    # Workers run the Streamlit-free request; errors are
                    # reported by _collect on this thread.
                    hr_future = executor.submit(self._request_analysis, row['HR'], 'HR')
                    emp_future = executor.submit(self._request_analysis, row['Employee'], 'Employee')

                    hr_analysis = self._collect(hr_future)
                    emp_analysis = self._collect(emp_future)

                    results.append({
                        **self._flatten('HR', row['HR'], hr_analysis),
                        **self._flatten('Employee', row['Employee'], emp_analysis),
                    })
                    progress_bar.progress(position / total_rows)

                    # Add a small delay to avoid rate limits
                    time.sleep(0.1)
        finally:
            progress_bar.empty()

        return pd.DataFrame(results, columns=columns)
341
+
342
def create_intent_distribution_plot(df, role):
    """Build a bar chart of how often each main intent category occurs for *role*.

    Args:
        df: DataFrame containing a ``<role>_Main_Category`` column.
        role: Column prefix, e.g. ``'HR'`` or ``'Employee'``.

    Returns:
        The Plotly figure produced by ``px.bar``.
    """
    category_column = f'{role}_Main_Category'
    frequencies = df[category_column].value_counts()
    return px.bar(
        x=frequencies.index,
        y=frequencies.values,
        title=f'Intent Distribution for {role}',
        labels={'x': 'Intent Category', 'y': 'Count'}
    )
351
+
352
def _find_invalid_patterns(patterns):
    """Return the patterns that do not compile as regular expressions."""
    invalid = []
    for pattern in patterns:
        try:
            re.compile(pattern)
        except re.error:
            invalid.append(pattern)
    return invalid


def _validate_intent_hierarchy(data):
    """Raise ValueError unless *data* is ``{main: {sub: [pattern strings]}}``."""
    if not isinstance(data, dict):
        raise ValueError("top level must be a JSON object of main categories")
    for main_category, subcategories in data.items():
        if not isinstance(subcategories, dict):
            raise ValueError(f"'{main_category}' must map subcategories to pattern lists")
        for subcategory, patterns in subcategories.items():
            if not isinstance(patterns, list) or not all(isinstance(p, str) for p in patterns):
                raise ValueError(
                    f"'{main_category}' / '{subcategory}' must be a list of pattern strings"
                )


def intent_management_ui():
    """Render the sidebar controls for adding, viewing, exporting and importing intents.

    Reads and updates ``st.session_state.classifier`` and
    ``st.session_state['custom_intents']``.
    """
    st.sidebar.header("Custom Intent Management")
    classifier = st.session_state.classifier

    # Add new intent category
    with st.sidebar.expander("Add New Intent Category"):
        main_category = st.text_input("Main Category (e.g., F. Custom Intent)").strip()
        subcategory = st.text_input("Subcategory (e.g., Custom Type)").strip()
        patterns = st.text_area("Patterns (one per line)")

        if st.button("Add Intent"):
            pattern_list = [p.strip() for p in patterns.split('\n') if p.strip()]
            invalid = _find_invalid_patterns(pattern_list)
            if not (main_category and subcategory and pattern_list):
                st.warning("Please provide a main category, a subcategory and at least one pattern.")
            elif invalid:
                st.error(f"Invalid regular expression(s): {', '.join(invalid)}")
            else:
                classifier.add_intent_category(main_category, subcategory, pattern_list)
                st.success(f"Added new intent: {main_category} - {subcategory}")

    # View current intents
    with st.sidebar.expander("View Current Intents"):
        st.json(classifier.intent_hierarchy)

    # Export/Import intents
    with st.sidebar.expander("Export/Import Intents"):
        # Rendered unconditionally: a download button nested inside
        # `if st.button(...)` disappears on the rerun that follows a click.
        st.download_button(
            label="Download Intents JSON",
            data=json.dumps(classifier.intent_hierarchy, indent=2),
            file_name="custom_intents.json",
            mime="application/json"
        )

        uploaded_json = st.file_uploader("Import Intents JSON", type="json")
        if uploaded_json is None:
            # Forget the last import so the same file can be uploaded again.
            st.session_state.pop('imported_intents_fingerprint', None)
        else:
            raw = uploaded_json.getvalue()
            fingerprint = hash(raw)
            # The uploader keeps returning the file on every rerun. Import it
            # only once, otherwise intents added later would be overwritten.
            if st.session_state.get('imported_intents_fingerprint') != fingerprint:
                try:
                    new_intents = json.loads(raw)
                    _validate_intent_hierarchy(new_intents)
                except ValueError as e:  # also covers JSON and UTF-8 decode errors
                    st.error(f"Error importing intents: {str(e)}")
                else:
                    classifier.intent_hierarchy = new_intents
                    st.session_state['custom_intents'] = new_intents
                    st.session_state['imported_intents_fingerprint'] = fingerprint
                    st.success("Successfully imported intents")
393
+
394
def _render_intent_section(df):
    """Classify intents for *df*, render table, charts and download; return the results."""
    with st.expander("Intent Classification Results"):
        with st.spinner("Classifying intents..."):
            intent_results = st.session_state.classifier.process_conversation(df)

        st.dataframe(intent_results)

        st.subheader("Intent Distribution")
        st.plotly_chart(create_intent_distribution_plot(intent_results, 'HR'))
        st.plotly_chart(create_intent_distribution_plot(intent_results, 'Employee'))

        # Download intent results
        st.download_button(
            label="Download intent classification results as CSV",
            data=intent_results.to_csv(index=False),
            file_name="classified_conversations.csv",
            mime="text/csv"
        )
    return intent_results


def _render_sentiment_section(df, groq_api_key, cache_key):
    """Run (or reuse) the LLM sentiment analysis and render its metrics, charts and download.

    Results are kept in ``st.session_state['sentiment_cache']`` under
    *cache_key*. Streamlit re-executes the script on every widget
    interaction, so without the cache each click would repeat one LLM
    request per message.
    """
    with st.expander("Sentiment Analysis Results"):
        force_rerun = st.button("Re-run sentiment analysis")
        cached = st.session_state.get('sentiment_cache')

        if not force_rerun and cached is not None and cached['key'] == cache_key:
            sentiment_results = cached['results']
        else:
            analyzer = EnhancedConversationAnalyzer(groq_api_key)
            with st.spinner("Analyzing sentiments using AI... This may take a few minutes."):
                sentiment_results = analyzer.process_conversation(df)
            st.session_state['sentiment_cache'] = {'key': cache_key, 'results': sentiment_results}

        # Display sentiment summary metrics
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric(
                "Average HR Satisfaction",
                f"{sentiment_results['HR_Satisfaction_Score'].mean():.1f}%"
            )
        with col2:
            st.metric(
                "Average Employee Satisfaction",
                f"{sentiment_results['Employee_Satisfaction_Score'].mean():.1f}%"
            )
        with col3:
            st.metric(
                "Overall Sentiment",
                f"{sentiment_results['Employee_Sentiment_Compound'].mean():.2f}"
            )

        # Display sentiment visualizations
        sentiment_fig = px.line(
            sentiment_results,
            y=['HR_Sentiment_Compound', 'Employee_Sentiment_Compound'],
            title='Sentiment Trends',
            labels={'value': 'Sentiment Score', 'index': 'Message Number'}
        )
        st.plotly_chart(sentiment_fig)

        satisfaction_fig = px.line(
            sentiment_results,
            y=['HR_Satisfaction_Score', 'Employee_Satisfaction_Score'],
            title='Satisfaction Score Trends',
            labels={'value': 'Satisfaction Score', 'index': 'Message Number'}
        )
        st.plotly_chart(satisfaction_fig)

        # Display detailed sentiment results
        st.subheader("Detailed Sentiment Analysis")
        st.dataframe(sentiment_results)

        # Download sentiment results
        st.download_button(
            label="Download sentiment analysis results as CSV",
            data=sentiment_results.to_csv(index=False),
            file_name="sentiment_analysis.csv",
            mime="text/csv"
        )
    return sentiment_results


def _render_combined_section(intent_results, sentiment_results):
    """Render the merged intent and sentiment table, or a hint when sentiment is missing."""
    st.subheader("Combined Analysis Results")

    if sentiment_results is None:
        st.info("Add your Groq API key and run sentiment analysis to get combined results.")
        return

    # Keep only one copy of the messages
    combined_results = intent_results.copy()
    for col in sentiment_results.columns:
        if col not in ('HR_Message', 'Employee_Message'):
            # Both frames hold the same rows in the same order; assign by
            # position so differing index labels cannot misalign them.
            combined_results[col] = sentiment_results[col].values

    st.write("Preview of combined results:")
    st.dataframe(combined_results.head())

    # Download combined results
    st.download_button(
        label="Download combined analysis results as CSV",
        data=combined_results.to_csv(index=False),
        file_name="combined_analysis.csv",
        mime="text/csv",
        key="combined_download"
    )


def main():
    """Streamlit entry point: upload a CSV, classify intents, optionally run sentiment analysis."""
    st.title("Comprehensive Conversation Analyzer")
    st.write("Upload a CSV file to analyze conversations using intent classification and sentiment analysis.")

    # Initialize intent classifier
    if 'classifier' not in st.session_state:
        st.session_state.classifier = CustomConversationIntentClassifier()

    # Show intent management UI in sidebar
    intent_management_ui()

    # Groq API key input for sentiment analysis
    groq_api_key = st.text_input("Enter your Groq API key for sentiment analysis", type="password")

    # File upload
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
    if uploaded_file is None:
        return

    try:
        raw_bytes = uploaded_file.getvalue()
        uploaded_file.seek(0)  # make sure parsing starts at the beginning on every rerun
        df = pd.read_csv(uploaded_file)

        if 'HR' not in df.columns or 'Employee' not in df.columns:
            st.error("CSV file must contain 'HR' and 'Employee' columns!")
            return

        if df.empty:
            st.warning("The uploaded CSV contains no conversation rows.")
            return

        st.subheader("Sample of Original Data")
        st.dataframe(df.head())

        intent_results = _render_intent_section(df)

        sentiment_results = None
        if groq_api_key:
            # The cache key covers both file content and API key.
            cache_key = hash((raw_bytes, groq_api_key))
            sentiment_results = _render_sentiment_section(df, groq_api_key, cache_key)
        else:
            st.warning("Please enter your Groq API key to perform sentiment analysis.")

        if intent_results is not None:
            _render_combined_section(intent_results, sentiment_results)

    except Exception as e:
        # Top-level UI boundary: show the failure instead of a stack trace.
        st.error(f"An error occurred: {str(e)}")


if __name__ == "__main__":
    main()