riazmo committed on
Commit
abf141b
·
verified ·
1 Parent(s): 2def748

Update database_enhanced.py

Browse files
Files changed (1) hide show
  1. database_enhanced.py +677 -430
database_enhanced.py CHANGED
@@ -1,445 +1,692 @@
1
  """
2
- Enhanced Database Schema for Multi-Stage Review Analysis
3
- Adds Stage 1-4 columns to existing reviews table
4
  """
5
 
6
- import sqlite3
7
- from datetime import datetime
8
- from typing import Dict, List, Any, Optional
9
  import json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
class EnhancedDatabase:
    """
    SQLite helper for the multi-stage review analysis pipeline.

    Adds the Stage 1-3 result columns and processing metadata columns to an
    existing ``reviews`` table, creates the ``llm_decision_logs`` and
    ``batch_insights`` tables, and reads/writes per-stage results.

    ``connect()`` must be called before any other method; every method
    operates on ``self.conn``.
    """

    # (column name, SQLite type) pairs added to ``reviews`` by enhance_schema().
    # These names are fixed constants, never user input, which is why they may
    # be interpolated into DDL / UPDATE statements below.
    _STAGE1_COLUMNS = (
        ("stage1_llm1_type", "TEXT"),
        ("stage1_llm1_department", "TEXT"),
        ("stage1_llm1_priority", "TEXT"),
        ("stage1_llm1_confidence", "REAL"),
        ("stage1_llm1_reasoning", "TEXT"),
        ("stage1_llm2_user_type", "TEXT"),
        ("stage1_llm2_emotion", "TEXT"),
        ("stage1_llm2_context", "TEXT"),
        ("stage1_llm2_confidence", "REAL"),
        ("stage1_llm2_reasoning", "TEXT"),
        ("stage1_manager_classification", "TEXT"),
        ("stage1_manager_reasoning", "TEXT"),
        ("stage1_completed_at", "TIMESTAMP"),
    )

    _STAGE2_COLUMNS = (
        ("stage2_best_sentiment", "TEXT"),
        ("stage2_best_confidence", "REAL"),
        ("stage2_best_prob_positive", "REAL"),
        ("stage2_best_prob_neutral", "REAL"),
        ("stage2_best_prob_negative", "REAL"),
        ("stage2_alt_sentiment", "TEXT"),
        ("stage2_alt_confidence", "REAL"),
        ("stage2_alt_prob_positive", "REAL"),
        ("stage2_alt_prob_neutral", "REAL"),
        ("stage2_alt_prob_negative", "REAL"),
        ("stage2_agreement", "BOOLEAN"),
        ("stage2_layer_sentiment", "TEXT"),
        ("stage2_completed_at", "TIMESTAMP"),
    )

    _STAGE3_COLUMNS = (
        ("stage3_final_sentiment", "TEXT"),
        ("stage3_confidence", "REAL"),
        ("stage3_reasoning", "TEXT"),
        ("stage3_validation_notes", "TEXT"),
        ("stage3_conflicts_found", "TEXT"),
        ("stage3_action_recommendation", "TEXT"),
        ("stage3_needs_human_review", "BOOLEAN"),
        ("stage3_completed_at", "TIMESTAMP"),
    )

    _METADATA_COLUMNS = (
        ("processing_status", "TEXT DEFAULT 'pending'"),
        ("processing_version", "TEXT DEFAULT 'v1.0'"),
        ("processing_started_at", "TIMESTAMP"),
        ("processing_completed_at", "TIMESTAMP"),
    )

    def __init__(self, db_file: str = "review_database.db"):
        """Remember the database path; no connection is opened yet."""
        self.db_file = db_file
        self.conn = None
        print(f"πŸ“ Database: {db_file}")

    def connect(self):
        """Open the SQLite connection, enable name-based row access, and return it."""
        self.conn = sqlite3.connect(self.db_file, check_same_thread=False)
        self.conn.row_factory = sqlite3.Row
        print("βœ… Connected to database")
        return self.conn

    def close(self):
        """Close the connection if one is open; safe to call repeatedly."""
        if self.conn:
            self.conn.close()
            # Drop the reference so a closed connection is never reused.
            self.conn = None
            print("βœ… Database connection closed")

    def enhance_schema(self):
        """
        Add the stage and metadata columns to the existing ``reviews`` table.

        Non-destructive: only columns that are not already present are added.
        Also creates the ``llm_decision_logs`` and ``batch_insights`` tables.

        Returns:
            Number of columns actually added by this call.
        """
        print("\n" + "="*60)
        print("πŸ”§ ENHANCING DATABASE SCHEMA")
        print("="*60)

        cursor = self.conn.cursor()

        # PRAGMA table_info yields one row per column; index 1 is the name.
        cursor.execute("PRAGMA table_info(reviews)")
        existing_columns = {row[1] for row in cursor.fetchall()}
        print(f"πŸ“‹ Existing columns: {len(existing_columns)}")

        all_new_columns = (
            self._STAGE1_COLUMNS +
            self._STAGE2_COLUMNS +
            self._STAGE3_COLUMNS +
            self._METADATA_COLUMNS
        )

        added_count = 0
        for col_name, col_type in all_new_columns:
            if col_name in existing_columns:
                continue
            try:
                cursor.execute(f"ALTER TABLE reviews ADD COLUMN {col_name} {col_type}")
            except sqlite3.OperationalError as e:
                # A duplicate column is harmless; report anything else.
                if "duplicate column" not in str(e).lower():
                    print(f" ⚠️ Error adding {col_name}: {e}")
            else:
                added_count += 1
                print(f" βœ… Added column: {col_name}")

        self.conn.commit()
        print(f"\nβœ… Schema enhanced: {added_count} new columns added")

        self._create_logs_table(cursor)
        self._create_batch_insights_table(cursor)

        return added_count

    def _create_logs_table(self, cursor):
        """Create the ``llm_decision_logs`` table and its indexes if missing."""
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS llm_decision_logs (
                log_id INTEGER PRIMARY KEY AUTOINCREMENT,
                review_id TEXT NOT NULL,
                stage TEXT NOT NULL,
                model_name TEXT NOT NULL,
                input_prompt TEXT,
                output_response TEXT,
                confidence REAL,
                reasoning TEXT,
                processing_time_seconds REAL,
                timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (review_id) REFERENCES reviews(review_id)
            )
        """)

        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_logs_review_id
            ON llm_decision_logs(review_id)
        """)

        cursor.execute("""
            CREATE INDEX IF NOT EXISTS idx_logs_stage
            ON llm_decision_logs(stage)
        """)

        self.conn.commit()
        print(" βœ… Created llm_decision_logs table")

    def _create_batch_insights_table(self, cursor):
        """Create the ``batch_insights`` analytics table if missing."""
        cursor.execute("""
            CREATE TABLE IF NOT EXISTS batch_insights (
                batch_id INTEGER PRIMARY KEY AUTOINCREMENT,
                analysis_date DATE,
                total_reviews INTEGER,
                sentiment_positive INTEGER,
                sentiment_neutral INTEGER,
                sentiment_negative INTEGER,
                priority_critical INTEGER,
                priority_high INTEGER,
                priority_medium INTEGER,
                priority_low INTEGER,
                dept_engineering INTEGER,
                dept_ux INTEGER,
                dept_support INTEGER,
                dept_business INTEGER,
                critical_issues TEXT,
                quick_wins TEXT,
                recommendations TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)

        self.conn.commit()
        print(" βœ… Created batch_insights table")

    def get_pending_reviews(self, limit: Optional[int] = None) -> List[Dict]:
        """
        Return reviews whose ``processing_status`` is NULL or ``'pending'``,
        newest ``scraped_at`` first.

        Args:
            limit: Maximum number of rows; a falsy value (None or 0) means
                no limit.
        """
        query = """
            SELECT * FROM reviews
            WHERE processing_status IS NULL OR processing_status = 'pending'
            ORDER BY scraped_at DESC
        """
        params: tuple = ()
        if limit:
            # Bind the limit instead of formatting it into the SQL text.
            query += " LIMIT ?"
            params = (limit,)

        cursor = self.conn.cursor()
        cursor.execute(query, params)
        return [dict(row) for row in cursor.fetchall()]

    def _update_stage(self, review_id: str, data: Dict[str, Any], columns,
                      prefix: str, status: str,
                      mark_processing_complete: bool = False):
        """
        Write one stage's results onto a review row and commit.

        Each column in ``columns`` is filled from ``data`` under the column
        name with ``prefix`` removed (missing keys become NULL); the
        ``<prefix>completed_at`` column receives the current timestamp.
        """
        # One clock reading so every timestamp written by this call matches.
        now = datetime.now().isoformat()
        completed_col = f"{prefix}completed_at"

        assignments = []
        params: List[Any] = []
        for col_name, _col_type in columns:
            assignments.append(f"{col_name} = ?")
            if col_name == completed_col:
                params.append(now)
            else:
                params.append(data.get(col_name[len(prefix):]))

        assignments.append("processing_status = ?")
        params.append(status)
        if mark_processing_complete:
            assignments.append("processing_completed_at = ?")
            params.append(now)
        params.append(review_id)

        cursor = self.conn.cursor()
        cursor.execute(
            f"UPDATE reviews SET {', '.join(assignments)} WHERE review_id = ?",
            params,
        )
        self.conn.commit()

    def update_stage1(self, review_id: str, data: Dict[str, Any]):
        """Store Stage 1 classification data and mark the review 'stage1_complete'."""
        self._update_stage(review_id, data, self._STAGE1_COLUMNS,
                           "stage1_", "stage1_complete")

    def update_stage2(self, review_id: str, data: Dict[str, Any]):
        """Store Stage 2 sentiment data and mark the review 'stage2_complete'."""
        self._update_stage(review_id, data, self._STAGE2_COLUMNS,
                           "stage2_", "stage2_complete")

    def update_stage3(self, review_id: str, data: Dict[str, Any]):
        """Store Stage 3 finalization data and mark the review 'complete'."""
        self._update_stage(review_id, data, self._STAGE3_COLUMNS,
                           "stage3_", "complete",
                           mark_processing_complete=True)

    def log_llm_decision(self, review_id: str, stage: str, model_name: str,
                         input_prompt: str, output_response: str,
                         confidence: float, reasoning: str, processing_time: float):
        """Insert one row into ``llm_decision_logs`` and commit."""
        cursor = self.conn.cursor()
        cursor.execute("""
            INSERT INTO llm_decision_logs
            (review_id, stage, model_name, input_prompt, output_response,
             confidence, reasoning, processing_time_seconds)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        """, (
            review_id, stage, model_name, input_prompt, output_response,
            confidence, reasoning, processing_time
        ))
        self.conn.commit()

    def get_all_processed_reviews(self) -> List[Dict]:
        """Return reviews with status 'complete', most recently completed first."""
        cursor = self.conn.cursor()
        cursor.execute("""
            SELECT * FROM reviews
            WHERE processing_status = 'complete'
            ORDER BY processing_completed_at DESC
        """)
        return [dict(row) for row in cursor.fetchall()]

    def save_batch_insights(self, insights: Dict[str, Any]):
        """
        Insert one ``batch_insights`` row dated today.

        Missing counters default to 0; ``critical_issues``, ``quick_wins`` and
        ``recommendations`` are stored as JSON text (default ``[]``).
        """
        count_keys = (
            'total_reviews',
            'sentiment_positive', 'sentiment_neutral', 'sentiment_negative',
            'priority_critical', 'priority_high', 'priority_medium', 'priority_low',
            'dept_engineering', 'dept_ux', 'dept_support', 'dept_business',
        )
        json_keys = ('critical_issues', 'quick_wins', 'recommendations')

        # Store the date as ISO text explicitly; relying on sqlite3's implicit
        # date adapter is deprecated as of Python 3.12.
        params: List[Any] = [datetime.now().date().isoformat()]
        params.extend(insights.get(key, 0) for key in count_keys)
        params.extend(json.dumps(insights.get(key, [])) for key in json_keys)

        cursor = self.conn.cursor()
        cursor.execute("""
            INSERT INTO batch_insights
            (analysis_date, total_reviews, sentiment_positive, sentiment_neutral,
             sentiment_negative, priority_critical, priority_high, priority_medium,
             priority_low, dept_engineering, dept_ux, dept_support, dept_business,
             critical_issues, quick_wins, recommendations)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        """, params)

        self.conn.commit()
        print(" βœ… Batch insights saved to database")

    def reset_processing_status(self, limit: Optional[int] = None):
        """
        Reset reviews to 'pending' and clear their processing timestamps.

        Args:
            limit: When truthy, only the ``limit`` most recently scraped
                reviews are reset; a falsy value (None or 0) resets every
                review.

        Returns:
            Number of rows updated.
        """
        query = """
            UPDATE reviews
            SET processing_status = 'pending',
                processing_started_at = NULL,
                processing_completed_at = NULL,
                stage1_completed_at = NULL,
                stage2_completed_at = NULL,
                stage3_completed_at = NULL
        """
        params: tuple = ()
        if limit:
            query += """
            WHERE review_id IN (
                SELECT review_id FROM reviews
                ORDER BY scraped_at DESC
                LIMIT ?
            )
            """
            params = (limit,)

        cursor = self.conn.cursor()
        cursor.execute(query, params)
        affected = cursor.rowcount
        self.conn.commit()

        if affected > 0:
            print(f" πŸ”„ Reset {affected} reviews to pending status")

        return affected
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
428
 
429
 
430
if __name__ == "__main__":
    # Manual smoke test: connect, enhance the schema, and count a few
    # pending reviews in the default database file.
    banner = "=" * 60
    print("\n" + banner)
    print("πŸ§ͺ TESTING DATABASE ENHANCEMENT")
    print(banner + "\n")

    database = EnhancedDatabase()
    database.connect()
    database.enhance_schema()

    # Fetch at most five unprocessed reviews.
    pending_rows = database.get_pending_reviews(limit=5)
    print(f"\nπŸ“‹ Found {len(pending_rows)} pending reviews")

    database.close()
    print("\nβœ… Database enhancement test complete!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
+ LangGraph Nodes - FINAL WORKING VERSION
3
+ Uses chat_completion() API format + Lazy loading + Fixed alt sentiment
4
  """
5
 
6
import json
import os
import threading
import time
import warnings
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from typing import Dict, Any

import torch
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer, AutoModelForSequenceClassification

warnings.filterwarnings('ignore')

from langgraph_state import ReviewState, BatchState
from database_enhanced import EnhancedDatabase
20
+
21
# The client is created lazily on first use, not at module import.
_hf_client = None


def get_hf_client():
    """
    Return the shared HuggingFace ``InferenceClient``, creating it on first use.

    The token is read from the ``HUGGINGFACE_API_KEY`` environment variable.

    Returns:
        The cached client, or ``None`` when the variable is unset or blank.
    """
    global _hf_client

    if _hf_client is not None:
        return _hf_client

    token = (os.getenv("HUGGINGFACE_API_KEY") or "").strip()
    if not token:
        return None

    # Never echo any part of the token: stdout usually ends up in logs.
    print("βœ… Initializing HF client")
    _hf_client = InferenceClient(token=token)
    return _hf_client
41
+
42
+
43
# Sentiment models are loaded once and cached in these module globals.
_sentiment_models_loaded = False
_best_tokenizer = None
_best_model = None
_alt_tokenizer = None
_alt_model = None
# Serializes the first load so concurrent callers do not each load the models.
_sentiment_models_lock = threading.Lock()


def load_sentiment_models():
    """
    Load both sentiment tokenizer/model pairs into the module globals, once.

    Later calls return immediately. The load runs under a lock and the
    loaded flag is re-checked inside it, so callers arriving from several
    threads at the same time trigger a single load.
    """
    global _sentiment_models_loaded, _best_tokenizer, _best_model, _alt_tokenizer, _alt_model

    if _sentiment_models_loaded:
        return

    with _sentiment_models_lock:
        # Another thread may have finished loading while we waited.
        if _sentiment_models_loaded:
            return

        print(" πŸ“¦ Loading Twitter-BERT models (one-time)...")

        # Primary ("best") model
        best_name = "cardiffnlp/twitter-roberta-base-sentiment-latest"
        _best_tokenizer = AutoTokenizer.from_pretrained(best_name)
        _best_model = AutoModelForSequenceClassification.from_pretrained(best_name)
        _best_model.eval()

        # Alternate model; low_cpu_mem_usage=False keeps weights off the
        # meta device.
        alt_name = "finiteautomata/bertweet-base-sentiment-analysis"
        _alt_tokenizer = AutoTokenizer.from_pretrained(alt_name)
        _alt_model = AutoModelForSequenceClassification.from_pretrained(
            alt_name,
            low_cpu_mem_usage=False
        )
        _alt_model.eval()

        # Set the flag last so no caller sees it before the models exist.
        _sentiment_models_loaded = True
        print(" βœ… Sentiment models loaded!")
74
+
75
+
76
+ # ============================================================================
77
+ # STAGE 1: CLASSIFICATION NODE
78
+ # ============================================================================
79
+
80
def llm1_classify(review: Dict[str, Any]) -> Dict[str, Any]:
    """
    LLM1: classify a review by type, department and priority.

    Args:
        review: Mapping read for ``review_text`` (default ``''``) and
            ``rating`` (default ``3``).

    Returns:
        A dict that always contains ``type``, ``department``, ``priority``,
        ``confidence``, ``reasoning`` and ``model``. Keys the model omits
        are filled with placeholder values. When no HuggingFace client is
        available, or the call/parsing fails, all placeholders are returned
        with the cause in ``reasoning``. This function does not raise.
    """
    model_name = 'Qwen/Qwen2.5-72B-Instruct'

    # Every key that callers index is present here, so a partial or failed
    # LLM answer cannot surface later as a KeyError.
    defaults = {
        'type': 'unknown',
        'department': 'unknown',
        'priority': 'medium',
        'confidence': 0.0,
        'reasoning': '',
        'model': model_name,
    }

    hf_client = get_hf_client()
    if hf_client is None:
        return {**defaults, 'reasoning': 'HuggingFace API key not set'}

    review_text = review.get('review_text', '')
    rating = review.get('rating', 3)

    system_prompt = """You are an expert at classifying customer reviews for theme park and attraction apps.

Classify reviews across these dimensions:

1. TYPE: complaint, praise, suggestion, question, or bug_report
2. DEPARTMENT: engineering, ux, support, or business
3. PRIORITY: critical, high, medium, or low
4. CONFIDENCE: 0.0-1.0
5. REASONING: Brief one-sentence explanation

Respond ONLY in valid JSON format:
{
"type": "complaint/praise/suggestion/question/bug_report",
"department": "engineering/ux/support/business",
"priority": "critical/high/medium/low",
"confidence": 0.0-1.0,
"reasoning": "brief explanation"
}"""

    user_prompt = f"""REVIEW:
Rating: {rating}/5
Text: {review_text}

Classify this review:"""

    try:
        print(f" πŸ” Calling Qwen API...")

        response = hf_client.chat_completion(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            model=model_name,
            max_tokens=200,
            temperature=0.1
        )

        content = response.choices[0].message.content
        print(f" βœ… Got response ({len(content)} chars)")

        # Strip a Markdown code fence, if the model added one.
        content_clean = content.strip()
        if content_clean.startswith('```'):
            content_clean = content_clean.split('```')[1]
            if content_clean.startswith('json'):
                content_clean = content_clean[4:]
            content_clean = content_clean.strip()

        # If prose surrounds the JSON, keep only the outermost {...} span.
        if not content_clean.startswith('{'):
            start, end = content_clean.find('{'), content_clean.rfind('}')
            if start != -1 and end > start:
                content_clean = content_clean[start:end + 1]

        parsed = json.loads(content_clean)
        if not isinstance(parsed, dict):
            raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")

        result = {**defaults, **parsed}
        result['model'] = model_name

        print(f" βœ… Parsed: {result['type']} β†’ {result['department']}")
        return result

    except Exception as e:
        # Deliberate best-effort: one failed call must not abort the pipeline.
        print(f"❌ LLM1 ERROR: {type(e).__name__}: {str(e)}")
        return {**defaults, 'reasoning': f'API Error: {str(e)}'}
167
+
168
+
169
def llm2_analyze(review: Dict[str, Any]) -> Dict[str, Any]:
    """
    LLM2: analyze a review for user type, emotion and context.

    Args:
        review: Mapping read for ``review_text`` (default ``''``) and
            ``rating`` (default ``3``).

    Returns:
        A dict that always contains ``user_type``, ``emotion``, ``context``,
        ``confidence``, ``reasoning`` and ``model``. Keys the model omits
        are filled with placeholder values. When no HuggingFace client is
        available, or the call/parsing fails, all placeholders are returned
        with the cause in ``reasoning``. This function does not raise.
    """
    model_name = 'mistralai/Mistral-7B-Instruct-v0.3'

    # Every key that callers index is present here, so a partial or failed
    # LLM answer cannot surface later as a KeyError.
    defaults = {
        'user_type': 'unknown',
        'emotion': 'unknown',
        'context': 'unknown',
        'confidence': 0.0,
        'reasoning': '',
        'model': model_name,
    }

    hf_client = get_hf_client()
    if hf_client is None:
        return {**defaults, 'reasoning': 'HuggingFace API key not set'}

    review_text = review.get('review_text', '')
    rating = review.get('rating', 3)

    system_prompt = """You are an expert at understanding customer psychology and emotional context.

Analyze reviews for:
1. USER_TYPE: new_user, regular_user, power_user, or churning_user
2. EMOTION: anger, frustration, joy, satisfaction, disappointment, or confusion
3. CONTEXT: Brief context (1-2 words)
4. CONFIDENCE: 0.0-1.0
5. REASONING: Brief explanation

Respond ONLY in valid JSON format:
{
"user_type": "new_user/regular_user/power_user/churning_user",
"emotion": "anger/frustration/joy/satisfaction/disappointment/confusion",
"context": "brief context",
"confidence": 0.0-1.0,
"reasoning": "brief explanation"
}"""

    user_prompt = f"""REVIEW:
Rating: {rating}/5
Text: {review_text}

Analyze this review:"""

    try:
        print(f" πŸ” Calling Mistral API...")

        response = hf_client.chat_completion(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            model=model_name,
            max_tokens=200,
            temperature=0.1
        )

        content = response.choices[0].message.content
        print(f" βœ… Got response ({len(content)} chars)")

        # Strip a Markdown code fence, if the model added one.
        content_clean = content.strip()
        if content_clean.startswith('```'):
            content_clean = content_clean.split('```')[1]
            if content_clean.startswith('json'):
                content_clean = content_clean[4:]
            content_clean = content_clean.strip()

        # If prose surrounds the JSON, keep only the outermost {...} span.
        if not content_clean.startswith('{'):
            start, end = content_clean.find('{'), content_clean.rfind('}')
            if start != -1 and end > start:
                content_clean = content_clean[start:end + 1]

        parsed = json.loads(content_clean)
        if not isinstance(parsed, dict):
            raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")

        result = {**defaults, **parsed}
        result['model'] = model_name

        print(f" βœ… Parsed: {result['user_type']}, {result['emotion']}")
        return result

    except Exception as e:
        # Deliberate best-effort: one failed call must not abort the pipeline.
        print(f"❌ LLM2 ERROR: {type(e).__name__}: {str(e)}")
        return {**defaults, 'reasoning': f'API Error: {str(e)}'}
254
+
255
+
256
def manager_synthesize(llm1_result: Dict, llm2_result: Dict, review: Dict) -> Dict[str, Any]:
    """
    Manager: reconcile the LLM1 and LLM2 analyses into a final classification.

    Args:
        llm1_result: Type/department/priority analysis; its values are the
            fallback for the final decision.
        llm2_result: User/emotion/context analysis, shown to the manager model.
        review: Mapping read for ``review_text`` (default ``''``) and
            ``rating`` (default ``3``).

    Returns:
        A dict that always contains ``final_type``, ``final_department``,
        ``final_priority``, ``synthesis_reasoning`` and ``model``. Keys the
        model omits fall back to LLM1's values. When no HuggingFace client is
        available, or the call/parsing fails, LLM1's values are returned with
        the cause in ``synthesis_reasoning``. This function does not raise.
    """
    model_name = 'meta-llama/Llama-3.3-70B-Instruct'

    # LLM1's answer is the baseline; the manager can only override it.
    defaults = {
        'final_type': llm1_result.get('type', 'unknown'),
        'final_department': llm1_result.get('department', 'unknown'),
        'final_priority': llm1_result.get('priority', 'medium'),
        'synthesis_reasoning': '',
        'model': model_name,
    }

    hf_client = get_hf_client()
    if hf_client is None:
        return {**defaults, 'synthesis_reasoning': 'HuggingFace API key not set'}

    review_text = review.get('review_text', '')
    rating = review.get('rating', 3)

    system_prompt = """You are a synthesis manager evaluating two AI analyses.

Your task:
1. Validate both analyses
2. Resolve conflicts
3. Make final classification decision
4. Provide synthesis reasoning

Respond ONLY in valid JSON format:
{
"final_type": "from llm1 or adjusted",
"final_department": "from llm1 or adjusted",
"final_priority": "from llm1 or adjusted",
"synthesis_reasoning": "brief explanation"
}"""

    user_prompt = f"""REVIEW:
Rating: {rating}/5
Text: {review_text}

LLM1 ANALYSIS (Type/Dept/Priority):
{json.dumps(llm1_result, indent=2)}

LLM2 ANALYSIS (User/Emotion/Context):
{json.dumps(llm2_result, indent=2)}

Synthesize these analyses:"""

    try:
        print(f" πŸ” Calling Llama Manager API...")

        response = hf_client.chat_completion(
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            model=model_name,
            max_tokens=200,
            temperature=0.1
        )

        content = response.choices[0].message.content
        print(f" βœ… Got response ({len(content)} chars)")

        # Strip a Markdown code fence, if the model added one.
        content_clean = content.strip()
        if content_clean.startswith('```'):
            content_clean = content_clean.split('```')[1]
            if content_clean.startswith('json'):
                content_clean = content_clean[4:]
            content_clean = content_clean.strip()

        # If prose surrounds the JSON, keep only the outermost {...} span.
        if not content_clean.startswith('{'):
            start, end = content_clean.find('{'), content_clean.rfind('}')
            if start != -1 and end > start:
                content_clean = content_clean[start:end + 1]

        parsed = json.loads(content_clean)
        if not isinstance(parsed, dict):
            raise ValueError(f"expected a JSON object, got {type(parsed).__name__}")

        result = {**defaults, **parsed}
        result['model'] = model_name

        print(f" βœ… Manager decision: {result['final_type']} β†’ {result['final_department']}")
        return result

    except Exception as e:
        # Deliberate best-effort: fall back to LLM1's classification.
        print(f"❌ MANAGER ERROR: {type(e).__name__}: {str(e)}")
        return {**defaults, 'synthesis_reasoning': f'Manager error: {str(e)}'}
342
 
343
 
344
def stage1_classification_node(state: ReviewState) -> Dict[str, Any]:
    """
    Stage 1 node: run LLM1 and LLM2 in parallel, then let the manager decide.

    Args:
        state: Review state; ``review_id`` is required, and the whole state
            is passed to the LLM helpers as the review mapping.

    Returns:
        State update holding the three raw results, the final
        ``classification_type`` / ``department`` / ``priority``, LLM2's
        ``user_type`` / ``emotion`` / ``context``, the stage timing and the
        existing ``errors`` list. Keys missing from an LLM result fall back
        to placeholders instead of raising ``KeyError``.
    """
    print(f"\n πŸ“ Review ID: {state['review_id']}")
    print(f" ⏳ STAGE 1: Classification (Parallel LLM1 + LLM2)...")

    start_time = time.time()
    review_dict = dict(state)

    # The two LLM calls are independent network requests, so overlap them.
    with ThreadPoolExecutor(max_workers=2) as executor:
        future_llm1 = executor.submit(llm1_classify, review_dict)
        future_llm2 = executor.submit(llm2_analyze, review_dict)

        llm1_result = future_llm1.result()
        llm2_result = future_llm2.result()

    # LLM output is free-form JSON, so read every field defensively.
    llm1_type = llm1_result.get('type', 'unknown')
    llm1_department = llm1_result.get('department', 'unknown')
    llm1_priority = llm1_result.get('priority', 'medium')
    user_type = llm2_result.get('user_type', 'unknown')
    emotion = llm2_result.get('emotion', 'unknown')

    print(f" βœ… LLM1: {llm1_type} β†’ {llm1_department} (Priority: {llm1_priority})")
    print(f" βœ… LLM2: {user_type}, {emotion}")

    print(f" πŸ€– Manager synthesizing...")
    manager_result = manager_synthesize(llm1_result, llm2_result, review_dict)

    stage1_time = time.time() - start_time
    print(f" βœ… Stage 1 complete ({stage1_time:.2f}s)")

    return {
        "llm1_result": llm1_result,
        "llm2_result": llm2_result,
        "manager_result": manager_result,
        # If the manager omitted a field, keep LLM1's value.
        "classification_type": manager_result.get('final_type', llm1_type),
        "department": manager_result.get('final_department', llm1_department),
        "priority": manager_result.get('final_priority', llm1_priority),
        "user_type": user_type,
        "emotion": emotion,
        "context": llm2_result.get('context', ''),
        "stage1_completed": True,
        "stage1_time": stage1_time,
        "errors": state.get('errors', [])
    }
384
+
385
+
386
+ # ============================================================================
387
+ # STAGE 2: SENTIMENT ANALYSIS
388
+ # ============================================================================
389
+
390
def analyze_best_sentiment(text: str) -> Dict[str, Any]:
    """
    Score ``text`` with the primary sentiment model.

    Returns the predicted label (NEGATIVE / NEUTRAL / POSITIVE), its
    probability as ``confidence``, and the three class probabilities. On any
    exception a NEUTRAL result with confidence 0.0 and an ``error`` message
    is returned instead.
    """
    load_sentiment_models()

    index_to_label = {0: "NEGATIVE", 1: "NEUTRAL", 2: "POSITIVE"}

    try:
        encoded = _best_tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True,
        )

        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            logits = _best_model(**encoded).logits
            probs = torch.nn.functional.softmax(logits, dim=-1)
            top_index = torch.argmax(probs, dim=-1).item()

        class_probs = probs[0]
        return {
            'sentiment': index_to_label[top_index],
            'confidence': class_probs[top_index].item(),
            'prob_negative': class_probs[0].item(),
            'prob_neutral': class_probs[1].item(),
            'prob_positive': class_probs[2].item(),
            'model': 'twitter-roberta-base-sentiment-latest'
        }
    except Exception as e:
        print(f"❌ Best sentiment ERROR: {e}")
        return {
            'sentiment': 'NEUTRAL',
            'confidence': 0.0,
            'prob_negative': 0.33,
            'prob_neutral': 0.34,
            'prob_positive': 0.33,
            'model': 'error',
            'error': str(e)
        }
424
+
425
+
426
def analyze_alt_sentiment(text: str) -> Dict[str, Any]:
    """
    Score ``text`` with the alternate (BERTweet) sentiment model.

    Input is truncated to 128 tokens, BERTweet's maximum sequence length.

    Returns the predicted label (NEGATIVE / NEUTRAL / POSITIVE), its
    probability as ``confidence``, and the three class probabilities. On any
    exception a NEUTRAL result with confidence 0.0 and an ``error`` message
    is returned instead.
    """
    load_sentiment_models()

    # BERTweet accepts at most 128 tokens. Truncating at 512 lets long
    # reviews overflow the position embeddings and always hit the fallback.
    max_tokens = 128

    try:
        inputs = _alt_tokenizer(text, return_tensors="pt", truncation=True, max_length=max_tokens, padding=True)

        # Inference only, so gradient tracking is switched off.
        with torch.no_grad():
            outputs = _alt_model(**inputs)
            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
            prediction = torch.argmax(probs, dim=-1).item()
            confidence = probs[0][prediction].item()

        label_map = {0: "NEGATIVE", 1: "NEUTRAL", 2: "POSITIVE"}

        return {
            'sentiment': label_map[prediction],
            'confidence': confidence,
            'prob_negative': probs[0][0].item(),
            'prob_neutral': probs[0][1].item(),
            'prob_positive': probs[0][2].item(),
            'model': 'bertweet-base-sentiment-analysis'
        }
    except Exception as e:
        print(f"❌ Alt sentiment ERROR: {e}")
        return {
            'sentiment': 'NEUTRAL',
            'confidence': 0.0,
            'prob_negative': 0.33,
            'prob_neutral': 0.34,
            'prob_positive': 0.33,
            'model': 'error',
            'error': str(e)
        }
460
+
461
+
462
def sentiment_layer(best_result: Dict, alt_result: Dict) -> Dict[str, Any]:
    """
    Merge the two model verdicts into one sentiment.

    When both models give the same label, that label is used with the higher
    of the two confidences ("STRONG" agreement). Otherwise the label of the
    strictly more confident model wins, with the alternate model winning
    ties ("WEAK" agreement).
    """
    best_label = best_result.get('sentiment')
    alt_label = alt_result.get('sentiment')
    best_conf = best_result.get('confidence', 0.0)
    alt_conf = alt_result.get('confidence', 0.0)

    models_agree = best_label == alt_label

    if models_agree:
        return {
            'layer_sentiment': best_label,
            'combined_confidence': max(best_conf, alt_conf),
            'agreement': models_agree,
            'agreement_strength': "STRONG"
        }

    # Disagreement: take whichever model is more sure of itself.
    winner_label, winner_conf = (
        (best_label, best_conf) if best_conf > alt_conf else (alt_label, alt_conf)
    )
    return {
        'layer_sentiment': winner_label,
        'combined_confidence': winner_conf,
        'agreement': models_agree,
        'agreement_strength': "WEAK"
    }
491
+
492
+
493
def stage2_sentiment_node(state: ReviewState) -> Dict[str, Any]:
    """
    Stage 2 node: score the review with both sentiment models in parallel
    and merge the verdicts.

    Reads ``review_text`` from ``state`` and returns a state update holding
    both raw results, the merged layer result, the final sentiment fields,
    the stage timing and the existing ``errors`` list.
    """
    print(f"\n ⏳ STAGE 2: Sentiment Analysis (Parallel Best + Alternate)...")

    started_at = time.time()
    text = state['review_text']

    # Run both models side by side on a two-worker pool.
    with ThreadPoolExecutor(max_workers=2) as pool:
        best_future = pool.submit(analyze_best_sentiment, text)
        alt_future = pool.submit(analyze_alt_sentiment, text)

        best_result = best_future.result()
        alt_result = alt_future.result()

    print(f" βœ… Best: {best_result['sentiment']} ({best_result['confidence']:.3f})")
    print(f" βœ… Alt: {alt_result['sentiment']} ({alt_result['confidence']:.3f})")

    # Combine the two verdicts into a single sentiment.
    layer_result = sentiment_layer(best_result, alt_result)

    if layer_result['agreement']:
        agreement_icon = "βœ…"
    else:
        agreement_icon = "⚠️ "
    print(f" {agreement_icon} Final: {layer_result['layer_sentiment']} (agreement: {layer_result['agreement']})")

    stage2_time = time.time() - started_at
    print(f" βœ… Stage 2 complete ({stage2_time:.2f}s)")

    update = {
        "best_sentiment_result": best_result,
        "alt_sentiment_result": alt_result,
        "sentiment_layer_result": layer_result,
        "sentiment": layer_result['layer_sentiment'],
        "sentiment_confidence": layer_result['combined_confidence'],
        "sentiment_agreement": layer_result['agreement'],
        "stage2_completed": True,
        "stage2_time": stage2_time,
        "errors": state.get('errors', [])
    }
    return update
531
+
532
+
533
+ # ============================================================================
534
+ # STAGE 3: FINALIZATION NODE
535
+ # ============================================================================
536
+
537
_STAGE3_MODEL = 'meta-llama/Llama-3.1-70B-Instruct'
_STAGE3_SENTIMENTS = ('POSITIVE', 'NEGATIVE', 'NEUTRAL')

_STAGE3_SYSTEM_PROMPT = """You are a final decision-making AI analyzing customer feedback for a theme park/attraction app.

Your task:
1. Review all data from previous stages
2. Make FINAL sentiment decision
3. Provide comprehensive reasoning
4. Generate action recommendation
5. Flag if human review needed

Respond ONLY in valid JSON format:
{
"final_sentiment": "POSITIVE/NEGATIVE/NEUTRAL",
"confidence": 0.0-1.0,
"reasoning": "Comprehensive explanation",
"validation_notes": "Does classification match sentiment?",
"conflicts_found": "any conflicts or 'none'",
"action_recommendation": "Specific action",
"needs_human_review": true/false
}"""


def _stage3_as_float(value, default=0.0):
    """Coerce ``value`` to float; return ``default`` if it is missing or non-numeric."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return default


def _stage3_fallback_result(state, reasoning, validation_notes,
                            conflicts_found, default_confidence):
    """Build a Stage 3 result that simply carries the Stage 2 verdict forward."""
    return {
        'final_sentiment': state.get('sentiment', 'NEUTRAL'),
        'confidence': _stage3_as_float(
            state.get('sentiment_confidence'), default_confidence
        ),
        'reasoning': reasoning,
        'validation_notes': validation_notes,
        'conflicts_found': conflicts_found,
        # Same default on every path, so a missing department never
        # produces "Route to None".
        'action_recommendation': f"Route to {state.get('department') or 'support'}",
        'needs_human_review': True,
        'model': _STAGE3_MODEL
    }


def _stage3_describe_model(model_result):
    """Format one Stage 2 model result as 'LABEL (0.00)' for the prompt."""
    model_result = model_result or {}
    confidence = _stage3_as_float(model_result.get('confidence'))
    return f"{model_result.get('sentiment')} ({confidence:.2f})"


def _stage3_user_prompt(state, review_text, rating):
    """Render the user message summarising the review and Stages 1-2."""
    best_line = _stage3_describe_model(state.get('best_sentiment_result'))
    alt_line = _stage3_describe_model(state.get('alt_sentiment_result'))
    return f"""REVIEW DATA:
Rating: {rating}/5
Text: {review_text}

STAGE 1 CLASSIFICATION:
- Type: {state.get('classification_type')}
- Department: {state.get('department')}
- Priority: {state.get('priority')}
- User Type: {state.get('user_type')}
- Emotion: {state.get('emotion')}

STAGE 2 SENTIMENT:
- Best: {best_line}
- Alternate: {alt_line}
- Agreement: {state.get('sentiment_agreement')}

Make your final decision:"""


def _stage3_parse_reply(content):
    """Extract the JSON object from an LLM reply, tolerating fences and prose.

    Raises ValueError (json.JSONDecodeError is a subclass) when no JSON
    object can be recovered.
    """
    text = content.strip()
    if text.startswith('```'):
        text = text.split('```')[1]
        if text.startswith('json'):
            text = text[4:]
        text = text.strip()

    try:
        parsed = json.loads(text)
    except json.JSONDecodeError:
        # The model sometimes surrounds the object with commentary;
        # retry on the outermost {...} span before giving up.
        start, end = text.find('{'), text.rfind('}')
        if start == -1 or end <= start:
            raise
        parsed = json.loads(text[start:end + 1])

    if not isinstance(parsed, dict):
        raise ValueError(
            f"expected a JSON object from LLM3, got {type(parsed).__name__}"
        )
    return parsed


def _stage3_normalize(parsed, state):
    """Fill in missing fields and coerce types so downstream access is safe."""
    defaults = _stage3_fallback_result(
        state,
        reasoning='Not provided by LLM3',
        validation_notes='Not provided by LLM3',
        conflicts_found='none',
        default_confidence=0.5,
    )
    incomplete = any(
        key not in parsed for key in defaults if key != 'model'
    )

    # Extra keys returned by the model are preserved.
    result = {**defaults, **parsed}

    sentiment = str(result['final_sentiment']).strip().upper()
    if sentiment not in _STAGE3_SENTIMENTS:
        sentiment = defaults['final_sentiment']
        incomplete = True
    result['final_sentiment'] = sentiment
    result['confidence'] = _stage3_as_float(
        result['confidence'], defaults['confidence']
    )

    if incomplete:
        # A partial or malformed answer should not be trusted unattended.
        result['needs_human_review'] = True
    result['model'] = _STAGE3_MODEL
    return result


def _stage3_state_update(state, result, stage3_time):
    """Print the Stage 3 summary and build the state update dict."""
    print(f" βœ… Final: {result['final_sentiment']} ({result.get('confidence', 0):.3f})")
    print(f" πŸ“‹ Needs Review: {result.get('needs_human_review', False)}")
    print(f" βœ… Stage 3 complete ({stage3_time:.2f}s)")

    total_time = state.get('stage1_time', 0) + state.get('stage2_time', 0) + stage3_time

    return {
        "final_result": result,
        "final_sentiment": result['final_sentiment'],
        "final_confidence": result['confidence'],
        "reasoning": result['reasoning'],
        "action_recommendation": result['action_recommendation'],
        "conflicts_found": result['conflicts_found'],
        "validation_notes": result['validation_notes'],
        "needs_human_review": result['needs_human_review'],
        "stage3_completed": True,
        "stage3_time": stage3_time,
        "total_time": total_time,
        "processing_completed_at": datetime.now().isoformat(),
        "errors": state.get('errors', [])
    }


def stage3_finalization_node(state: ReviewState) -> Dict[str, Any]:
    """Stage 3 Node: final synthesis with LLM3.

    Sends the review plus the Stage 1 classification and Stage 2 sentiment
    results to the chat model and returns the final decision as a state
    update.

    Degraded paths (both flag ``needs_human_review``):
      * ``get_hf_client()`` returns None: the stage is skipped and the
        Stage 2 sentiment is carried forward with ``stage3_time`` 0.
      * The API call fails or its reply cannot be parsed: the Stage 2
        sentiment is carried forward and the error text is stored in
        'reasoning'.

    A reply that parses but omits fields, uses an unknown sentiment label
    or a non-numeric confidence is completed from the Stage 2 values
    instead of raising.

    Args:
        state: Pipeline state. 'review_text' and 'rating' are required;
            every other key is read with a default.

    Returns:
        Dict with 'final_result', the flattened final fields, timing
        ('stage3_time', 'total_time'), 'processing_completed_at' and the
        existing 'errors' list.

    Raises:
        KeyError: if 'review_text' or 'rating' is missing from ``state``
            while an API client is available.
    """
    print(f"\n ⏳ STAGE 3: Finalization (LLM3)...")

    start_time = time.time()

    hf_client = get_hf_client()

    if hf_client is None:
        result = _stage3_fallback_result(
            state,
            reasoning='Stage 3 skipped - HuggingFace API key not set',
            validation_notes='API key missing',
            conflicts_found='none',
            default_confidence=0.0,
        )
        return _stage3_state_update(state, result, 0.00)

    review_text = state['review_text']
    rating = state['rating']

    # Stage boundary: any failure below degrades to the Stage 2 verdict
    # rather than aborting the whole pipeline run.
    try:
        user_prompt = _stage3_user_prompt(state, review_text, rating)

        print(f" πŸ” Calling Llama 70B API...")

        # NOTE(review): presumably a huggingface_hub InferenceClient -- confirm.
        response = hf_client.chat_completion(
            messages=[
                {"role": "system", "content": _STAGE3_SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt}
            ],
            model=_STAGE3_MODEL,
            max_tokens=400,
            temperature=0.1
        )

        content = response.choices[0].message.content
        print(f" βœ… Got response ({len(content)} chars)")

        result = _stage3_normalize(_stage3_parse_reply(content), state)

    except Exception as e:
        print(f"❌ STAGE 3 ERROR: {type(e).__name__}: {str(e)}")

        result = _stage3_fallback_result(
            state,
            reasoning=f'Error in LLM3: {str(e)}',
            validation_notes='Error',
            conflicts_found='error',
            default_confidence=0.5,
        )

    stage3_time = time.time() - start_time

    return _stage3_state_update(state, result, stage3_time)
685
+
686
+
687
if __name__ == "__main__":
    # Running the module directly only prints which graph nodes it defines.
    banner_lines = (
        "\nβœ… LangGraph nodes module loaded!",
        " Nodes available:",
        " - stage1_classification_node (parallel LLM1+LLM2)",
        " - stage2_sentiment_node (parallel Best+Alt)",
        " - stage3_finalization_node (LLM3)",
    )
    for banner_line in banner_lines:
        print(banner_line)