Spaces:

riazmo
/

CxSentimentAnalysisAI

Sleeping

App Files Files Community

riazmo committed on Jan 1

Commit

868df9f

verified ·

1 Parent(s): 1da16aa

Update langgraph_nodes.py

Browse files

Files changed (1) hide show

langgraph_nodes.py +152 -174

langgraph_nodes.py CHANGED Viewed

@@ -1,6 +1,6 @@
 """
-LangGraph Nodes - LAZY LOADING VERSION
-Initializes HF client when needed, not at module import
 """
 import os
@@ -19,7 +19,6 @@ from langgraph_state import ReviewState, BatchState
 from database_enhanced import EnhancedDatabase
 # FIXED: Don't initialize client at module import
-# Initialize LAZILY when first needed
 _hf_client = None
 def get_hf_client():
@@ -33,7 +32,6 @@ def get_hf_client():
     HF_TOKEN = os.getenv("HUGGINGFACE_API_KEY")
     if not HF_TOKEN or HF_TOKEN.strip() == "":
-        # No token available
         return None
     # Initialize client with token
@@ -42,7 +40,7 @@ def get_hf_client():
     return _hf_client
-# Initialize sentiment models (singleton) - load once
 _sentiment_models_loaded = False
 _best_tokenizer = None
 _best_model = None
@@ -63,11 +61,11 @@ def load_sentiment_models():
     _best_model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
     _best_model.eval()
-    # Alternate Model - FIXED: Proper loading
     _alt_tokenizer = AutoTokenizer.from_pretrained("finiteautomata/bertweet-base-sentiment-analysis")
     _alt_model = AutoModelForSequenceClassification.from_pretrained(
         "finiteautomata/bertweet-base-sentiment-analysis",
-        torch_dtype=torch.float32  # FIXED: Explicit dtype to avoid meta tensors
     )
     _alt_model.eval()
@@ -76,13 +74,12 @@ def load_sentiment_models():
 # ============================================================================
-# STAGE 1: CLASSIFICATION NODE (Parallel LLM1 + LLM2)
 # ============================================================================
 def llm1_classify(review: Dict[str, Any]) -> Dict[str, Any]:
     """LLM1: Type, Department, Priority classification"""
-    # FIXED: Get client lazily
     hf_client = get_hf_client()
     if hf_client is None:
@@ -98,67 +95,59 @@ def llm1_classify(review: Dict[str, Any]) -> Dict[str, Any]:
     review_text = review.get('review_text', '')
     rating = review.get('rating', 3)
-    prompt = f"""You are an expert at classifying customer reviews for theme park and attraction apps.
-REVIEW:
-Rating: {rating}/5
-Text: {review_text}
-Classify this review across these dimensions:
-1. TYPE (choose ONE):
-   - complaint: Customer reports a problem
-   - praise: Customer expresses satisfaction
-   - suggestion: Customer proposes improvement
-   - question: Customer asks about something
-   - bug_report: Technical issue described
-2. DEPARTMENT (choose ONE):
-   - engineering: Technical issues, bugs, crashes
-   - ux: Design, usability, interface issues
-   - support: Customer service, help needed
-   - business: Pricing, policies, marketing
-3. PRIORITY (choose ONE):
-   - critical: Service down, major blocker
-   - high: Significant problem affecting use
-   - medium: Inconvenience but not blocking
-   - low: Minor issue or suggestion
-4. CONFIDENCE (0.0-1.0): How confident are you?
 5. REASONING: Brief one-sentence explanation
 Respond ONLY in valid JSON format:
-{{
   "type": "complaint/praise/suggestion/question/bug_report",
   "department": "engineering/ux/support/business",
   "priority": "critical/high/medium/low",
   "confidence": 0.0-1.0,
   "reasoning": "brief explanation"
-}}"""
     try:
         print(f"   🔍 Calling Qwen API...")
-        response = hf_client.text_generation(
-            prompt,
             model="Qwen/Qwen2.5-72B-Instruct",
-            max_new_tokens=200,
             temperature=0.1
         )
-        print(f"   ✅ Got response ({len(response)} chars)")
         # Clean and parse JSON
-        response_clean = response.strip()
-        if response_clean.startswith('```'):
-            response_clean = response_clean.split('```')[1]
-            if response_clean.startswith('json'):
-                response_clean = response_clean[4:]
-        response_clean = response_clean.strip()
-        result = json.loads(response_clean)
         result['model'] = 'Qwen/Qwen2.5-72B-Instruct'
         print(f"   ✅ Parsed: {result['type']} → {result['department']}")
@@ -180,7 +169,6 @@ Respond ONLY in valid JSON format:
 def llm2_analyze(review: Dict[str, Any]) -> Dict[str, Any]:
     """LLM2: User type, Emotion, Context analysis"""
-    # FIXED: Get client lazily
     hf_client = get_hf_client()
     if hf_client is None:
@@ -196,64 +184,57 @@ def llm2_analyze(review: Dict[str, Any]) -> Dict[str, Any]:
     review_text = review.get('review_text', '')
     rating = review.get('rating', 3)
-    prompt = f"""You are an expert at understanding customer psychology and emotional context.
-REVIEW:
-Rating: {rating}/5
-Text: {review_text}
-Analyze the user and emotional context:
-1. USER_TYPE (choose ONE):
-   - new_user: First-time or new user
-   - regular_user: Returning customer
-   - power_user: Heavy user, tech-savvy
-   - churning_user: Considering leaving
-2. EMOTION (choose ONE):
-   - anger: Angry, hostile tone
-   - frustration: Frustrated but not angry
-   - joy: Happy, satisfied
-   - satisfaction: Content, pleased
-   - disappointment: Let down, sad
-   - confusion: Unclear, needs help
-3. CONTEXT (brief): What is the underlying issue? 1-2 words
-4. CONFIDENCE (0.0-1.0): How confident are you?
-5. REASONING: Brief one-sentence explanation
 Respond ONLY in valid JSON format:
-{{
   "user_type": "new_user/regular_user/power_user/churning_user",
   "emotion": "anger/frustration/joy/satisfaction/disappointment/confusion",
   "context": "brief context",
   "confidence": 0.0-1.0,
   "reasoning": "brief explanation"
-}}"""
     try:
         print(f"   🔍 Calling Mistral API...")
-        response = hf_client.text_generation(
-            prompt,
             model="mistralai/Mistral-7B-Instruct-v0.3",
-            max_new_tokens=200,
             temperature=0.1
         )
-        print(f"   ✅ Got response ({len(response)} chars)")
         # Clean and parse JSON
-        response_clean = response.strip()
-        if response_clean.startswith('```'):
-            response_clean = response_clean.split('```')[1]
-            if response_clean.startswith('json'):
-                response_clean = response_clean[4:]
-        response_clean = response_clean.strip()
-        result = json.loads(response_clean)
         result['model'] = 'mistralai/Mistral-7B-Instruct-v0.3'
         print(f"   ✅ Parsed: {result['user_type']}, {result['emotion']}")
@@ -275,7 +256,6 @@ Respond ONLY in valid JSON format:
 def manager_synthesize(llm1_result: Dict, llm2_result: Dict, review: Dict) -> Dict[str, Any]:
     """Manager: Synthesize LLM1 and LLM2 results"""
-    # FIXED: Get client lazily
     hf_client = get_hf_client()
     if hf_client is None:
@@ -290,52 +270,60 @@ def manager_synthesize(llm1_result: Dict, llm2_result: Dict, review: Dict) -> Di
     review_text = review.get('review_text', '')
     rating = review.get('rating', 3)
-    prompt = f"""You are a synthesis manager evaluating two AI analyses of the same review.
-REVIEW:
-Rating: {rating}/5
-Text: {review_text}
-LLM1 ANALYSIS (Type/Dept/Priority):
-{json.dumps(llm1_result, indent=2)}
-LLM2 ANALYSIS (User/Emotion/Context):
-{json.dumps(llm2_result, indent=2)}
 Your task:
 1. Validate both analyses
-2. Resolve any conflicts
 3. Make final classification decision
 4. Provide synthesis reasoning
 Respond ONLY in valid JSON format:
-{{
   "final_type": "from llm1 or adjusted",
   "final_department": "from llm1 or adjusted",
   "final_priority": "from llm1 or adjusted",
-  "synthesis_reasoning": "brief explanation of synthesis"
-}}"""
     try:
         print(f"   🔍 Calling Llama Manager API...")
-        response = hf_client.text_generation(
-            prompt,
             model="meta-llama/Llama-3.3-70B-Instruct",
-            max_new_tokens=200,
             temperature=0.1
         )
-        print(f"   ✅ Got response ({len(response)} chars)")
-        response_clean = response.strip()
-        if response_clean.startswith('```'):
-            response_clean = response_clean.split('```')[1]
-            if response_clean.startswith('json'):
-                response_clean = response_clean[4:]
-        response_clean = response_clean.strip()
-        result = json.loads(response_clean)
         result['model'] = 'meta-llama/Llama-3.3-70B-Instruct'
         print(f"   ✅ Manager decision: {result['final_type']} → {result['final_department']}")
@@ -354,17 +342,14 @@ Respond ONLY in valid JSON format:
 def stage1_classification_node(state: ReviewState) -> Dict[str, Any]:
-    """
-    Stage 1 Node: Classification with PARALLEL execution
-    Runs LLM1 and LLM2 in parallel, then Manager synthesizes
-    """
     print(f"\n      📝 Review ID: {state['review_id']}")
     print(f"      ⏳ STAGE 1: Classification (Parallel LLM1 + LLM2)...")
     start_time = time.time()
     review_dict = dict(state)
-    # PARALLEL EXECUTION: LLM1 and LLM2 run simultaneously
     with ThreadPoolExecutor(max_workers=2) as executor:
         future_llm1 = executor.submit(llm1_classify, review_dict)
         future_llm2 = executor.submit(llm2_analyze, review_dict)
@@ -375,7 +360,7 @@ def stage1_classification_node(state: ReviewState) -> Dict[str, Any]:
     print(f"         ✅ LLM1: {llm1_result['type']} → {llm1_result['department']} (Priority: {llm1_result['priority']})")
     print(f"         ✅ LLM2: {llm2_result['user_type']}, {llm2_result['emotion']}")
-    # Manager synthesizes results
     print(f"         🤖 Manager synthesizing...")
     manager_result = manager_synthesize(llm1_result, llm2_result, review_dict)
@@ -439,7 +424,7 @@ def analyze_best_sentiment(text: str) -> Dict[str, Any]:
 def analyze_alt_sentiment(text: str) -> Dict[str, Any]:
-    """Alternate Model: BERTweet - FIXED version"""
     load_sentiment_models()
     try:
@@ -447,16 +432,7 @@ def analyze_alt_sentiment(text: str) -> Dict[str, Any]:
         with torch.no_grad():
             outputs = _alt_model(**inputs)
-            logits = outputs.logits
-            # FIXED: Check if logits are on meta device
-            if logits.device.type == 'meta':
-                print("⚠️ Warning: Model on meta device, moving to CPU")
-                _alt_model.to('cpu')
-                outputs = _alt_model(**inputs)
-                logits = outputs.logits
-            probs = torch.nn.functional.softmax(logits, dim=-1)
             prediction = torch.argmax(probs, dim=-1).item()
             confidence = probs[0][prediction].item()
@@ -515,16 +491,13 @@ def sentiment_layer(best_result: Dict, alt_result: Dict) -> Dict[str, Any]:
 def stage2_sentiment_node(state: ReviewState) -> Dict[str, Any]:
-    """
-    Stage 2 Node: Sentiment with PARALLEL execution
-    Runs Best and Alternate models in parallel, then combines
-    """
     print(f"\n      ⏳ STAGE 2: Sentiment Analysis (Parallel Best + Alternate)...")
     start_time = time.time()
     review_text = state['review_text']
-    # PARALLEL EXECUTION: Best and Alternate models run simultaneously
     with ThreadPoolExecutor(max_workers=2) as executor:
         future_best = executor.submit(analyze_best_sentiment, review_text)
         future_alt = executor.submit(analyze_alt_sentiment, review_text)
@@ -562,14 +535,11 @@ def stage2_sentiment_node(state: ReviewState) -> Dict[str, Any]:
 # ============================================================================
 def stage3_finalization_node(state: ReviewState) -> Dict[str, Any]:
-    """
-    Stage 3 Node: Final synthesis with LLM3 (Llama 70B)
-    """
     print(f"\n      ⏳ STAGE 3: Finalization (LLM3)...")
     start_time = time.time()
-    # FIXED: Get client lazily
     hf_client = get_hf_client()
     if hf_client is None:
@@ -608,9 +578,28 @@ def stage3_finalization_node(state: ReviewState) -> Dict[str, Any]:
     review_text = state['review_text']
     rating = state['rating']
-    prompt = f"""You are a final decision-making AI analyzing customer feedback for a theme park/attraction app.
-REVIEW DATA:
 Rating: {rating}/5
 Text: {review_text}
@@ -626,44 +615,33 @@ STAGE 2 SENTIMENT:
 - Alternate: {state['alt_sentiment_result'].get('sentiment')} ({state['alt_sentiment_result'].get('confidence'):.2f})
 - Agreement: {state.get('sentiment_agreement')}
-YOUR TASK:
-1. Review all data from both stages
-2. Make FINAL sentiment decision
-3. Provide comprehensive reasoning
-4. Generate action recommendation
-5. Flag if human review needed
-Respond ONLY in valid JSON format:
-{{
-  "final_sentiment": "POSITIVE/NEGATIVE/NEUTRAL",
-  "confidence": 0.0-1.0,
-  "reasoning": "Comprehensive explanation",
-  "validation_notes": "Does classification match sentiment?",
-  "conflicts_found": "any conflicts or 'none'",
-  "action_recommendation": "Specific action",
-  "needs_human_review": true/false
-}}"""
     try:
         print(f"   🔍 Calling Llama 70B API...")
-        response = hf_client.text_generation(
-            prompt,
             model="meta-llama/Llama-3.1-70B-Instruct",
-            max_new_tokens=400,
             temperature=0.1
         )
-        print(f"   ✅ Got response ({len(response)} chars)")
-        response_clean = response.strip()
-        if response_clean.startswith('```'):
-            response_clean = response_clean.split('```')[1]
-            if response_clean.startswith('json'):
-                response_clean = response_clean[4:]
-        response_clean = response_clean.strip()
-        result = json.loads(response_clean)
         result['model'] = 'meta-llama/Llama-3.1-70B-Instruct'
     except Exception as e:

 """
+LangGraph Nodes - FINAL WORKING VERSION
+Uses chat_completion() API format + Lazy loading + Fixed alt sentiment
 """
 import os
 from database_enhanced import EnhancedDatabase
 # FIXED: Don't initialize client at module import
 _hf_client = None
 def get_hf_client():
     HF_TOKEN = os.getenv("HUGGINGFACE_API_KEY")
     if not HF_TOKEN or HF_TOKEN.strip() == "":
         return None
     # Initialize client with token
     return _hf_client
+# Initialize sentiment models (singleton)
 _sentiment_models_loaded = False
 _best_tokenizer = None
 _best_model = None
     _best_model = AutoModelForSequenceClassification.from_pretrained("cardiffnlp/twitter-roberta-base-sentiment-latest")
     _best_model.eval()
+    # Alternate Model - FIXED: Load with low_cpu_mem_usage=False to avoid meta tensors
     _alt_tokenizer = AutoTokenizer.from_pretrained("finiteautomata/bertweet-base-sentiment-analysis")
     _alt_model = AutoModelForSequenceClassification.from_pretrained(
         "finiteautomata/bertweet-base-sentiment-analysis",
+        low_cpu_mem_usage=False  # FIXED: Don't use meta device
     )
     _alt_model.eval()
 # ============================================================================
+# STAGE 1: CLASSIFICATION NODE
 # ============================================================================
 def llm1_classify(review: Dict[str, Any]) -> Dict[str, Any]:
     """LLM1: Type, Department, Priority classification"""
     hf_client = get_hf_client()
     if hf_client is None:
     review_text = review.get('review_text', '')
     rating = review.get('rating', 3)
+    # FIXED: Use chat format with system + user messages
+    system_prompt = """You are an expert at classifying customer reviews for theme park and attraction apps.
+Classify reviews across these dimensions:
+1. TYPE: complaint, praise, suggestion, question, or bug_report
+2. DEPARTMENT: engineering, ux, support, or business
+3. PRIORITY: critical, high, medium, or low
+4. CONFIDENCE: 0.0-1.0
 5. REASONING: Brief one-sentence explanation
 Respond ONLY in valid JSON format:
+{
   "type": "complaint/praise/suggestion/question/bug_report",
   "department": "engineering/ux/support/business",
   "priority": "critical/high/medium/low",
   "confidence": 0.0-1.0,
   "reasoning": "brief explanation"
+}"""
+    user_prompt = f"""REVIEW:
+Rating: {rating}/5
+Text: {review_text}
+Classify this review:"""
     try:
         print(f"   🔍 Calling Qwen API...")
+        # FIXED: Use chat_completion instead of text_generation
+        response = hf_client.chat_completion(
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt}
+            ],
             model="Qwen/Qwen2.5-72B-Instruct",
+            max_tokens=200,
             temperature=0.1
         )
+        # Extract content from chat response
+        content = response.choices[0].message.content
+        print(f"   ✅ Got response ({len(content)} chars)")
         # Clean and parse JSON
+        content_clean = content.strip()
+        if content_clean.startswith('```'):
+            content_clean = content_clean.split('```')[1]
+            if content_clean.startswith('json'):
+                content_clean = content_clean[4:]
+        content_clean = content_clean.strip()
+        result = json.loads(content_clean)
         result['model'] = 'Qwen/Qwen2.5-72B-Instruct'
         print(f"   ✅ Parsed: {result['type']} → {result['department']}")
 def llm2_analyze(review: Dict[str, Any]) -> Dict[str, Any]:
     """LLM2: User type, Emotion, Context analysis"""
     hf_client = get_hf_client()
     if hf_client is None:
     review_text = review.get('review_text', '')
     rating = review.get('rating', 3)
+    # FIXED: Use chat format
+    system_prompt = """You are an expert at understanding customer psychology and emotional context.
+Analyze reviews for:
+1. USER_TYPE: new_user, regular_user, power_user, or churning_user
+2. EMOTION: anger, frustration, joy, satisfaction, disappointment, or confusion
+3. CONTEXT: Brief context (1-2 words)
+4. CONFIDENCE: 0.0-1.0
+5. REASONING: Brief explanation
 Respond ONLY in valid JSON format:
+{
   "user_type": "new_user/regular_user/power_user/churning_user",
   "emotion": "anger/frustration/joy/satisfaction/disappointment/confusion",
   "context": "brief context",
   "confidence": 0.0-1.0,
   "reasoning": "brief explanation"
+}"""
+    user_prompt = f"""REVIEW:
+Rating: {rating}/5
+Text: {review_text}
+Analyze this review:"""
     try:
         print(f"   🔍 Calling Mistral API...")
+        # FIXED: Use chat_completion
+        response = hf_client.chat_completion(
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt}
+            ],
             model="mistralai/Mistral-7B-Instruct-v0.3",
+            max_tokens=200,
             temperature=0.1
         )
+        content = response.choices[0].message.content
+        print(f"   ✅ Got response ({len(content)} chars)")
         # Clean and parse JSON
+        content_clean = content.strip()
+        if content_clean.startswith('```'):
+            content_clean = content_clean.split('```')[1]
+            if content_clean.startswith('json'):
+                content_clean = content_clean[4:]
+        content_clean = content_clean.strip()
+        result = json.loads(content_clean)
         result['model'] = 'mistralai/Mistral-7B-Instruct-v0.3'
         print(f"   ✅ Parsed: {result['user_type']}, {result['emotion']}")
 def manager_synthesize(llm1_result: Dict, llm2_result: Dict, review: Dict) -> Dict[str, Any]:
     """Manager: Synthesize LLM1 and LLM2 results"""
     hf_client = get_hf_client()
     if hf_client is None:
     review_text = review.get('review_text', '')
     rating = review.get('rating', 3)
+    # FIXED: Use chat format
+    system_prompt = """You are a synthesis manager evaluating two AI analyses.
 Your task:
 1. Validate both analyses
+2. Resolve conflicts
 3. Make final classification decision
 4. Provide synthesis reasoning
 Respond ONLY in valid JSON format:
+{
   "final_type": "from llm1 or adjusted",
   "final_department": "from llm1 or adjusted",
   "final_priority": "from llm1 or adjusted",
+  "synthesis_reasoning": "brief explanation"
+}"""
+    user_prompt = f"""REVIEW:
+Rating: {rating}/5
+Text: {review_text}
+LLM1 ANALYSIS (Type/Dept/Priority):
+{json.dumps(llm1_result, indent=2)}
+LLM2 ANALYSIS (User/Emotion/Context):
+{json.dumps(llm2_result, indent=2)}
+Synthesize these analyses:"""
     try:
         print(f"   🔍 Calling Llama Manager API...")
+        # FIXED: Use chat_completion
+        response = hf_client.chat_completion(
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt}
+            ],
             model="meta-llama/Llama-3.3-70B-Instruct",
+            max_tokens=200,
             temperature=0.1
         )
+        content = response.choices[0].message.content
+        print(f"   ✅ Got response ({len(content)} chars)")
+        content_clean = content.strip()
+        if content_clean.startswith('```'):
+            content_clean = content_clean.split('```')[1]
+            if content_clean.startswith('json'):
+                content_clean = content_clean[4:]
+        content_clean = content_clean.strip()
+        result = json.loads(content_clean)
         result['model'] = 'meta-llama/Llama-3.3-70B-Instruct'
         print(f"   ✅ Manager decision: {result['final_type']} → {result['final_department']}")
 def stage1_classification_node(state: ReviewState) -> Dict[str, Any]:
+    """Stage 1 Node: Classification with PARALLEL execution"""
     print(f"\n      📝 Review ID: {state['review_id']}")
     print(f"      ⏳ STAGE 1: Classification (Parallel LLM1 + LLM2)...")
     start_time = time.time()
     review_dict = dict(state)
+    # PARALLEL EXECUTION
     with ThreadPoolExecutor(max_workers=2) as executor:
         future_llm1 = executor.submit(llm1_classify, review_dict)
         future_llm2 = executor.submit(llm2_analyze, review_dict)
     print(f"         ✅ LLM1: {llm1_result['type']} → {llm1_result['department']} (Priority: {llm1_result['priority']})")
     print(f"         ✅ LLM2: {llm2_result['user_type']}, {llm2_result['emotion']}")
+    # Manager synthesizes
     print(f"         🤖 Manager synthesizing...")
     manager_result = manager_synthesize(llm1_result, llm2_result, review_dict)
 def analyze_alt_sentiment(text: str) -> Dict[str, Any]:
+    """Alternate Model: BERTweet - FIXED"""
     load_sentiment_models()
     try:
         with torch.no_grad():
             outputs = _alt_model(**inputs)
+            probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
             prediction = torch.argmax(probs, dim=-1).item()
             confidence = probs[0][prediction].item()
 def stage2_sentiment_node(state: ReviewState) -> Dict[str, Any]:
+    """Stage 2 Node: Sentiment with PARALLEL execution"""
     print(f"\n      ⏳ STAGE 2: Sentiment Analysis (Parallel Best + Alternate)...")
     start_time = time.time()
     review_text = state['review_text']
+    # PARALLEL EXECUTION
     with ThreadPoolExecutor(max_workers=2) as executor:
         future_best = executor.submit(analyze_best_sentiment, review_text)
         future_alt = executor.submit(analyze_alt_sentiment, review_text)
 # ============================================================================
 def stage3_finalization_node(state: ReviewState) -> Dict[str, Any]:
+    """Stage 3 Node: Final synthesis with LLM3"""
     print(f"\n      ⏳ STAGE 3: Finalization (LLM3)...")
     start_time = time.time()
     hf_client = get_hf_client()
     if hf_client is None:
     review_text = state['review_text']
     rating = state['rating']
+    # FIXED: Use chat format
+    system_prompt = """You are a final decision-making AI analyzing customer feedback for a theme park/attraction app.
+Your task:
+1. Review all data from previous stages
+2. Make FINAL sentiment decision
+3. Provide comprehensive reasoning
+4. Generate action recommendation
+5. Flag if human review needed
+Respond ONLY in valid JSON format:
+{
+  "final_sentiment": "POSITIVE/NEGATIVE/NEUTRAL",
+  "confidence": 0.0-1.0,
+  "reasoning": "Comprehensive explanation",
+  "validation_notes": "Does classification match sentiment?",
+  "conflicts_found": "any conflicts or 'none'",
+  "action_recommendation": "Specific action",
+  "needs_human_review": true/false
+}"""
+    user_prompt = f"""REVIEW DATA:
 Rating: {rating}/5
 Text: {review_text}
 - Alternate: {state['alt_sentiment_result'].get('sentiment')} ({state['alt_sentiment_result'].get('confidence'):.2f})
 - Agreement: {state.get('sentiment_agreement')}
+Make your final decision:"""
     try:
         print(f"   🔍 Calling Llama 70B API...")
+        # FIXED: Use chat_completion
+        response = hf_client.chat_completion(
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt}
+            ],
             model="meta-llama/Llama-3.1-70B-Instruct",
+            max_tokens=400,
             temperature=0.1
         )
+        content = response.choices[0].message.content
+        print(f"   ✅ Got response ({len(content)} chars)")
+        content_clean = content.strip()
+        if content_clean.startswith('```'):
+            content_clean = content_clean.split('```')[1]
+            if content_clean.startswith('json'):
+                content_clean = content_clean[4:]
+        content_clean = content_clean.strip()
+        result = json.loads(content_clean)
         result['model'] = 'meta-llama/Llama-3.1-70B-Instruct'
     except Exception as e: