Spaces:
Sleeping
Sleeping
| """ | |
| Menu Discovery Module - FIXED for large review sets | |
| Processes reviews in batches with retry logic | |
| """ | |
| from typing import List, Dict, Any, Optional | |
| from anthropic import Anthropic | |
| import json | |
| import os | |
| import sys | |
| # Add project root | |
| project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| if project_root not in sys.path: | |
| sys.path.insert(0, project_root) | |
| from src.agent.api_utils import call_claude_with_retry | |
class MenuDiscovery:
    """Discover menu items and drinks mentioned in customer reviews via an LLM.

    Reviews are sent to the model in fixed-size batches; the per-batch
    results are then merged by (lowercased) item name, sorted by mention
    count, and truncated.
    """

    def __init__(self, client: "Anthropic", model: str):
        """Store the API client and the model name used for every request."""
        self.client = client
        self.model = model

    def extract_menu_items(
        self,
        reviews: List[str],
        restaurant_name: str = "the restaurant",
        max_items: int = 50,
        batch_size: int = 15
    ) -> Dict[str, Any]:
        """Extract food items and drinks from ``reviews``, batch by batch.

        Args:
            reviews: Raw review texts.
            restaurant_name: Name interpolated into the extraction prompt.
            max_items: Maximum number of food items, and separately of
                drinks, kept in the result (also passed to the prompt).
            batch_size: Number of reviews sent to the model per request.

        Returns:
            A dict with ``food_items`` and ``drinks`` (each sorted by
            ``mention_count`` descending and truncated to ``max_items``)
            and ``total_extracted`` (the combined length of both lists).

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        print(f"🔍 Processing {len(reviews)} reviews in batches of {batch_size}...")
        all_food_items: Dict[str, Dict[str, Any]] = {}
        all_drinks: Dict[str, Dict[str, Any]] = {}
        # Loop-invariant, so computed once up front.
        total_batches = (len(reviews) + batch_size - 1) // batch_size

        starts = range(0, len(reviews), batch_size)
        for batch_num, start in enumerate(starts, start=1):
            batch = reviews[start:start + batch_size]
            print(f" Batch {batch_num}/{total_batches}: {len(batch)} reviews...")
            try:
                batch_result = self._extract_batch(batch, restaurant_name, max_items)
                # The prompt numbers reviews from 0 inside each batch, so
                # `start` is the offset that turns a batch-local
                # review_index into a position in the full `reviews` list.
                self._merge_items(all_food_items, batch_result.get('food_items'), start)
                self._merge_items(all_drinks, batch_result.get('drinks'), start)
            except Exception as e:
                # Best effort: one bad batch must not lose the others.
                print(f" ⚠️ Batch {batch_num} failed: {e}")
                continue

        food_items_list = sorted(
            all_food_items.values(), key=lambda x: x['mention_count'], reverse=True
        )[:max_items]
        drinks_list = sorted(
            all_drinks.values(), key=lambda x: x['mention_count'], reverse=True
        )[:max_items]

        print(f"✅ Discovered {len(food_items_list)} food items + {len(drinks_list)} drinks")
        return {
            "food_items": food_items_list,
            "drinks": drinks_list,
            "total_extracted": len(food_items_list) + len(drinks_list)
        }

    @staticmethod
    def _merge_items(
        merged: Dict[str, Dict[str, Any]],
        items: Optional[List[Any]],
        index_offset: int = 0
    ) -> None:
        """Fold one batch's items into ``merged`` (keyed by item name), in place.

        Entries that are not dicts or have no usable name are skipped.
        A missing/invalid ``mention_count`` falls back to the number of
        related reviews (at least 1); a missing/invalid ``sentiment``
        falls back to 0.0. Sentiment of an item seen in several batches is
        the mention-count-weighted mean, so no batch dominates merely by
        arriving later. Integer ``review_index`` values are shifted by
        ``index_offset``.
        """
        for item in items or []:
            if not isinstance(item, dict):
                continue
            raw_name = item.get('name')
            if not isinstance(raw_name, str) or not raw_name.strip():
                continue
            name = raw_name.strip().lower()

            related: List[Any] = []
            for ref in item.get('related_reviews') or []:
                if isinstance(ref, dict):
                    ref = dict(ref)  # copy so the batch result is not mutated
                    idx = ref.get('review_index')
                    if isinstance(idx, int) and not isinstance(idx, bool):
                        ref['review_index'] = idx + index_offset
                related.append(ref)

            try:
                count = max(int(item.get('mention_count')), 0)
            except (TypeError, ValueError):
                count = max(len(related), 1)
            try:
                sentiment = float(item.get('sentiment'))
            except (TypeError, ValueError):
                sentiment = 0.0

            existing = merged.get(name)
            if existing is None:
                entry = dict(item)
                entry['name'] = name
                entry['mention_count'] = count
                entry['sentiment'] = sentiment
                entry['related_reviews'] = related
                merged[name] = entry
                continue

            total = existing['mention_count'] + count
            if total > 0:
                existing['sentiment'] = (
                    existing['sentiment'] * existing['mention_count']
                    + sentiment * count
                ) / total
            else:
                # Both sides report zero mentions: no weights to use.
                existing['sentiment'] = (existing['sentiment'] + sentiment) / 2
            existing['mention_count'] = total
            existing['related_reviews'].extend(related)

    @staticmethod
    def _extract_json_text(text: str) -> str:
        """Strip Markdown code fences and any prose around the JSON object."""
        cleaned = text.replace('```json', '').replace('```', '').strip()
        start, end = cleaned.find('{'), cleaned.rfind('}')
        if start != -1 and end > start:
            return cleaned[start:end + 1]
        return cleaned

    def _extract_batch(
        self,
        reviews: List[str],
        restaurant_name: str,
        max_items: int
    ) -> Dict[str, Any]:
        """Run extraction on a single batch of reviews.

        Sends the prompt through ``call_claude_with_retry`` and parses the
        reply as JSON. Never raises: on any failure the error is printed
        and an empty result is returned.

        Returns:
            The parsed (and name-normalized) result dict, or
            ``{"food_items": [], "drinks": [], "total_extracted": 0}``.
        """
        prompt = self._build_extraction_prompt(reviews, restaurant_name, max_items)
        try:
            response = call_claude_with_retry(
                client=self.client,
                model=self.model,
                max_tokens=4000,
                temperature=0.3,
                messages=[{"role": "user", "content": prompt}]
            )
            result_text = self._extract_json_text(response.content[0].text)
            extracted_data = json.loads(result_text)
            if not isinstance(extracted_data, dict):
                raise TypeError("expected a JSON object at the top level")
            return self._normalize_items(extracted_data)
        except json.JSONDecodeError as e:
            print(f"❌ Failed to parse menu items: {e}")
            return {"food_items": [], "drinks": [], "total_extracted": 0}
        except Exception as e:
            print(f"❌ Error extracting menu items: {e}")
            return {"food_items": [], "drinks": [], "total_extracted": 0}

    def _normalize_items(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Lowercase and strip item names in place; return the same dict.

        Entries that are not dicts, or whose ``name`` is not a string, are
        left untouched instead of raising.
        """
        for key in ('food_items', 'drinks'):
            for entry in data.get(key) or []:
                if isinstance(entry, dict) and isinstance(entry.get('name'), str):
                    entry['name'] = entry['name'].strip().lower()
        return data

    def generate_item_summary(
        self,
        item: Dict[str, Any],
        restaurant_name: str = "the restaurant"
    ) -> str:
        """Generate a 2-3 sentence summary of feedback for one menu item.

        Args:
            item: An item dict as produced by ``extract_menu_items``; reads
                ``name``, ``sentiment`` and ``related_reviews``.
            restaurant_name: Name interpolated into the prompt.

        Returns:
            The model's summary, or a fixed fallback sentence when the item
            has no related reviews or the request fails.
        """
        item_name = item.get('name', 'unknown')
        try:
            sentiment = float(item.get('sentiment', 0))
        except (TypeError, ValueError):
            sentiment = 0.0  # non-numeric sentiment would break the format spec below
        related_reviews = item.get('related_reviews', [])
        if not related_reviews:
            return f"No specific feedback found for {item_name}."

        # Only the first 10 reviews are sent to keep the prompt short.
        review_texts = []
        for ref in related_reviews[:10]:
            if isinstance(ref, dict):
                review_texts.append(str(ref.get('review_text') or ''))
            elif isinstance(ref, str):
                review_texts.append(ref)
        reviews_combined = "\n\n".join(review_texts)

        prompt = f"""Summarize customer feedback about "{item_name}" at {restaurant_name}.
REVIEWS MENTIONING THIS ITEM:
{reviews_combined}
TASK:
Create a 2-3 sentence summary of what customers say about {item_name}.
- Overall sentiment: {sentiment:+.2f} ({self._sentiment_label(sentiment)})
- Be specific and evidence-based
- Mention common praise points
- Mention concerns if any
- Keep it concise (2-3 sentences max)
Summary:"""
        try:
            response = call_claude_with_retry(
                client=self.client,
                model=self.model,
                max_tokens=200,
                temperature=0.4,
                messages=[{"role": "user", "content": prompt}]
            )
            return response.content[0].text.strip()
        except Exception as e:
            print(f"❌ Error generating summary: {e}")
            return f"Unable to generate summary for {item_name}."

    def _sentiment_label(self, sentiment: float) -> str:
        """Map a sentiment score to a human-readable label.

        Thresholds: >= 0.7 "Very Positive", >= 0.3 "Positive",
        >= 0 "Mixed", >= -0.3 "Negative", otherwise "Very Negative".
        """
        if sentiment >= 0.7:
            return "Very Positive"
        if sentiment >= 0.3:
            return "Positive"
        if sentiment >= 0:
            return "Mixed"
        if sentiment >= -0.3:
            return "Negative"
        return "Very Negative"

    def _build_extraction_prompt(
        self,
        reviews: List[str],
        restaurant_name: str,
        max_items: int
    ) -> str:
        """Build the extraction prompt for one batch.

        Reviews are labelled ``[Review i]`` with ``i`` starting at 0 within
        the batch, so any ``review_index`` the model returns is batch-local.
        """
        reviews_text = "\n\n".join(
            f"[Review {i}]: {review}" for i, review in enumerate(reviews)
        )
        prompt = f"""You are analyzing customer reviews for {restaurant_name} to discover SPECIFIC menu items and drinks WITH SENTIMENT.
REVIEWS (numbered for reference):
{reviews_text}
YOUR TASK:
1. Extract SPECIFIC food items and drinks
2. Calculate sentiment for each
3. IDENTIFY WHICH REVIEWS mention each item (use review numbers!)
CRITICAL RULES:
1. GRANULARITY:
- Keep items SEPARATE: "salmon sushi" ≠ "salmon roll" ≠ "salmon nigiri"
- Use LOWERCASE for all item names
2. SENTIMENT ANALYSIS:
- Calculate sentiment from context where item is mentioned
- Score: -1.0 (very negative) to +1.0 (very positive)
3. FOOD vs DRINKS:
- Separate food from drinks
4. REVIEW EXTRACTION:
- For EACH item, identify which reviews mention it
- Use review numbers
- Include full review text
5. FILTER NOISE:
- ❌ Skip: "food", "meal"
- ✅ Only: SPECIFIC menu items
OUTPUT FORMAT (JSON):
{{
"food_items": [
{{
"name": "item name in lowercase",
"mention_count": number,
"sentiment": float,
"category": "appetizer/entree/dessert/etc",
"related_reviews": [
{{
"review_index": 0,
"review_text": "full review text",
"sentiment_context": "quote"
}}
]
}}
],
"drinks": [...same structure...],
"total_extracted": total_count
}}
Extract ALL items (up to {max_items}):"""
        return prompt