Spaces:
Sleeping
Sleeping
File size: 12,544 Bytes
bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f bb9baa9 7dab48f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 | """
Unified Review Analyzer - Single-pass extraction
Extracts menu items, aspects, and sentiment in ONE API call per batch
UPDATED: New sentiment scale
- Positive: >= 0.6
- Neutral: 0 to 0.59
- Negative: < 0
"""
from typing import List, Dict, Any
from anthropic import Anthropic
import json
import sys
import os
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
if project_root not in sys.path:
sys.path.insert(0, project_root)
from src.agent.api_utils import call_claude_with_retry
class UnifiedReviewAnalyzer:
    """
    Single-pass review analyzer.

    One API call per batch extracts all three signal types at once:
      - menu items (food + drinks)
      - customer aspects (service, ambience, etc.)
      - a sentiment score for each item/aspect

    Compared to running three separate extraction passes, this cuts the
    number of API calls by 3x.
    """

    def __init__(self, client: "Anthropic", model: str):
        # API client and model identifier used for every batch call.
        self.client = client
        self.model = model
def analyze_reviews(
self,
reviews: List[str],
restaurant_name: str = "the restaurant",
batch_size: int = 20
) -> Dict[str, Any]:
"""
Single-pass analysis of all reviews.
Returns:
{
"menu_analysis": {
"food_items": [...],
"drinks": [...]
},
"aspect_analysis": {
"aspects": [...]
}
}
"""
print(f"🚀 Unified analysis: {len(reviews)} reviews in batches of {batch_size}...")
all_food_items = {}
all_drinks = {}
all_aspects = {}
# Process in batches
for i in range(0, len(reviews), batch_size):
batch = reviews[i:i+batch_size]
batch_num = (i // batch_size) + 1
total_batches = (len(reviews) + batch_size - 1) // batch_size
print(f" Batch {batch_num}/{total_batches}: {len(batch)} reviews...")
try:
batch_result = self._analyze_batch(batch, restaurant_name, start_index=i)
# Merge food items
for item in batch_result.get('food_items', []):
name = item.get('name', '').lower()
if not name:
continue
if name in all_food_items:
all_food_items[name]['mention_count'] += item.get('mention_count', 1)
all_food_items[name]['related_reviews'].extend(item.get('related_reviews', []))
# Average sentiment
old_sent = all_food_items[name]['sentiment']
new_sent = item.get('sentiment', 0)
old_count = all_food_items[name]['mention_count'] - item.get('mention_count', 1)
new_count = item.get('mention_count', 1)
all_food_items[name]['sentiment'] = (old_sent * old_count + new_sent * new_count) / (old_count + new_count)
else:
all_food_items[name] = item
# Merge drinks
for item in batch_result.get('drinks', []):
name = item.get('name', '').lower()
if not name:
continue
if name in all_drinks:
all_drinks[name]['mention_count'] += item.get('mention_count', 1)
all_drinks[name]['related_reviews'].extend(item.get('related_reviews', []))
old_sent = all_drinks[name]['sentiment']
new_sent = item.get('sentiment', 0)
old_count = all_drinks[name]['mention_count'] - item.get('mention_count', 1)
new_count = item.get('mention_count', 1)
all_drinks[name]['sentiment'] = (old_sent * old_count + new_sent * new_count) / (old_count + new_count)
else:
all_drinks[name] = item
# Merge aspects
for aspect in batch_result.get('aspects', []):
name = aspect.get('name', '').lower()
if not name:
continue
if name in all_aspects:
all_aspects[name]['mention_count'] += aspect.get('mention_count', 1)
all_aspects[name]['related_reviews'].extend(aspect.get('related_reviews', []))
old_sent = all_aspects[name]['sentiment']
new_sent = aspect.get('sentiment', 0)
old_count = all_aspects[name]['mention_count'] - aspect.get('mention_count', 1)
new_count = aspect.get('mention_count', 1)
all_aspects[name]['sentiment'] = (old_sent * old_count + new_sent * new_count) / (old_count + new_count)
else:
all_aspects[name] = aspect
except Exception as e:
print(f" ⚠️ Batch {batch_num} error: {e}")
continue
# Convert to lists and sort by mention count
food_list = sorted(all_food_items.values(), key=lambda x: x.get('mention_count', 0), reverse=True)
drinks_list = sorted(all_drinks.values(), key=lambda x: x.get('mention_count', 0), reverse=True)
aspects_list = sorted(all_aspects.values(), key=lambda x: x.get('mention_count', 0), reverse=True)
print(f"✅ Discovered: {len(food_list)} food + {len(drinks_list)} drinks + {len(aspects_list)} aspects")
return {
"menu_analysis": {
"food_items": food_list,
"drinks": drinks_list
},
"aspect_analysis": {
"aspects": aspects_list
}
}
def _analyze_batch(
self,
reviews: List[str],
restaurant_name: str,
start_index: int = 0
) -> Dict[str, Any]:
"""Analyze a single batch of reviews."""
prompt = self._build_unified_prompt(reviews, restaurant_name, start_index)
try:
response = call_claude_with_retry(
client=self.client,
model=self.model,
max_tokens=4000,
temperature=0.3,
messages=[{"role": "user", "content": prompt}]
)
result_text = response.content[0].text
result_text = result_text.replace('```json', '').replace('```', '').strip()
# Parse JSON
try:
data = json.loads(result_text)
except json.JSONDecodeError as e:
print(f" ⚠️ JSON parse error: {e}")
return {"food_items": [], "drinks": [], "aspects": []}
# Post-process: Add full review text back using indices
data = self._map_reviews_to_items(data, reviews, start_index)
data = self._normalize_data(data)
return data
except Exception as e:
print(f"❌ Extraction error: {e}")
return {"food_items": [], "drinks": [], "aspects": []}
def _build_unified_prompt(
self,
reviews: List[str],
restaurant_name: str,
start_index: int
) -> str:
"""Build unified extraction prompt with NEW SENTIMENT SCALE."""
numbered_reviews = []
for i, review in enumerate(reviews):
numbered_reviews.append(f"[Review {i}]: {review}")
reviews_text = "\n\n".join(numbered_reviews)
prompt = f"""You are analyzing customer reviews for {restaurant_name}. Extract BOTH menu items AND aspects in ONE PASS.
REVIEWS:
{reviews_text}
YOUR TASK - Extract THREE things simultaneously:
1. **MENU ITEMS** (food & drinks mentioned)
2. **ASPECTS** (what customers care about: service, ambience, etc.)
3. **SENTIMENT** for each
SENTIMENT SCALE (IMPORTANT):
- **Positive (0.6 to 1.0):** Customer clearly enjoyed/praised this item or aspect
- **Neutral (0.0 to 0.59):** Mixed feelings, okay but not exceptional, or simply mentioned without strong opinion
- **Negative (-1.0 to -0.01):** Customer complained, criticized, or expressed disappointment
Examples:
- "The pasta was absolutely divine!" → 0.85 (Positive)
- "The pasta was decent, nothing special" → 0.3 (Neutral)
- "The pasta was undercooked and bland" → -0.6 (Negative)
RULES:
**MENU ITEMS:**
- Specific items only: "salmon sushi", "miso soup", "sake"
- Separate food from drinks
- Lowercase names
- Calculate sentiment per item using the scale above
**ASPECTS:**
- What customers discuss: "service speed", "food quality", "ambience", "value"
- Be specific: "service speed" not just "service"
- Cuisine-specific welcome: "freshness", "authenticity", "presentation"
- Lowercase names
- Calculate sentiment per aspect using the scale above
**REVIEW LINKING:**
- For EACH item/aspect, list which review NUMBERS mention it
- Use ONLY the review index numbers: 0, 1, 2, etc.
- DO NOT include review text in your response (saves tokens and prevents JSON errors)
OUTPUT (JSON) - IMPORTANT: Return ONLY review indices, NOT full text:
{{
"food_items": [
{{
"name": "salmon aburi sushi",
"mention_count": 2,
"sentiment": 0.85,
"category": "sushi",
"related_reviews": [0, 5]
}}
],
"drinks": [
{{
"name": "sake",
"mention_count": 1,
"sentiment": 0.7,
"category": "alcohol",
"related_reviews": [3]
}}
],
"aspects": [
{{
"name": "service speed",
"mention_count": 3,
"sentiment": 0.65,
"description": "How quickly food arrives",
"related_reviews": [1, 2, 7]
}}
]
}}
CRITICAL:
- related_reviews should be an array of NUMBERS ONLY: [0, 1, 5]
- DO NOT include review text or quotes
- This prevents JSON parsing errors and saves tokens
- Output ONLY valid JSON, no other text
- Use the sentiment scale: >= 0.6 positive, 0-0.59 neutral, < 0 negative
Extract everything:"""
return prompt
def _map_reviews_to_items(
self,
data: Dict[str, Any],
reviews: List[str],
start_index: int
) -> Dict[str, Any]:
"""
Map review indices back to full review text.
Claude returns just indices to avoid JSON breaking.
We add the full text back here.
"""
for item in data.get('food_items', []):
indices = item.get('related_reviews', [])
item['related_reviews'] = []
for idx in indices:
if isinstance(idx, int) and 0 <= idx < len(reviews):
item['related_reviews'].append({
'review_index': start_index + idx,
'review_text': reviews[idx]
})
for item in data.get('drinks', []):
indices = item.get('related_reviews', [])
item['related_reviews'] = []
for idx in indices:
if isinstance(idx, int) and 0 <= idx < len(reviews):
item['related_reviews'].append({
'review_index': start_index + idx,
'review_text': reviews[idx]
})
for aspect in data.get('aspects', []):
indices = aspect.get('related_reviews', [])
aspect['related_reviews'] = []
for idx in indices:
if isinstance(idx, int) and 0 <= idx < len(reviews):
aspect['related_reviews'].append({
'review_index': start_index + idx,
'review_text': reviews[idx]
})
return data
def _normalize_data(self, data: Dict[str, Any]) -> Dict[str, Any]:
"""Normalize names to lowercase."""
for item in data.get('food_items', []):
if 'name' in item:
item['name'] = item['name'].lower()
for drink in data.get('drinks', []):
if 'name' in drink:
drink['name'] = drink['name'].lower()
for aspect in data.get('aspects', []):
if 'name' in aspect:
aspect['name'] = aspect['name'].lower()
return data |