Spaces:

TushP
/

restaurant-intelligence-agent

Sleeping

App Files Files Community

TushP committed on Nov 27, 2025

Commit

37e60ef

verified ·

1 Parent(s): df41fce

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

modal_backend.py +210 -4

modal_backend.py CHANGED Viewed

@@ -312,6 +312,148 @@ def _fallback_insights(role: str) -> Dict[str, Any]:
     }
 # ============================================================================
 # MAIN ANALYSIS FUNCTION - PARALLEL OPTIMIZED
 # ============================================================================
@@ -361,11 +503,35 @@ def full_analysis_parallel(url: str, max_reviews: int = 100) -> Dict[str, Any]:
     print(f"✅ Scraping complete in {time.time() - scrape_start:.1f}s")
-    # Process reviews
-    from src.data_processing import process_reviews, clean_reviews_for_ai
-    df = process_reviews(result)
-    reviews = clean_reviews_for_ai(df["review_text"].tolist(), verbose=False)
     print(f"📊 Total reviews: {len(reviews)}")
@@ -481,6 +647,46 @@ def full_analysis_parallel(url: str, max_reviews: int = 100) -> Dict[str, Any]:
     print(f"📊 Discovered: {len(food_list)} food + {len(drinks_list)} drinks + {len(aspects_list)} aspects")
     # Build analysis data
     analysis_data = {
         "menu_analysis": {

     }
+# ============================================================================
+# SUMMARY GENERATION - Single API call for ALL summaries (like original)
+# ============================================================================
+@app.function(
+    image=image,
+    secrets=[modal.Secret.from_name("anthropic-api-key")],
+    timeout=120,
+)
+def generate_all_summaries(
+    food_items: List[Dict[str, Any]],
+    drinks: List[Dict[str, Any]],
+    aspects: List[Dict[str, Any]],
+    restaurant_name: str
+) -> Dict[str, Dict[str, str]]:
+    """
+    Generate ALL summaries in a SINGLE API call.
+    This matches the original batch_generate_summaries() approach:
+    - 1 API call for everything (not 4-5 separate calls)
+    - Same cost as before
+    - Same quality summaries
+    Returns:
+        {"food": {"item_name": "summary"}, "drinks": {...}, "aspects": {...}}
+    """
+    from anthropic import Anthropic
+    import os
+    import re
+    client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])
+    # Build compact data for prompt (top items only)
+    food_data = []
+    for f in food_items[:15]:
+        reviews_sample = []
+        for r in f.get('related_reviews', [])[:2]:
+            if isinstance(r, dict):
+                text = r.get('review_text', '')[:150]
+            else:
+                text = str(r)[:150]
+            if text:
+                reviews_sample.append(text)
+        food_data.append({
+            'name': f.get('name', 'unknown'),
+            'sentiment': f.get('sentiment', 0),
+            'mentions': f.get('mention_count', 0),
+            'reviews': reviews_sample
+        })
+    drink_data = []
+    for d in drinks[:10]:
+        reviews_sample = []
+        for r in d.get('related_reviews', [])[:2]:
+            if isinstance(r, dict):
+                text = r.get('review_text', '')[:150]
+            else:
+                text = str(r)[:150]
+            if text:
+                reviews_sample.append(text)
+        drink_data.append({
+            'name': d.get('name', 'unknown'),
+            'sentiment': d.get('sentiment', 0),
+            'mentions': d.get('mention_count', 0),
+            'reviews': reviews_sample
+        })
+    aspect_data = []
+    for a in aspects[:15]:
+        reviews_sample = []
+        for r in a.get('related_reviews', [])[:2]:
+            if isinstance(r, dict):
+                text = r.get('review_text', '')[:150]
+            else:
+                text = str(r)[:150]
+            if text:
+                reviews_sample.append(text)
+        aspect_data.append({
+            'name': a.get('name', 'unknown'),
+            'sentiment': a.get('sentiment', 0),
+            'mentions': a.get('mention_count', 0),
+            'reviews': reviews_sample
+        })
+    prompt = f"""You are a restaurant review analyst for {restaurant_name}. Generate brief, specific summaries for each item.
+FOOD ITEMS:
+{json.dumps(food_data, indent=2)}
+DRINKS:
+{json.dumps(drink_data, indent=2)}
+ASPECTS:
+{json.dumps(aspect_data, indent=2)}
+For EACH item, write a 2-3 sentence summary that:
+1. Synthesizes what customers say (use the sample reviews provided)
+2. Reflects the sentiment score (positive if >= 0.6, negative if < 0, neutral otherwise)
+3. Gives actionable insight for restaurant staff
+OUTPUT FORMAT (JSON):
+{{
+  "food": {{
+    "item name": "2-3 sentence summary based on reviews...",
+    "another item": "summary..."
+  }},
+  "drinks": {{
+    "drink name": "summary..."
+  }},
+  "aspects": {{
+    "aspect name": "summary..."
+  }}
+}}
+CRITICAL: Output ONLY valid JSON. Generate summaries for ALL items listed above."""
+    try:
+        response = client.messages.create(
+            model="claude-sonnet-4-20250514",
+            max_tokens=4000,
+            temperature=0.4,
+            messages=[{"role": "user", "content": prompt}]
+        )
+        result_text = response.content[0].text.strip()
+        result_text = result_text.replace('```json', '').replace('```', '').strip()
+        # Parse JSON
+        match = re.search(r'\{[\s\S]*\}', result_text)
+        if match:
+            summaries = json.loads(match.group())
+            print(f"✅ Generated summaries: {len(summaries.get('food', {}))} food, {len(summaries.get('drinks', {}))} drinks, {len(summaries.get('aspects', {}))} aspects")
+            return summaries
+        else:
+            print("⚠️ No JSON found in summary response")
+            return {"food": {}, "drinks": {}, "aspects": {}}
+    except Exception as e:
+        print(f"⚠️ Summary generation error: {e}")
+        return {"food": {}, "drinks": {}, "aspects": {}}
 # ============================================================================
 # MAIN ANALYSIS FUNCTION - PARALLEL OPTIMIZED
 # ============================================================================
     print(f"✅ Scraping complete in {time.time() - scrape_start:.1f}s")
+    # Process reviews - FIXED: Handle both old and new scraper formats
+    from src.data_processing import clean_reviews_for_ai
+    import pandas as pd
+    # The scraper returns data at top level, not nested under 'reviews'
+    # Build DataFrame directly from scraper result
+    if 'names' in result:
+        # New format: data at top level
+        df = pd.DataFrame({
+            'name': result.get('names', []),
+            'date': result.get('dates', []),
+            'overall_rating': result.get('overall_ratings', []),
+            'food_rating': result.get('food_ratings', []),
+            'service_rating': result.get('service_ratings', []),
+            'ambience_rating': result.get('ambience_ratings', []),
+            'review_text': result.get('reviews', [])
+        })
+    else:
+        # Fallback: try old format with process_reviews
+        from src.data_processing import process_reviews
+        df = process_reviews(result)
+    # Convert ratings to numeric
+    for col in ['overall_rating', 'food_rating', 'service_rating', 'ambience_rating']:
+        if col in df.columns:
+            df[col] = pd.to_numeric(df[col], errors='coerce')
+    # Get clean review texts
+    reviews = clean_reviews_for_ai(df["review_text"].dropna().tolist(), verbose=False)
     print(f"📊 Total reviews: {len(reviews)}")
     print(f"📊 Discovered: {len(food_list)} food + {len(drinks_list)} drinks + {len(aspects_list)} aspects")
+    # Phase 2.5: Generate ALL summaries in ONE API call (like original)
+    print("📝 Phase 2.5: Generating summaries (single API call)...")
+    summary_start = time.time()
+    # Call the single summary function
+    summaries = generate_all_summaries.remote(
+        food_items=food_list[:15],
+        drinks=drinks_list[:10],
+        aspects=aspects_list[:15],
+        restaurant_name=restaurant_name
+    )
+    # Apply summaries to items
+    food_summaries = summaries.get('food', {})
+    drink_summaries = summaries.get('drinks', {})
+    aspect_summaries = summaries.get('aspects', {})
+    for item in food_list:
+        name = item.get('name', '').lower()
+        if name in food_summaries:
+            item['summary'] = food_summaries[name]
+        elif name.title() in food_summaries:
+            item['summary'] = food_summaries[name.title()]
+    for item in drinks_list:
+        name = item.get('name', '').lower()
+        if name in drink_summaries:
+            item['summary'] = drink_summaries[name]
+        elif name.title() in drink_summaries:
+            item['summary'] = drink_summaries[name.title()]
+    for item in aspects_list:
+        name = item.get('name', '').lower()
+        if name in aspect_summaries:
+            item['summary'] = aspect_summaries[name]
+        elif name.title() in aspect_summaries:
+            item['summary'] = aspect_summaries[name.title()]
+    print(f"✅ Summaries complete in {time.time() - summary_start:.1f}s")
     # Build analysis data
     analysis_data = {
         "menu_analysis": {