TushP committed on
Commit
37e60ef
·
verified ·
1 Parent(s): df41fce

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. modal_backend.py +210 -4
modal_backend.py CHANGED
@@ -312,6 +312,148 @@ def _fallback_insights(role: str) -> Dict[str, Any]:
312
  }
313
 
314
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  # ============================================================================
316
  # MAIN ANALYSIS FUNCTION - PARALLEL OPTIMIZED
317
  # ============================================================================
@@ -361,11 +503,35 @@ def full_analysis_parallel(url: str, max_reviews: int = 100) -> Dict[str, Any]:
361
 
362
  print(f"✅ Scraping complete in {time.time() - scrape_start:.1f}s")
363
 
364
- # Process reviews
365
- from src.data_processing import process_reviews, clean_reviews_for_ai
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
- df = process_reviews(result)
368
- reviews = clean_reviews_for_ai(df["review_text"].tolist(), verbose=False)
 
 
 
 
 
369
 
370
  print(f"📊 Total reviews: {len(reviews)}")
371
 
@@ -481,6 +647,46 @@ def full_analysis_parallel(url: str, max_reviews: int = 100) -> Dict[str, Any]:
481
 
482
  print(f"📊 Discovered: {len(food_list)} food + {len(drinks_list)} drinks + {len(aspects_list)} aspects")
483
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
  # Build analysis data
485
  analysis_data = {
486
  "menu_analysis": {
 
312
  }
313
 
314
 
315
+ # ============================================================================
316
+ # SUMMARY GENERATION - Single API call for ALL summaries (like original)
317
+ # ============================================================================
318
+
319
def _compact_items(
    items: List[Dict[str, Any]],
    limit: int,
    max_reviews: int = 2,
    max_chars: int = 150,
) -> List[Dict[str, Any]]:
    """Reduce the first *limit* items to the compact form embedded in the prompt.

    Each output entry carries the item's name, sentiment, mention count and up
    to *max_reviews* review excerpts truncated to *max_chars* characters.
    Missing or ``None`` review data is treated as "no excerpt" rather than
    raising, so one malformed item cannot abort the whole summary call.
    """
    compact = []
    for item in (items or [])[:limit]:
        excerpts = []
        # `or []` covers a 'related_reviews' key that is present but None.
        for review in (item.get('related_reviews') or [])[:max_reviews]:
            if isinstance(review, dict):
                text = str(review.get('review_text') or '')
            elif review is None:
                text = ''
            else:
                text = str(review)
            text = text[:max_chars]
            if text:
                excerpts.append(text)
        compact.append({
            'name': item.get('name', 'unknown'),
            'sentiment': item.get('sentiment', 0),
            'mentions': item.get('mention_count', 0),
            'reviews': excerpts,
        })
    return compact


def _with_lowercase_aliases(entries: Any) -> Dict[str, Any]:
    """Return a copy of *entries* with an extra lowercase key for every name.

    The model echoes item names in whatever casing it chooses, while callers
    look summaries up by ``name.lower()``. Original keys are kept untouched so
    exact-case lookups keep working; existing keys are never overwritten.
    Anything that is not a dict yields an empty dict.
    """
    if not isinstance(entries, dict):
        return {}
    aliased = dict(entries)
    for name, summary in entries.items():
        if isinstance(name, str):
            aliased.setdefault(name.lower(), summary)
    return aliased


@app.function(
    image=image,
    secrets=[modal.Secret.from_name("anthropic-api-key")],
    timeout=120,
)
def generate_all_summaries(
    food_items: List[Dict[str, Any]],
    drinks: List[Dict[str, Any]],
    aspects: List[Dict[str, Any]],
    restaurant_name: str
) -> Dict[str, Dict[str, str]]:
    """
    Generate ALL summaries in a SINGLE API call.

    The top 15 food items, 10 drinks and 15 aspects are compacted (name,
    sentiment, mention count, up to two 150-character review excerpts each)
    and sent to the model in one prompt.

    Args:
        food_items: Food item dicts; reads 'name', 'sentiment',
            'mention_count' and 'related_reviews'.
        drinks: Drink dicts with the same keys.
        aspects: Aspect dicts with the same keys.
        restaurant_name: Name interpolated into the prompt.

    Returns:
        {"food": {"item_name": "summary"}, "drinks": {...}, "aspects": {...}}
        Each category also contains a lowercase alias of every item name so
        that case-insensitive lookups succeed. All three categories are empty
        when there is nothing to summarise, when the response contains no
        JSON, or when the API call / parsing fails (the error is printed).

    Raises:
        KeyError: If the ANTHROPIC_API_KEY environment variable is not set.
    """
    from anthropic import Anthropic
    import json
    import os
    import re

    categories = ('food', 'drinks', 'aspects')

    def empty_result() -> Dict[str, Dict[str, str]]:
        # Fresh dicts on every call so callers can never share mutable state.
        return {"food": {}, "drinks": {}, "aspects": {}}

    # Build compact data for prompt (top items only)
    food_data = _compact_items(food_items, 15)
    drink_data = _compact_items(drinks, 10)
    aspect_data = _compact_items(aspects, 15)

    # Nothing to summarise: skip the (billable) API call entirely.
    if not (food_data or drink_data or aspect_data):
        return empty_result()

    client = Anthropic(api_key=os.environ["ANTHROPIC_API_KEY"])

    prompt = f"""You are a restaurant review analyst for {restaurant_name}. Generate brief, specific summaries for each item.

FOOD ITEMS:
{json.dumps(food_data, indent=2)}

DRINKS:
{json.dumps(drink_data, indent=2)}

ASPECTS:
{json.dumps(aspect_data, indent=2)}

For EACH item, write a 2-3 sentence summary that:
1. Synthesizes what customers say (use the sample reviews provided)
2. Reflects the sentiment score (positive if >= 0.6, negative if < 0, neutral otherwise)
3. Gives actionable insight for restaurant staff

OUTPUT FORMAT (JSON):
{{
  "food": {{
    "item name": "2-3 sentence summary based on reviews...",
    "another item": "summary..."
  }},
  "drinks": {{
    "drink name": "summary..."
  }},
  "aspects": {{
    "aspect name": "summary..."
  }}
}}

CRITICAL: Output ONLY valid JSON. Generate summaries for ALL items listed above."""

    try:
        response = client.messages.create(
            model="claude-sonnet-4-20250514",
            max_tokens=4000,
            temperature=0.4,
            messages=[{"role": "user", "content": prompt}]
        )

        # Strip markdown code fences the model may wrap around the JSON.
        result_text = response.content[0].text.strip()
        result_text = result_text.replace('```json', '').replace('```', '').strip()

        # Greedy match from the first '{' to the last '}' isolates the JSON
        # object even if the model added prose around it.
        match = re.search(r'\{[\s\S]*\}', result_text)
        if not match:
            print("⚠️ No JSON found in summary response")
            return empty_result()

        parsed = json.loads(match.group())

        # Count what the model actually produced, before aliases are added.
        counts = {
            category: len(parsed[category]) if isinstance(parsed.get(category), dict) else 0
            for category in categories
        }

        summaries = dict(parsed)
        for category in categories:
            summaries[category] = _with_lowercase_aliases(parsed.get(category))

        print(f"✅ Generated summaries: {counts['food']} food, {counts['drinks']} drinks, {counts['aspects']} aspects")
        return summaries

    except Exception as e:
        # Deliberate best-effort: summaries are optional, so any API or
        # parsing failure degrades to "no summaries" instead of failing the
        # whole analysis.
        print(f"⚠️ Summary generation error: {e}")
        return empty_result()
455
+
456
+
457
  # ============================================================================
458
  # MAIN ANALYSIS FUNCTION - PARALLEL OPTIMIZED
459
  # ============================================================================
 
503
 
504
  print(f"✅ Scraping complete in {time.time() - scrape_start:.1f}s")
505
 
506
+ # Process reviews - FIXED: Handle both old and new scraper formats
507
+ from src.data_processing import clean_reviews_for_ai
508
+ import pandas as pd
509
+
510
+ # The scraper returns data at top level, not nested under 'reviews'
511
+ # Build DataFrame directly from scraper result
512
+ if 'names' in result:
513
+ # New format: data at top level
514
+ df = pd.DataFrame({
515
+ 'name': result.get('names', []),
516
+ 'date': result.get('dates', []),
517
+ 'overall_rating': result.get('overall_ratings', []),
518
+ 'food_rating': result.get('food_ratings', []),
519
+ 'service_rating': result.get('service_ratings', []),
520
+ 'ambience_rating': result.get('ambience_ratings', []),
521
+ 'review_text': result.get('reviews', [])
522
+ })
523
+ else:
524
+ # Fallback: try old format with process_reviews
525
+ from src.data_processing import process_reviews
526
+ df = process_reviews(result)
527
 
528
+ # Convert ratings to numeric
529
+ for col in ['overall_rating', 'food_rating', 'service_rating', 'ambience_rating']:
530
+ if col in df.columns:
531
+ df[col] = pd.to_numeric(df[col], errors='coerce')
532
+
533
+ # Get clean review texts
534
+ reviews = clean_reviews_for_ai(df["review_text"].dropna().tolist(), verbose=False)
535
 
536
  print(f"📊 Total reviews: {len(reviews)}")
537
 
 
647
 
648
  print(f"📊 Discovered: {len(food_list)} food + {len(drinks_list)} drinks + {len(aspects_list)} aspects")
649
 
650
+ # Phase 2.5: Generate ALL summaries in ONE API call (like original)
651
+ print("📝 Phase 2.5: Generating summaries (single API call)...")
652
+ summary_start = time.time()
653
+
654
+ # Call the single summary function
655
+ summaries = generate_all_summaries.remote(
656
+ food_items=food_list[:15],
657
+ drinks=drinks_list[:10],
658
+ aspects=aspects_list[:15],
659
+ restaurant_name=restaurant_name
660
+ )
661
+
662
+ # Apply summaries to items
663
+ food_summaries = summaries.get('food', {})
664
+ drink_summaries = summaries.get('drinks', {})
665
+ aspect_summaries = summaries.get('aspects', {})
666
+
667
+ for item in food_list:
668
+ name = item.get('name', '').lower()
669
+ if name in food_summaries:
670
+ item['summary'] = food_summaries[name]
671
+ elif name.title() in food_summaries:
672
+ item['summary'] = food_summaries[name.title()]
673
+
674
+ for item in drinks_list:
675
+ name = item.get('name', '').lower()
676
+ if name in drink_summaries:
677
+ item['summary'] = drink_summaries[name]
678
+ elif name.title() in drink_summaries:
679
+ item['summary'] = drink_summaries[name.title()]
680
+
681
+ for item in aspects_list:
682
+ name = item.get('name', '').lower()
683
+ if name in aspect_summaries:
684
+ item['summary'] = aspect_summaries[name]
685
+ elif name.title() in aspect_summaries:
686
+ item['summary'] = aspect_summaries[name.title()]
687
+
688
+ print(f"✅ Summaries complete in {time.time() - summary_start:.1f}s")
689
+
690
  # Build analysis data
691
  analysis_data = {
692
  "menu_analysis": {