Spaces:
Sleeping
Sleeping
File size: 4,089 Bytes
bb9baa9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 | """
Integration Script: Scraper → Agent → Analysis
Wires together the complete pipeline.
"""
import pandas as pd
from src.scrapers.opentable_scraper import scrape_opentable
from src.data_processing import process_reviews
from src.agent.base_agent import RestaurantAnalysisAgent
from src.data_processing import clean_reviews_for_ai
# End-to-end smoke run of the pipeline: scrape OpenTable reviews, turn them
# into a DataFrame, clean the review texts, run RestaurantAnalysisAgent on
# them, then export the results and exercise the agent's Q&A / report / chart
# helpers. Progress is reported on stdout; the process exits with status 1 if
# scraping fails.
#
# NOTE(review): the status glyphs in the messages below were mis-encoded in
# the copy under review and have been reconstructed on a best-effort basis --
# confirm against the original file.

# Imported up front (it used to be imported mid-script) so that a missing
# helper is reported before the slow scrape and analysis, not after them.
from src.data_processing import save_to_csv

RULE = "=" * 80  # horizontal rule framing each section of the console output

print(RULE)
print("🔥 COMPLETE PIPELINE: Scraper → Agent → Analysis")
print(RULE + "\n")

# Step 1: Scrape reviews
print("📥 Step 1: Scraping OpenTable...")
url = "https://www.opentable.ca/r/nightingale-vancouver?originId=a3c30a8e-25aa-43b9-9f09-9f0980f22365&corrid=a3c30a8e-25aa-43b9-9f09-9f0980f22365&avt=eyJ2IjoyLCJtIjoxLCJwIjowLCJzIjoxLCJuIjowfQ"
restaurant_name = "Nightingale"

scraper_result = scrape_opentable(url, max_reviews=50, headless=True)

if not scraper_result['success']:
    print(f"❌ Scraping failed: {scraper_result.get('error')}")
    # `exit()` is a convenience injected by the `site` module for interactive
    # use and may be absent (e.g. `python -S`); SystemExit always works.
    raise SystemExit(1)

print(f"✅ Scraped {scraper_result['total_reviews']} reviews\n")

# Step 2: Process to DataFrame
print("⚙️ Step 2: Processing data...")
df = process_reviews(scraper_result)
print(f"✅ Processed {len(df)} reviews into DataFrame\n")

# Step 3: Convert to format agents expect (List[str])
print("🔄 Step 3: Converting to agent format...")
review_texts = df['review_text'].dropna().tolist()
review_texts = clean_reviews_for_ai(review_texts, verbose=True)
print(f"✅ Converted to {len(review_texts)} review texts\n")

# Step 4: Initialize agent
print("🤖 Step 4: Initializing Restaurant Analysis Agent...")
agent = RestaurantAnalysisAgent()
print("✅ Agent initialized with all sub-agents\n")

# Step 5: Run complete analysis
print("🚀 Step 5: Running complete analysis...")
print("-" * 80)

results = agent.analyze_restaurant(
    restaurant_url=url,
    restaurant_name=restaurant_name,
    reviews=review_texts,  # ← Pass list of strings
    review_count=str(len(review_texts)),
)

print("\n" + RULE)
print("📊 ANALYSIS RESULTS")
print(RULE + "\n")

if results['success']:
    print("✅ Analysis completed successfully!\n")

    # Menu analysis
    menu_count = len(results['menu_analysis'].get('food_items', []))
    drink_count = len(results['menu_analysis'].get('drinks', []))
    print(f"🍽️ Menu Items Discovered: {menu_count} food + {drink_count} drinks")

    # Aspect analysis
    aspect_count = len(results['aspect_analysis'].get('aspects', []))
    print(f"🔍 Aspects Discovered: {aspect_count}")

    # Insights
    print("\n💡 Insights Generated:")
    chef_recs = results['insights']['chef'].get('recommendations', [])
    print(f" • Chef insights: {len(chef_recs)} recommendations")
    manager_recs = results['insights']['manager'].get('recommendations', [])
    print(f" • Manager insights: {len(manager_recs)} recommendations")

    # Step 6: Export everything
    print("\n" + RULE)
    print("💾 Step 6: Exporting results...")
    print(RULE + "\n")

    # Save raw data. The file name follows the restaurant being analysed; it
    # was previously hard-coded to another restaurant's name ("miku"), which
    # would have overwritten that restaurant's raw data with these reviews.
    raw_csv_path = f"data/raw/{restaurant_name.lower()}_reviews.csv"
    save_to_csv(df, raw_csv_path)

    # Save analysis
    saved_files = agent.export_analysis('outputs')
    print("✅ Saved analysis files:")
    for key, path in saved_files.items():
        print(f" • {key}: {path}")

    # Step 7: Test MCP tools
    print("\n" + RULE)
    print("🔧 Step 7: Testing MCP Tools")
    print(RULE + "\n")

    # Q&A
    # NOTE(review): this question mentions sushi, which (like the old CSV
    # name) may be a leftover from a different restaurant -- confirm.
    print("🤔 Q&A Test:")
    question = "What do customers say about the sushi?"
    answer = agent.ask_question(question)
    print(f" Q: {question}")
    print(f" A: {answer[:200]}...\n")  # only the first 200 characters are shown

    # Save report
    print("📄 Save Report Test:")
    report_path = agent.save_analysis_report('reports')
    print(f" ✅ Report saved to: {report_path}\n")

    # Generate charts
    print("📊 Generate Charts Test:")
    charts = agent.generate_visualizations()
    for chart_type, path in charts.items():
        print(f" ✅ {chart_type}: {path}")
else:
    print(f"❌ Analysis failed: {results.get('error')}")

print("\n" + RULE)
print("🎉 COMPLETE PIPELINE TEST FINISHED!")
print(RULE)
|