Spaces: Running on CPU Upgrade
import asyncio
import json
import logging
from datetime import datetime, timedelta, timezone

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from playwright.async_api import async_playwright

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger("trustpilot-proxy")

# Page to scrape and how often a successful result is considered fresh.
TRUSTPILOT_URL = "https://www.trustpilot.com/review/collectionsdarchitectes.fr"
REFRESH_INTERVAL = timedelta(hours=48)

# Desktop Chrome UA so Trustpilot serves the regular HTML page.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://collectionsdarchitectes.fr"],
    allow_methods=["GET"],
    allow_headers=["*"],
)

# In-memory cache of the last scrape: payload, timestamp, and last error text.
state = {"data": None, "fetched_at": None, "error": None}
# Serializes refreshes so overlapping triggers never scrape concurrently.
_lock = asyncio.Lock()
async def scrape_once() -> dict:
    """Scrape the Trustpilot page once and return a summary dict.

    Loads the review page in headless Chromium, extracts the embedded
    Next.js state blob (``script#__NEXT_DATA__``), and reduces it to
    ``{"score", "total", "reviews": [...]}``.

    Raises:
        playwright errors on navigation/selector timeouts, and
        ``json.JSONDecodeError`` if the state blob is not valid JSON.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            ctx = await browser.new_context(
                user_agent=USER_AGENT,
                locale="fr-FR",
                viewport={"width": 1280, "height": 900},
            )
            page = await ctx.new_page()
            await page.goto(TRUSTPILOT_URL, wait_until="domcontentloaded", timeout=60_000)
            await page.wait_for_selector("script#__NEXT_DATA__", state="attached", timeout=45_000)
            raw = await page.locator("script#__NEXT_DATA__").inner_text()
        finally:
            # Always release the browser — the original leaked it whenever
            # navigation or the selector wait raised before close().
            await browser.close()
    next_data = json.loads(raw)
    props = next_data.get("props", {}).get("pageProps", {})
    # Trustpilot sometimes ships explicit nulls; `or {}` / `or []` guard that.
    biz = props.get("businessUnit", {}) or {}
    reviews = props.get("reviews", []) or []
    return {
        "score": biz.get("trustScore"),
        "total": biz.get("numberOfReviews"),
        "reviews": [
            {
                "id": r.get("id"),
                "stars": r.get("rating"),
                "title": r.get("title"),
                "text": r.get("text"),
                "author": (r.get("consumer") or {}).get("displayName"),
                "date": (r.get("dates") or {}).get("publishedDate"),
            }
            for r in reviews
        ],
    }
async def refresh() -> None:
    """Run one scrape and fold the outcome into the module-level cache.

    On success, stores the payload and a UTC timestamp and clears any
    previous error; on failure, logs the traceback and records the error
    text while keeping whatever data was cached before.
    """
    async with _lock:
        try:
            payload = await scrape_once()
        except Exception as exc:  # boundary: log and record, never crash the loop
            log.exception("scrape failed")
            state["error"] = f"{type(exc).__name__}: {exc}"
        else:
            state["data"] = payload
            state["fetched_at"] = datetime.now(timezone.utc)
            state["error"] = None
            review_count = len(payload.get("reviews", []))
            log.info(
                "refreshed: score=%s total=%s reviews=%d",
                payload.get("score"),
                payload.get("total"),
                review_count,
            )
# How soon to retry after a failed scrape; the original always slept the
# full REFRESH_INTERVAL (48 h), leaving the API empty/stale for two days
# after a single transient failure.
RETRY_INTERVAL = timedelta(minutes=10)


async def refresh_loop() -> None:
    """Refresh the cache forever: every REFRESH_INTERVAL on success,
    every RETRY_INTERVAL while the last attempt errored."""
    while True:
        await refresh()
        # state["error"] is cleared only by a successful refresh().
        interval = REFRESH_INTERVAL if state["error"] is None else RETRY_INTERVAL
        await asyncio.sleep(interval.total_seconds())
# Strong references to background tasks: asyncio.create_task only keeps a
# weak reference, so an unreferenced task can be garbage-collected mid-run.
_background_tasks: set = set()


# NOTE(review): the pasted source had no registration decorator, so this
# hook was never invoked — restoring FastAPI's startup event here; confirm
# against the deployed app.
@app.on_event("startup")
async def _startup() -> None:
    """Launch the periodic refresh loop when the app starts."""
    task = asyncio.create_task(refresh_loop())
    _background_tasks.add(task)
    task.add_done_callback(_background_tasks.discard)
# NOTE(review): route decorator was missing in the pasted source, leaving
# this handler unregistered; path "/reviews" is assumed — confirm against
# the frontend that consumes this proxy.
@app.get("/reviews")
async def get_reviews():
    """Return the cached Trustpilot payload.

    Before the first successful scrape completes, returns
    ``{"error": ...}`` with the last error text or a warming-up notice.
    """
    if state["data"] is None:
        return {"error": state["error"] or "warming up"}
    return state["data"]
# NOTE(review): route decorator was missing in the pasted source; path
# "/health" is assumed — confirm against the deployment's health probe.
@app.get("/health")
async def health():
    """Liveness/status probe: cache presence, last fetch time (ISO 8601), last error."""
    return {
        "has_data": state["data"] is not None,
        "fetched_at": state["fetched_at"].isoformat() if state["fetched_at"] else None,
        "error": state["error"],
    }