"""Tiny FastAPI proxy that caches Trustpilot reviews scraped via Playwright.

Scrapes the business page every REFRESH_INTERVAL and serves the cached
payload from /reviews, so the browser front end never hits Trustpilot
directly (and never waits on a headless-browser scrape).
"""

import asyncio
import json
import logging
from datetime import datetime, timedelta, timezone

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from playwright.async_api import async_playwright

logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger("trustpilot-proxy")

TRUSTPILOT_URL = "https://www.trustpilot.com/review/collectionsdarchitectes.fr"
REFRESH_INTERVAL = timedelta(hours=48)
# Desktop Chrome UA: Trustpilot serves the full Next.js page (with the
# __NEXT_DATA__ blob we parse) to regular browsers.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)

app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://collectionsdarchitectes.fr"],
    allow_methods=["GET"],
    allow_headers=["*"],
)

# Shared cache: last successful payload, when it was fetched, last error.
state = {"data": None, "fetched_at": None, "error": None}
_lock = asyncio.Lock()

# Strong reference to the background refresh task. asyncio keeps only weak
# references to tasks, so without this the loop could be garbage-collected.
_refresh_task = None


async def scrape_once() -> dict:
    """Scrape the Trustpilot page once and return a normalized payload.

    Reads the embedded Next.js ``__NEXT_DATA__`` JSON blob instead of the
    rendered DOM, which is far more stable across page redesigns.

    Returns:
        dict with ``score``, ``total`` and a list of normalized ``reviews``.

    Raises:
        Propagates Playwright timeout/navigation errors and ``json``
        decoding errors to the caller (handled in ``refresh``).
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            ctx = await browser.new_context(
                user_agent=USER_AGENT,
                locale="fr-FR",
                viewport={"width": 1280, "height": 900},
            )
            page = await ctx.new_page()
            await page.goto(TRUSTPILOT_URL, wait_until="domcontentloaded", timeout=60_000)
            await page.wait_for_selector(
                "script#__NEXT_DATA__", state="attached", timeout=45_000
            )
            raw = await page.locator("script#__NEXT_DATA__").inner_text()
        finally:
            # Close the browser even when navigation or extraction raises,
            # so a failed scrape does not leak a headless Chromium process.
            await browser.close()

    next_data = json.loads(raw)
    props = next_data.get("props", {}).get("pageProps", {})
    biz = props.get("businessUnit", {}) or {}
    reviews = props.get("reviews", []) or []
    return {
        "score": biz.get("trustScore"),
        "total": biz.get("numberOfReviews"),
        "reviews": [
            {
                "id": r.get("id"),
                "stars": r.get("rating"),
                "title": r.get("title"),
                "text": r.get("text"),
                "author": (r.get("consumer") or {}).get("displayName"),
                "date": (r.get("dates") or {}).get("publishedDate"),
            }
            for r in reviews
        ],
    }


async def refresh() -> None:
    """Run one scrape and update the shared cache (serialized by ``_lock``).

    On failure the last good payload is kept and only ``state["error"]`` is
    updated, so /reviews keeps serving stale-but-valid data.
    """
    async with _lock:
        try:
            data = await scrape_once()
            state["data"] = data
            state["fetched_at"] = datetime.now(timezone.utc)
            state["error"] = None
            log.info(
                "refreshed: score=%s total=%s reviews=%d",
                data.get("score"),
                data.get("total"),
                len(data.get("reviews", [])),
            )
        except Exception as e:
            log.exception("scrape failed")
            state["error"] = f"{type(e).__name__}: {e}"


async def refresh_loop() -> None:
    """Refresh immediately at startup, then every ``REFRESH_INTERVAL``."""
    while True:
        await refresh()
        await asyncio.sleep(REFRESH_INTERVAL.total_seconds())


@app.on_event("startup")
async def _startup() -> None:
    # NOTE(review): ``on_event`` is deprecated in recent FastAPI in favor of
    # lifespan handlers; kept for compatibility with the installed version.
    global _refresh_task
    # Hold a reference so the task survives garbage collection.
    _refresh_task = asyncio.create_task(refresh_loop())


@app.get("/reviews")
async def get_reviews():
    """Return the cached Trustpilot payload, or an error while warming up."""
    if state["data"] is None:
        return {"error": state["error"] or "warming up"}
    return state["data"]


@app.get("/health")
async def health():
    """Report cache freshness and the last scrape error, if any."""
    return {
        "has_data": state["data"] is not None,
        "fetched_at": state["fetched_at"].isoformat() if state["fetched_at"] else None,
        "error": state["error"],
    }