File size: 3,307 Bytes
702687b
 
 
 
 
ad7cdc3
e681fab
702687b
e681fab
702687b
 
e681fab
702687b
 
 
 
 
 
 
 
e681fab
 
 
 
 
 
 
702687b
 
ad7cdc3
 
702687b
 
 
 
 
 
 
 
 
 
 
 
 
e681fab
702687b
 
 
 
 
ad7cdc3
 
 
 
ec60df8
ad7cdc3
 
 
702687b
 
ad7cdc3
 
 
 
 
702687b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e681fab
 
 
ad7cdc3
702687b
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import asyncio
import json
import logging
from datetime import datetime, timedelta, timezone

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from playwright.async_api import async_playwright

# Configure logging once at import time; this module logs through `log`.
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
log = logging.getLogger("trustpilot-proxy")

# Page that scrape_once() loads to read the review data.
TRUSTPILOT_URL = "https://www.trustpilot.com/review/collectionsdarchitectes.fr"
# How long refresh_loop() sleeps between two refresh attempts.
REFRESH_INTERVAL = timedelta(hours=48)
# User-Agent header given to the Playwright browser context.
USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
)

app = FastAPI()
# CORS: only the listed origin may call this API cross-origin, and only
# with GET requests; any request header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["https://collectionsdarchitectes.fr"],
    allow_methods=["GET"],
    allow_headers=["*"],
)

# In-memory cache shared by the background refresher and the HTTP handlers:
#   data       -- result of the last successful scrape (None until one succeeds)
#   fetched_at -- timezone-aware UTC datetime of that successful scrape
#   error      -- "<ExceptionType>: <message>" of the latest failed scrape,
#                 reset to None by the next successful one
state = {"data": None, "fetched_at": None, "error": None}
# Held by refresh() for the whole scrape so that two scrapes never overlap.
_lock = asyncio.Lock()


def _parse_next_data(raw: str) -> dict:
    """Reduce a raw ``__NEXT_DATA__`` JSON payload to the fields this API serves.

    Args:
        raw: JSON text taken from the page's ``script#__NEXT_DATA__`` element.

    Returns:
        A dict with ``score``, ``total`` and ``reviews``; each review carries
        ``id``, ``stars``, ``title``, ``text``, ``author`` and ``date``.
        Fields that are absent from the payload come back as ``None``.

    Raises:
        json.JSONDecodeError: if ``raw`` is not valid JSON.
    """
    next_data = json.loads(raw)
    # Use `or` fallbacks rather than .get() defaults: a key that is present
    # with an explicit JSON null would otherwise yield None and break the
    # chained lookups below.
    props = (next_data.get("props") or {}).get("pageProps") or {}
    biz = props.get("businessUnit") or {}
    reviews = props.get("reviews") or []
    return {
        "score": biz.get("trustScore"),
        "total": biz.get("numberOfReviews"),
        "reviews": [
            {
                "id": r.get("id"),
                "stars": r.get("rating"),
                "title": r.get("title"),
                "text": r.get("text"),
                "author": (r.get("consumer") or {}).get("displayName"),
                "date": (r.get("dates") or {}).get("publishedDate"),
            }
            for r in reviews
        ],
    }


async def scrape_once() -> dict:
    """Load the Trustpilot page in headless Chromium and extract its review data.

    The page is opened with the configured user agent, the coroutine waits for
    the ``script#__NEXT_DATA__`` element to be attached, and the JSON inside
    it is parsed.

    Returns:
        A dict with ``score``, ``total`` and ``reviews`` (see
        ``_parse_next_data`` for the exact shape).

    Raises:
        Any exception raised by Playwright while launching, navigating or
        waiting (for example when a timeout elapses), and
        ``json.JSONDecodeError`` if the embedded payload is not valid JSON.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            ctx = await browser.new_context(
                user_agent=USER_AGENT,
                locale="fr-FR",
                viewport={"width": 1280, "height": 900},
            )
            page = await ctx.new_page()
            await page.goto(TRUSTPILOT_URL, wait_until="domcontentloaded", timeout=60_000)
            # The script tag is never visible, so wait for "attached" rather
            # than the default "visible" state.
            await page.wait_for_selector("script#__NEXT_DATA__", state="attached", timeout=45_000)
            raw = await page.locator("script#__NEXT_DATA__").inner_text()
        finally:
            # Close the browser even when navigation or the selector wait
            # fails, so a failed scrape does not skip the explicit shutdown.
            await browser.close()

    return _parse_next_data(raw)


async def refresh() -> None:
    """Run one scrape and record its outcome in the shared ``state`` dict.

    On success the cached data, the UTC fetch time and a cleared error are
    stored. On any ``Exception`` the failure is logged with its traceback and
    ``state["error"]`` is set to ``"<ExceptionType>: <message>"``; previously
    cached data is left in place. The module lock is held throughout, so
    concurrent calls run one after another.
    """
    async with _lock:
        try:
            snapshot = await scrape_once()
            state.update(
                data=snapshot,
                fetched_at=datetime.now(timezone.utc),
                error=None,
            )
            review_count = len(snapshot.get("reviews", []))
            log.info(
                "refreshed: score=%s total=%s reviews=%d",
                snapshot.get("score"),
                snapshot.get("total"),
                review_count,
            )
        except Exception as exc:
            log.exception("scrape failed")
            state["error"] = f"{type(exc).__name__}: {exc}"


# First delay used after a failed refresh; doubled after every further
# consecutive failure and never allowed to exceed REFRESH_INTERVAL.
_RETRY_INITIAL_DELAY = timedelta(minutes=5)


async def refresh_loop() -> None:
    """Refresh the cached data forever, retrying sooner after a failure.

    After a successful refresh the loop sleeps for ``REFRESH_INTERVAL``.
    After a failed one (``refresh()`` leaves a message in ``state["error"]``)
    it sleeps for a short delay that doubles on each consecutive failure,
    capped at ``REFRESH_INTERVAL``. Without this, a single transient failure,
    including the very first scrape at startup, would leave the service
    without fresh data for the whole refresh interval.

    This coroutine never returns; it ends only when its task is cancelled.
    """
    retry_delay = _RETRY_INITIAL_DELAY
    while True:
        await refresh()
        if state["error"] is None:
            delay = REFRESH_INTERVAL
            retry_delay = _RETRY_INITIAL_DELAY
        else:
            delay = min(retry_delay, REFRESH_INTERVAL)
            retry_delay = min(retry_delay * 2, REFRESH_INTERVAL)
            log.warning("refresh failed; retrying in %s", delay)
        await asyncio.sleep(delay.total_seconds())


@app.on_event("startup")
async def _startup() -> None:
    """Start the background refresh loop when the application starts.

    The task is stored on ``app.state`` because the event loop keeps only a
    weak reference to tasks: a task whose result is discarded can be
    garbage-collected before it finishes, which would silently stop the
    periodic refresh.
    """
    app.state.refresh_task = asyncio.create_task(refresh_loop())


@app.get("/reviews")
async def get_reviews():
    """Serve the cached review data.

    Returns the last successfully scraped payload. While no scrape has
    succeeded yet, returns ``{"error": ...}`` carrying the latest failure
    message, or ``"warming up"`` when no failure has been recorded.
    """
    cached = state["data"]
    if cached is not None:
        return cached
    return {"error": state["error"] or "warming up"}


@app.get("/health")
async def health():
    """Report the state of the cache.

    Returns a dict with ``has_data`` (whether a scrape has ever succeeded),
    ``fetched_at`` (ISO-8601 time of the last successful scrape, or ``None``)
    and ``error`` (message of the latest failed scrape, or ``None``).
    """
    last_fetch = state["fetched_at"]
    fetched_iso = last_fetch.isoformat() if last_fetch else None
    return {
        "has_data": state["data"] is not None,
        "fetched_at": fetched_iso,
        "error": state["error"],
    }