# XHS/service/app.py
# Trae Bot
# Support XHS_STORAGE_STATE_JSON secret injection
# 26f14ba
from __future__ import annotations
import sys
from pathlib import Path
from fastapi import FastAPI, Request
from fastapi.exceptions import RequestValidationError
from fastapi.responses import JSONResponse, RedirectResponse
from starlette.exceptions import HTTPException as StarletteHTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
# Directory one level above the folder that contains this module.
_pkg_root = Path(__file__).resolve().parents[1]

# Put that directory at the front of the import search path, but only once,
# so re-importing this module does not stack duplicate entries.
if sys.path.count(str(_pkg_root)) == 0:
    sys.path.insert(0, str(_pkg_root))
from .api import router as base_router
from .config import load_config
from .errors import ApiException, ERROR_CODE_INVALID_TARGET, ERROR_CODE_PARSE, ERROR_SPECS
from .logging_setup import configure_logging
from .raw_cleanup import run_raw_snapshot_cleanup_once, start_raw_snapshot_cleanup_worker
from .rate_limit import PerIPRateLimitMiddleware
from .runner import TaskRunner
from .response import ApiResponse
from contextlib import asynccontextmanager
import os
from .proxy_pool import proxy_pool
from .storage import LocalJsonStorage
def create_app() -> FastAPI:
    """Build and configure the FastAPI application.

    Loads the service configuration, wires the lifespan hook (storage,
    raw-snapshot cleanup, task runner, proxy pool), installs the CORS and
    per-IP rate-limit middleware, registers the API router, serves the built
    frontend when it exists, and maps exceptions onto the ``ApiResponse``
    envelope.

    Returns:
        The fully configured ``FastAPI`` instance.
    """
    # Imported locally so this function brings its own dependency into scope.
    from fastapi.encoders import jsonable_encoder

    config = load_config()

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        """Set up shared services on startup and stop the proxy pool on shutdown."""
        # Check for XHS_STORAGE_STATE_JSON secret and write to file
        storage_state_content = os.getenv("XHS_STORAGE_STATE_JSON")
        if storage_state_content:
            state_path = _pkg_root / "storage" / "state.json"
            state_path.parent.mkdir(parents=True, exist_ok=True)
            with open(state_path, "w", encoding="utf-8") as f:
                f.write(storage_state_content)
            print(f"Loaded storage state from secret into {state_path}")

        configure_logging(app.state.config.storage_dir)
        storage = LocalJsonStorage(app.state.config.storage_dir)
        storage.init()

        # Missing or falsy configuration falls back to 7 days; computed once
        # and shared by the one-off cleanup and the background worker.
        retention_days = int(getattr(app.state.config, "raw_data_retention_days", 7) or 7)
        run_raw_snapshot_cleanup_once(storage=storage, retention_days=retention_days)
        start_raw_snapshot_cleanup_worker(storage=storage, retention_days=retention_days)

        runner = TaskRunner(storage=storage, config=app.state.config)
        app.state.storage = storage
        app.state.runner = runner

        await proxy_pool.start()
        try:
            yield
        finally:
            # Runs even when an exception or cancellation is thrown into the
            # generator at ``yield``, so the pool is never left running.
            await proxy_pool.stop()

    app = FastAPI(title="Spider_XHS Service", lifespan=lifespan)
    app.state.config = config

    # NOTE(review): wildcard origins together with allow_credentials=True is
    # very permissive — confirm this is intended outside of development.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
    app.add_middleware(
        PerIPRateLimitMiddleware,
        capacity=100,
        window_seconds=60,
        enable_legacy_routes=config.enable_legacy_routes,
    )

    # The router is always exposed under /api/v1; legacy mode additionally
    # exposes the same router without a prefix.
    app.include_router(base_router, prefix="/api/v1")
    if config.enable_legacy_routes:
        app.include_router(base_router)

    # Serve built frontend static files
    frontend_dist = _pkg_root / "frontend" / "dist"
    if frontend_dist.is_dir():
        app.mount("/", StaticFiles(directory=str(frontend_dist), html=True), name="frontend")
    else:

        @app.get("/", include_in_schema=False)
        def root_redirect():
            """Send visitors to the interactive API docs when no frontend build exists."""
            return RedirectResponse(url="/docs")

    def _envelope(status_code: int, code, msg: str, data, headers=None) -> JSONResponse:
        """Wrap an error in the ``ApiResponse`` envelope and return it as JSON."""
        payload = ApiResponse(code=code, msg=msg, data=data).model_dump()
        return JSONResponse(status_code=status_code, content=payload, headers=headers)

    # HTTP statuses answered purely from an ERROR_SPECS entry (no data payload).
    spec_key_by_status = {401: "auth", 403: "auth", 429: "rate", 409: "task_not_ready"}

    @app.exception_handler(ApiException)
    async def _handle_api_exception(_: Request, exc: ApiException) -> JSONResponse:
        """Render a domain ``ApiException`` using its own code, message and status."""
        return _envelope(exc.http_status, exc.code, exc.msg, exc.data)

    @app.exception_handler(RequestValidationError)
    async def _handle_validation_error(_: Request, exc: RequestValidationError) -> JSONResponse:
        """Report request validation failures as HTTP 400 with the error list."""
        # Validation errors may embed objects that are not JSON-serialisable;
        # encode them first, as FastAPI's default handler does.
        errors = jsonable_encoder(exc.errors())
        return _envelope(400, ERROR_CODE_INVALID_TARGET, "invalid target", errors)

    @app.exception_handler(StarletteHTTPException)
    async def _handle_http_exception(_: Request, exc: StarletteHTTPException) -> JSONResponse:
        """Translate framework HTTP errors into the ``ApiResponse`` envelope."""
        status = int(getattr(exc, "status_code", 500) or 500)
        detail = getattr(exc, "detail", None)
        # Forward headers attached to the exception (e.g. WWW-Authenticate,
        # Allow) instead of silently dropping them.
        headers = getattr(exc, "headers", None)

        spec_key = spec_key_by_status.get(status)
        if spec_key is not None:
            spec = ERROR_SPECS[spec_key]
            return _envelope(spec.http_status, spec.code, str(detail or spec.default_msg), None, headers)

        if status == 404:
            spec = ERROR_SPECS["invalid_target"]
            return _envelope(404, spec.code, "not found", {"detail": detail}, headers)

        if status in (400, 405, 422):
            # All client-side request errors are collapsed into HTTP 400.
            spec = ERROR_SPECS["invalid_target"]
            return _envelope(400, spec.code, str(detail or spec.default_msg), {"detail": detail}, headers)

        # Anything else keeps its status when it is an error status,
        # otherwise it is reported as 500.
        return _envelope(
            status if status >= 400 else 500,
            ERROR_CODE_PARSE,
            str(detail or "internal error"),
            {"detail": detail},
            headers,
        )

    @app.exception_handler(Exception)
    async def _handle_unexpected_exception(_: Request, exc: Exception) -> JSONResponse:
        """Catch-all: report any unhandled exception as HTTP 500."""
        return _envelope(500, ERROR_CODE_PARSE, "internal error", {"error": str(exc)})

    return app
# Module-level ASGI application, built once when this module is imported.
app = create_app()

if __name__ == "__main__":
    # Imported here so uvicorn is only required when running this file as a script.
    import uvicorn

    uvicorn.run(
        "Spider_XHS.service.app:app",
        host="0.0.0.0",
        port=8000,
    )