from __future__ import annotations import sys from pathlib import Path from fastapi import FastAPI, Request from fastapi.exceptions import RequestValidationError from fastapi.responses import JSONResponse, RedirectResponse from starlette.exceptions import HTTPException as StarletteHTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.staticfiles import StaticFiles _pkg_root = Path(__file__).resolve().parents[1] if str(_pkg_root) not in sys.path: sys.path.insert(0, str(_pkg_root)) from .api import router as base_router from .config import load_config from .errors import ApiException, ERROR_CODE_INVALID_TARGET, ERROR_CODE_PARSE, ERROR_SPECS from .logging_setup import configure_logging from .raw_cleanup import run_raw_snapshot_cleanup_once, start_raw_snapshot_cleanup_worker from .rate_limit import PerIPRateLimitMiddleware from .runner import TaskRunner from .response import ApiResponse from contextlib import asynccontextmanager import os from .proxy_pool import proxy_pool from .storage import LocalJsonStorage def create_app() -> FastAPI: config = load_config() @asynccontextmanager async def lifespan(app: FastAPI): # Check for XHS_STORAGE_STATE_JSON secret and write to file storage_state_content = os.getenv("XHS_STORAGE_STATE_JSON") if storage_state_content: state_path = _pkg_root / "storage" / "state.json" state_path.parent.mkdir(parents=True, exist_ok=True) with open(state_path, "w", encoding="utf-8") as f: f.write(storage_state_content) print(f"Loaded storage state from secret into {state_path}") configure_logging(app.state.config.storage_dir) storage = LocalJsonStorage(app.state.config.storage_dir) storage.init() run_raw_snapshot_cleanup_once( storage=storage, retention_days=int(getattr(app.state.config, "raw_data_retention_days", 7) or 7), ) start_raw_snapshot_cleanup_worker( storage=storage, retention_days=int(getattr(app.state.config, "raw_data_retention_days", 7) or 7), ) runner = TaskRunner(storage=storage, config=app.state.config) app.state.storage = storage app.state.runner = runner await proxy_pool.start() yield await proxy_pool.stop() app = FastAPI(title="Spider_XHS Service", lifespan=lifespan) app.state.config = config app.add_middleware( CORSMiddleware, allow_origins=["*"], allow_credentials=True, allow_methods=["*"], allow_headers=["*"], ) app.add_middleware( PerIPRateLimitMiddleware, capacity=100, window_seconds=60, enable_legacy_routes=config.enable_legacy_routes, ) app.include_router(base_router, prefix="/api/v1") if config.enable_legacy_routes: app.include_router(base_router) # Serve built frontend static files frontend_dist = _pkg_root / "frontend" / "dist" if frontend_dist.exists() and frontend_dist.is_dir(): app.mount("/", StaticFiles(directory=str(frontend_dist), html=True), name="frontend") else: @app.get("/", include_in_schema=False) def root_redirect(): return RedirectResponse(url="/docs") @app.exception_handler(ApiException) async def _handle_api_exception(_: Request, exc: ApiException) -> JSONResponse: payload = ApiResponse(code=exc.code, msg=exc.msg, data=exc.data).model_dump() return JSONResponse(status_code=exc.http_status, content=payload) @app.exception_handler(RequestValidationError) async def _handle_validation_error(_: Request, exc: RequestValidationError) -> JSONResponse: payload = ApiResponse(code=ERROR_CODE_INVALID_TARGET, msg="invalid target", data=exc.errors()).model_dump() return JSONResponse(status_code=400, content=payload) @app.exception_handler(StarletteHTTPException) async def _handle_http_exception(_: Request, exc: StarletteHTTPException) -> JSONResponse: status = int(getattr(exc, "status_code", 500) or 500) detail = getattr(exc, "detail", None) if status in (401, 403): spec = ERROR_SPECS["auth"] payload = ApiResponse(code=spec.code, msg=str(detail or spec.default_msg), data=None).model_dump() return JSONResponse(status_code=spec.http_status, content=payload) if status == 429: spec = ERROR_SPECS["rate"] payload = ApiResponse(code=spec.code, msg=str(detail or spec.default_msg), data=None).model_dump() return JSONResponse(status_code=spec.http_status, content=payload) if status == 409: spec = ERROR_SPECS["task_not_ready"] payload = ApiResponse(code=spec.code, msg=str(detail or spec.default_msg), data=None).model_dump() return JSONResponse(status_code=spec.http_status, content=payload) if status == 404: spec = ERROR_SPECS["invalid_target"] payload = ApiResponse(code=spec.code, msg="not found", data={"detail": detail}).model_dump() return JSONResponse(status_code=404, content=payload) if status in (400, 405, 422): spec = ERROR_SPECS["invalid_target"] payload = ApiResponse(code=spec.code, msg=str(detail or spec.default_msg), data={"detail": detail}).model_dump() return JSONResponse(status_code=400, content=payload) payload = ApiResponse(code=ERROR_CODE_PARSE, msg=str(detail or "internal error"), data={"detail": detail}).model_dump() return JSONResponse(status_code=status if status >= 400 else 500, content=payload) @app.exception_handler(Exception) async def _handle_unexpected_exception(_: Request, exc: Exception) -> JSONResponse: payload = ApiResponse(code=ERROR_CODE_PARSE, msg="internal error", data={"error": str(exc)}).model_dump() return JSONResponse(status_code=500, content=payload) return app app = create_app() if __name__ == "__main__": import uvicorn uvicorn.run("Spider_XHS.service.app:app", host="0.0.0.0", port=8000)