| from __future__ import annotations |
|
|
| import sys |
| from pathlib import Path |
|
|
| from fastapi import FastAPI, Request |
| from fastapi.exceptions import RequestValidationError |
| from fastapi.responses import JSONResponse, RedirectResponse |
| from starlette.exceptions import HTTPException as StarletteHTTPException |
| from fastapi.middleware.cors import CORSMiddleware |
| from fastapi.staticfiles import StaticFiles |
|
|
# Parent of the directory that contains this file. It is placed at the front
# of sys.path (only when not already listed) and is also used by create_app()
# to locate the "storage" and "frontend" directories.
_pkg_root = Path(__file__).resolve().parents[1]
if sys.path.count(str(_pkg_root)) == 0:
    sys.path.insert(0, str(_pkg_root))
|
|
| from .api import router as base_router |
| from .config import load_config |
| from .errors import ApiException, ERROR_CODE_INVALID_TARGET, ERROR_CODE_PARSE, ERROR_SPECS |
| from .logging_setup import configure_logging |
| from .raw_cleanup import run_raw_snapshot_cleanup_once, start_raw_snapshot_cleanup_worker |
| from .rate_limit import PerIPRateLimitMiddleware |
| from .runner import TaskRunner |
| from .response import ApiResponse |
| from contextlib import asynccontextmanager |
| import os |
|
|
| from .proxy_pool import proxy_pool |
| from .storage import LocalJsonStorage |
|
|
|
|
def create_app() -> FastAPI:
    """Build and return the configured ``Spider_XHS Service`` FastAPI app.

    The factory:

    * loads the configuration via ``load_config()`` and stores it on
      ``app.state.config``;
    * installs a lifespan handler that prepares storage, raw-snapshot cleanup,
      the task runner and the proxy pool;
    * adds the CORS and per-IP rate-limit middlewares;
    * mounts the API router under ``/api/v1`` (and un-prefixed as well when
      ``config.enable_legacy_routes`` is truthy);
    * serves ``frontend/dist`` at ``/`` when that directory exists, otherwise
      redirects ``/`` to ``/docs``;
    * registers exception handlers that wrap errors in an ``ApiResponse``
      envelope.

    Returns:
        The fully wired ``FastAPI`` instance.
    """
    # Imported locally so this function brings its own dependency into scope;
    # FastAPI's default validation handler uses the same encoder.
    from fastapi.encoders import jsonable_encoder

    config = load_config()

    @asynccontextmanager
    async def lifespan(app: FastAPI):
        """Set up shared services on startup and stop the proxy pool on shutdown."""
        # Optionally materialise the storage state passed in through the
        # environment as <_pkg_root>/storage/state.json.
        storage_state_content = os.getenv("XHS_STORAGE_STATE_JSON")
        if storage_state_content:
            state_path = _pkg_root / "storage" / "state.json"
            state_path.parent.mkdir(parents=True, exist_ok=True)
            state_path.write_text(storage_state_content, encoding="utf-8")
            print(f"Loaded storage state from secret into {state_path}")

        app_config = app.state.config
        configure_logging(app_config.storage_dir)
        storage = LocalJsonStorage(app_config.storage_dir)
        storage.init()

        # A missing or falsy setting falls back to 7 days; the same value
        # feeds both the one-off cleanup and the cleanup worker.
        retention_days = int(getattr(app_config, "raw_data_retention_days", 7) or 7)
        run_raw_snapshot_cleanup_once(storage=storage, retention_days=retention_days)
        start_raw_snapshot_cleanup_worker(storage=storage, retention_days=retention_days)

        app.state.storage = storage
        app.state.runner = TaskRunner(storage=storage, config=app_config)

        await proxy_pool.start()
        try:
            yield
        finally:
            # Run the shutdown step even if the lifespan generator is closed,
            # cancelled or has an exception thrown into it at the yield.
            await proxy_pool.stop()

    app = FastAPI(title="Spider_XHS Service", lifespan=lifespan)
    app.state.config = config

    # NOTE(review): wildcard origins combined with allow_credentials=True lets
    # any origin make credentialed requests; the FastAPI CORS docs ask for
    # explicit origins in that case. Left unchanged because the intended
    # origin list is not visible here -- confirm and tighten.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # NOTE(review): middleware added later wraps middleware added earlier, so
    # the rate limiter sits outside CORS -- confirm that is the intended order.
    app.add_middleware(
        PerIPRateLimitMiddleware,
        capacity=100,
        window_seconds=60,
        enable_legacy_routes=config.enable_legacy_routes,
    )

    app.include_router(base_router, prefix="/api/v1")
    if config.enable_legacy_routes:
        app.include_router(base_router)

    # Registered after the routers so API routes are matched before the
    # catch-all static mount at "/".
    frontend_dist = _pkg_root / "frontend" / "dist"
    if frontend_dist.exists() and frontend_dist.is_dir():
        app.mount("/", StaticFiles(directory=str(frontend_dist), html=True), name="frontend")
    else:

        @app.get("/", include_in_schema=False)
        def root_redirect():
            return RedirectResponse(url="/docs")

    @app.exception_handler(ApiException)
    async def _handle_api_exception(_: Request, exc: ApiException) -> JSONResponse:
        """Render an ``ApiException`` using its own code, message, data and status."""
        payload = ApiResponse(code=exc.code, msg=exc.msg, data=exc.data).model_dump()
        return JSONResponse(status_code=exc.http_status, content=payload)

    @app.exception_handler(RequestValidationError)
    async def _handle_validation_error(_: Request, exc: RequestValidationError) -> JSONResponse:
        """Report request validation failures as HTTP 400 with the error list."""
        # Validation errors may embed values that json.dumps cannot handle
        # (for example exception objects), so encode them first.
        payload = ApiResponse(
            code=ERROR_CODE_INVALID_TARGET,
            msg="invalid target",
            data=jsonable_encoder(exc.errors()),
        ).model_dump()
        return JSONResponse(status_code=400, content=payload)

    # Statuses answered entirely from an ERROR_SPECS entry: its code, its
    # default message (when the exception has no detail) and its HTTP status.
    spec_key_by_status = {401: "auth", 403: "auth", 429: "rate", 409: "task_not_ready"}

    @app.exception_handler(StarletteHTTPException)
    async def _handle_http_exception(_: Request, exc: StarletteHTTPException) -> JSONResponse:
        """Translate an HTTP exception into the ``ApiResponse`` envelope."""
        status = int(getattr(exc, "status_code", 500) or 500)
        detail = getattr(exc, "detail", None)
        # Forward headers carried by the exception (e.g. WWW-Authenticate,
        # Allow) instead of silently dropping them.
        headers = getattr(exc, "headers", None)

        spec_key = spec_key_by_status.get(status)
        if spec_key is not None:
            spec = ERROR_SPECS[spec_key]
            payload = ApiResponse(code=spec.code, msg=str(detail or spec.default_msg), data=None).model_dump()
            return JSONResponse(status_code=spec.http_status, content=payload, headers=headers)

        if status == 404:
            spec = ERROR_SPECS["invalid_target"]
            payload = ApiResponse(code=spec.code, msg="not found", data={"detail": detail}).model_dump()
            return JSONResponse(status_code=404, content=payload, headers=headers)

        if status in (400, 405, 422):
            # These client errors are all reported to the caller as HTTP 400.
            spec = ERROR_SPECS["invalid_target"]
            payload = ApiResponse(
                code=spec.code,
                msg=str(detail or spec.default_msg),
                data={"detail": detail},
            ).model_dump()
            return JSONResponse(status_code=400, content=payload, headers=headers)

        payload = ApiResponse(
            code=ERROR_CODE_PARSE,
            msg=str(detail or "internal error"),
            data={"detail": detail},
        ).model_dump()
        # A status below 400 is not an error status; report it as 500.
        return JSONResponse(status_code=status if status >= 400 else 500, content=payload, headers=headers)

    @app.exception_handler(Exception)
    async def _handle_unexpected_exception(_: Request, exc: Exception) -> JSONResponse:
        """Catch-all: answer HTTP 500 with a generic message."""
        # NOTE(review): str(exc) is returned to the client and may expose
        # internal details; kept because callers may rely on the "error" field.
        payload = ApiResponse(code=ERROR_CODE_PARSE, msg="internal error", data={"error": str(exc)}).model_dump()
        return JSONResponse(status_code=500, content=payload)

    return app
|
|
|
|
# Module-level application instance; the server started below refers to it
# through the import string "Spider_XHS.service.app:app".
app = create_app()


if __name__ == "__main__":
    # Executed directly: serve the app on all interfaces, port 8000.
    from uvicorn import run as _run_server

    _run_server("Spider_XHS.service.app:app", port=8000, host="0.0.0.0")
|
|