"""Reverse proxy for a fixed set of ModelScope Studio endpoints.

Each incoming request whose path is listed in ``ROUTES`` is forwarded to the
configured upstream URL with an ``x-studio-token`` header attached. The token
is fetched from ``TOKEN_URL`` using the ``MODELSCOPE_COOKIE`` environment
variable and cached in-process for ``TOKEN_TTL_SECONDS``.
"""

import asyncio
import logging
import os
import time
from typing import AsyncIterator, Dict, Optional

import aiohttp
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import HTMLResponse, Response, StreamingResponse

logger = logging.getLogger("modelscope_proxy")
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO").upper())

MODELSCOPE_COOKIE = os.getenv("MODELSCOPE_COOKIE", "")
TOKEN_TTL_SECONDS = int(os.getenv("MODELSCOPE_TOKEN_TTL_SECONDS", "3300"))
TOKEN_URL = "https://modelscope.cn/api/v1/studios/token"

# Local path -> upstream URL and the HTTP methods accepted for that path.
ROUTES = {
    "/image": {
        "url": "https://chuansir-qwen-image.ms.show/image",
        "methods": {"POST"},
    },
    "/edit-image": {
        "url": "https://chuansir-qwen-image.ms.show/edit-image",
        "methods": {"POST"},
    },
    "/v1/chat/completions": {
        "url": "https://chuansir-qwen3-5-27b-claude-4-6-opus-reasoning-dis.ms.show/v1/chat/completions",
        "methods": {"POST"},
    },
    "/v1/messages": {
        "url": "https://chuansir-qwen3-5-27b-claude-4-6-opus-reasoning-dis.ms.show/v1/messages",
        "methods": {"POST"},
    },
    "/v1/models": {
        "url": "https://chuansir-qwen3-5-27b-claude-4-6-opus-reasoning-dis.ms.show/v1/models",
        "methods": {"GET"},
    },
    "/tts": {
        "url": "https://chuansir-index-tts-vllm.ms.show/tts",
        "methods": {"GET", "POST"},
    },
    "/stt/transcribe": {
        "url": "https://chuansir-index-tts-vllm.ms.show/stt/transcribe",
        "methods": {"POST"},
    },
}

# Headers that describe a single connection (plus host/content-length, which
# are recomputed for each hop) and therefore must not be copied across.
HOP_BY_HOP_HEADERS = {
    "connection",
    "keep-alive",
    "proxy-authenticate",
    "proxy-authorization",
    "te",
    "trailers",
    "transfer-encoding",
    "upgrade",
    "host",
    "content-length",
}

# aiohttp transparently decompresses response bodies. Let it negotiate its own
# Accept-Encoding (only encodings it can decode) instead of the client's, and
# never relay Content-Encoding, which no longer describes the bytes we send.
_DROPPED_REQUEST_HEADERS = HOP_BY_HOP_HEADERS | {"accept-encoding"}
_DROPPED_RESPONSE_HEADERS = HOP_BY_HOP_HEADERS | {"content-encoding"}

_STREAM_CHUNK_SIZE = 64 * 1024

app = FastAPI(title="ModelScope Studio API Proxy")


class ModelScopeTokenCache:
    """Fetch a ModelScope studio token and reuse it until its TTL elapses."""

    def __init__(self, cookie: str, ttl_seconds: int) -> None:
        """Store the cookie used to request tokens and the cache lifetime.

        Args:
            cookie: Value sent as the ``Cookie`` header to ``TOKEN_URL``.
            ttl_seconds: How long a fetched token is reused before refetching.
        """
        self.cookie = cookie
        self.ttl_seconds = ttl_seconds
        self._token: Optional[str] = None
        self._expires_at = 0.0

    async def get(self, session: aiohttp.ClientSession) -> str:
        """Return the cached token, fetching a new one if it is missing or expired.

        Raises:
            HTTPException: 500 when no cookie is configured, 502 when the
                token response does not contain a usable token.
            aiohttp.ClientError: When the token request itself fails.
        """
        now = time.time()
        if self._token and now < self._expires_at:
            return self._token

        token = await self._get_modelscope_token(session, self._token_headers())
        self._token = token
        # Expiry is measured from before the request, so the cached lifetime
        # never exceeds ttl_seconds even if the fetch was slow.
        self._expires_at = now + self.ttl_seconds
        return token

    async def _get_modelscope_token(
        self, session: aiohttp.ClientSession, headers: Dict[str, str]
    ) -> str:
        """Request a fresh token from ``TOKEN_URL`` and extract ``Data.Token``.

        Raises:
            aiohttp.ClientResponseError: On a non-2xx status.
            HTTPException: 502 when the JSON payload has no non-empty
                ``Data.Token`` string.
        """
        async with session.get(TOKEN_URL, headers=headers) as response:
            res_text = await response.text()
            # NOTE(review): at DEBUG level this writes the raw token response
            # (including the token) to the log.
            logger.debug(res_text)
            response.raise_for_status()
            token_data = await response.json()

        try:
            token = token_data["Data"]["Token"]
        except (KeyError, TypeError) as exc:
            raise HTTPException(
                status_code=502,
                detail="ModelScope token response did not contain Data.Token",
            ) from exc
        if not isinstance(token, str) or not token:
            raise HTTPException(
                status_code=502,
                detail="ModelScope token response did not contain Data.Token",
            )
        return token

    def _token_headers(self) -> Dict[str, str]:
        """Build the headers for the token request.

        Raises:
            HTTPException: 500 when the cookie is empty.
        """
        if not self.cookie:
            raise HTTPException(
                status_code=500,
                detail="MODELSCOPE_COOKIE environment variable is not set",
            )
        return {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/121.0.0.0 Safari/537.36"
            ),
            "Cookie": self.cookie,
        }


token_cache = ModelScopeTokenCache(MODELSCOPE_COOKIE, TOKEN_TTL_SECONDS)


@app.get("/", response_class=HTMLResponse)
async def index() -> str:
    """Serve the static landing page."""
    return """ ModelScope API Proxy

ModelScope API Proxy

"""


@app.get("/health")
async def health() -> Dict[str, str]:
    """Liveness probe; always reports ``ok``."""
    return {"status": "ok"}


@app.on_event("startup")
async def startup() -> None:
    """Create the shared aiohttp session used for all upstream calls."""
    # Only the connect phase is bounded; total and read timeouts are disabled.
    timeout = aiohttp.ClientTimeout(total=None, sock_connect=60, sock_read=None)
    app.state.session = aiohttp.ClientSession(timeout=timeout)


@app.on_event("shutdown")
async def shutdown() -> None:
    """Close the shared aiohttp session."""
    await app.state.session.close()


@app.api_route(
    "/{path:path}",
    methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS", "HEAD"],
)
async def proxy(path: str, request: Request) -> Response:
    """Forward a request for a known route to its upstream and relay the reply.

    Args:
        path: Request path without the leading slash.
        request: The incoming request; its method, query string, headers and
            body are forwarded.

    Returns:
        A ``StreamingResponse`` when ``_should_stream`` says so, otherwise a
        fully buffered ``Response``. Status code and filtered headers come
        from the upstream reply.

    Raises:
        HTTPException: 404 for an unknown path, 405 for a method the route
            does not allow, 502 when the token fetch or the upstream call
            fails.
    """
    route_path = "/" + path
    route = ROUTES.get(route_path)
    if not route:
        raise HTTPException(status_code=404, detail="Proxy route not found")
    if request.method not in route["methods"]:
        raise HTTPException(
            status_code=405,
            detail="Method not allowed",
            headers={"Allow": ", ".join(sorted(route["methods"]))},
        )

    session: aiohttp.ClientSession = app.state.session

    try:
        token = await token_cache.get(session)
    except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
        logger.exception("Token request failed")
        raise HTTPException(
            status_code=502, detail=f"Token request failed: {exc}"
        ) from exc

    headers = _build_forward_headers(request, token)
    body = await request.body()

    try:
        upstream = await session.request(
            method=request.method,
            url=route["url"],
            # multi_items() keeps repeated keys (?a=1&a=2); passing the
            # multi-dict as a plain mapping would keep one value per key.
            params=request.query_params.multi_items(),
            headers=headers,
            data=body if body else None,
        )
    except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
        logger.exception("Upstream request failed")
        raise HTTPException(status_code=502, detail=str(exc)) from exc

    response_headers = _build_response_headers(upstream.headers)
    media_type = upstream.headers.get("content-type")

    if _should_stream(upstream):
        return StreamingResponse(
            _stream_and_release(upstream),
            status_code=upstream.status,
            headers=response_headers,
            media_type=media_type,
            # Second release after the response is sent, in case the generator
            # was never started or finalized.
            background=_CloseAiohttpResponse(upstream),
        )

    try:
        content = await upstream.read()
    except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
        logger.exception("Reading upstream response failed")
        raise HTTPException(status_code=502, detail=str(exc)) from exc
    finally:
        # Always hand the connection back, even when the read failed.
        upstream.release()

    return Response(
        content=content,
        status_code=upstream.status,
        headers=response_headers,
        media_type=media_type,
    )


async def _stream_and_release(
    upstream: aiohttp.ClientResponse,
) -> AsyncIterator[bytes]:
    """Yield the upstream body in chunks and release it when iteration ends."""
    try:
        async for chunk in upstream.content.iter_chunked(_STREAM_CHUNK_SIZE):
            yield chunk
    finally:
        upstream.release()


def _build_forward_headers(request: Request, token: str) -> Dict[str, str]:
    """Copy the client's headers minus dropped ones and attach the studio token."""
    headers = {
        key: value
        for key, value in request.headers.items()
        if key.lower() not in _DROPPED_REQUEST_HEADERS
    }
    headers["x-studio-token"] = token
    return headers


def _build_response_headers(headers: aiohttp.typedefs.LooseHeaders) -> Dict[str, str]:
    """Copy upstream response headers minus hop-by-hop and Content-Encoding.

    The result is a plain dict, so a header name that occurs several times
    upstream appears once here.
    """
    return {
        key: value
        for key, value in headers.items()
        if key.lower() not in _DROPPED_RESPONSE_HEADERS
    }


def _should_stream(response: aiohttp.ClientResponse) -> bool:
    """Return True for SSE, octet-stream, or chunked upstream responses."""
    content_type = response.headers.get("content-type", "").lower()
    return (
        "text/event-stream" in content_type
        or "application/octet-stream" in content_type
        or response.headers.get("transfer-encoding", "").lower() == "chunked"
    )


class _CloseAiohttpResponse:
    """Awaitable callable used as a response background task to release upstream."""

    def __init__(self, response: aiohttp.ClientResponse) -> None:
        self.response = response

    async def __call__(self) -> None:
        self.response.release()


if __name__ == "__main__":
    import uvicorn

    port = int(os.getenv("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=port)