Spaces:

bobocup
/

XAI

Paused

App Files Community

bobocup committed on Nov 10, 2024

Commit

9129169

verified ·

1 Parent(s): b860111

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -62

app.py CHANGED Viewed

@@ -132,35 +132,18 @@ async def access_control(request: Request, call_next):
     return await call_next(request)
-# 修改 stream_generator 函数
 async def stream_generator(response):
-    buffer = ""
-    first_chunk = True
     try:
         async for chunk in response.aiter_bytes():
-            if first_chunk:
-                # 第一个响应来得快一些
-                await asyncio.sleep(0.1)
-                first_chunk = False
-            else:
-                # 后续响应平滑输出
-                await asyncio.sleep(0.05)
-            chunk_str = chunk.decode('utf-8')
-            buffer += chunk_str
-            while '\n' in buffer:
-                line, buffer = buffer.split('\n', 1)
-                line = line.strip()
-                if line:
-                    if line.startswith('data: '):
-                        yield f"{line}\n\n"
-                    else:
-                        yield f"data: {line}\n\n"
     except Exception as e:
         print(f"Stream error: {str(e)}")
-        yield f"data: [ERROR] {str(e)}\n\n"
 # 修改 handle_api_request 函数中的超时设置
 async def handle_api_request(url: str, headers: dict, method: str = "GET", body: dict = None, for_chat: bool = False):
@@ -324,60 +307,50 @@ async def add_key(request: Request):
 @app.post("/api/v1/chat/completions")
 async def chat_completions(request: Request):
     try:
-        # 获取请求体
         body = await request.body()
         body_json = json.loads(body)
         headers = {
             "Content-Type": "application/json",
-            "Accept": "text/event-stream" if body_json.get("stream") else "application/json"
         }
         print("Starting chat completion request...")
-        response = await handle_api_request(
-            url=f"{Config.OPENAI_API_BASE}/chat/completions",
-            headers=headers,
-            method="POST",
-            body=body_json,
-            for_chat=True
-        )
-        if not response:
-            raise HTTPException(status_code=500, detail="Failed to get response from API")
-        print(f"Chat completion response status: {response.status_code}")
-        # 处理流式响应
-        if body_json.get("stream"):
-            return StreamingResponse(
-                stream_generator(response),
-                media_type="text/event-stream",
-                headers={
-                    "Cache-Control": "no-cache",
-                    "Connection": "keep-alive",
-                    "Content-Type": "text/event-stream",
-                    "X-Accel-Buffering": "no"  # 禁用 Nginx 缓冲
-                }
-            )
-        # 处理普通响应
-        return Response(
-            content=response.text,
-            media_type="application/json",
-            status_code=response.status_code
-        )
     except Exception as e:
         print(f"Chat completion error: {str(e)}")
-        error_detail = str(e)
-        if len(error_detail) > 200:  # 如果错误信息太长，只保留前200个字符
-            error_detail = error_detail[:200] + "..."
-        raise HTTPException(status_code=500, detail=error_detail)
 # 代理其他请求
-@app.api_route("/api/v1/{path:path}", methods=["GET", "POST", "PUT", "DELETE"])
-async def proxy(path: str, request: Request):
-    if path == "chat/completions":
         return await chat_completions(request)
     try:

     return await call_next(request)
+# 流式响应生成器
 async def stream_generator(response):
     try:
         async for chunk in response.aiter_bytes():
+            if chunk:
+                yield chunk
+                await asyncio.sleep(0.01)  # 小延迟保持平滑
     except Exception as e:
         print(f"Stream error: {str(e)}")
+        error_msg = json.dumps({"error": str(e)})
+        yield f"data: {error_msg}\n\n"
 # 修改 handle_api_request 函数中的超时设置
 async def handle_api_request(url: str, headers: dict, method: str = "GET", body: dict = None, for_chat: bool = False):
 @app.post("/api/v1/chat/completions")
 async def chat_completions(request: Request):
     try:
         body = await request.body()
         body_json = json.loads(body)
+        # 强制使用流式响应
+        body_json["stream"] = True
         headers = {
             "Content-Type": "application/json",
+            "Accept": "text/event-stream",
+            "Connection": "keep-alive"
         }
         print("Starting chat completion request...")
+        async with httpx.AsyncClient(timeout=None) as client:
+            key = get_chat_key()
+            headers["Authorization"] = f"Bearer {key}"
+            async with client.stream(
+                method="POST",
+                url=f"{Config.OPENAI_API_BASE}/chat/completions",
+                headers=headers,
+                json=body_json
+            ) as response:
+                if response.status_code != 200:
+                    raise HTTPException(status_code=response.status_code, detail="API request failed")
+                return StreamingResponse(
+                    stream_generator(response),
+                    media_type="text/event-stream",
+                    headers={
+                        "Cache-Control": "no-cache",
+                        "Connection": "keep-alive",
+                        "Content-Type": "text/event-stream",
+                        "X-Accel-Buffering": "no"
+                    }
+                )
     except Exception as e:
         print(f"Chat completion error: {str(e)}")
+        raise HTTPException(status_code=500, detail=str(e))
 # 代理其他请求
         return await chat_completions(request)
     try: