Update app.py
Browse files
app.py
CHANGED
|
@@ -150,83 +150,58 @@ async def create_replicate_prediction(session: aiohttp.ClientSession, model: str
|
|
| 150 |
logger.error(f"Error creating prediction: {str(e)}")
|
| 151 |
raise HTTPException(status_code=500, detail=f"Prediction creation error: {str(e)}")
|
| 152 |
|
| 153 |
-
|
| 154 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
# 尝试解析为JSON(用于其他事件类型)
|
| 177 |
-
try:
|
| 178 |
-
data = json.loads(content)
|
| 179 |
-
if data.get("event") == "output":
|
| 180 |
-
openai_response = {
|
| 181 |
-
"id": f"chatcmpl-{prediction_id}",
|
| 182 |
-
"object": "chat.completion.chunk",
|
| 183 |
-
"created": int(asyncio.get_event_loop().time()),
|
| 184 |
-
"model": model,
|
| 185 |
-
"choices": [{
|
| 186 |
-
"index": 0,
|
| 187 |
-
"delta": {
|
| 188 |
-
"content": data.get("data", "")
|
| 189 |
-
},
|
| 190 |
-
"finish_reason": None
|
| 191 |
-
}]
|
| 192 |
-
}
|
| 193 |
-
return f"data: {json.dumps(openai_response)}\n\n"
|
| 194 |
-
elif data.get("event") == "done":
|
| 195 |
-
openai_response = {
|
| 196 |
-
"id": f"chatcmpl-{prediction_id}",
|
| 197 |
-
"object": "chat.completion.chunk",
|
| 198 |
-
"created": int(asyncio.get_event_loop().time()),
|
| 199 |
-
"model": model,
|
| 200 |
-
"choices": [{
|
| 201 |
-
"index": 0,
|
| 202 |
-
"delta": {},
|
| 203 |
-
"finish_reason": "stop"
|
| 204 |
-
}]
|
| 205 |
}
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
| 224 |
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
return "" # 忽略事件类型行
|
| 228 |
|
| 229 |
-
return ""
|
| 230 |
|
| 231 |
@app.get("/")
|
| 232 |
async def root():
|
|
@@ -328,9 +303,9 @@ async def chat_completions(request: Request):
|
|
| 328 |
"Cache-Control": "no-store"
|
| 329 |
}
|
| 330 |
|
| 331 |
-
|
| 332 |
|
| 333 |
-
async with session.get(stream_url, headers=headers, timeout=
|
| 334 |
if response.status != 200:
|
| 335 |
error_text = await response.text()
|
| 336 |
logger.error(f"Stream error: {response.status} - {error_text}")
|
|
@@ -344,32 +319,40 @@ async def chat_completions(request: Request):
|
|
| 344 |
return
|
| 345 |
|
| 346 |
async for line in response.content:
|
| 347 |
-
line = line.decode('utf-8').
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
|
| 358 |
-
#
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
|
| 362 |
-
"object": "chat.completion.chunk",
|
| 363 |
-
"created": int(asyncio.get_event_loop().time()),
|
| 364 |
-
"model": model,
|
| 365 |
-
"choices": [{
|
| 366 |
-
"index": 0,
|
| 367 |
-
"delta": {},
|
| 368 |
-
"finish_reason": "stop"
|
| 369 |
-
}]
|
| 370 |
-
}
|
| 371 |
-
yield f"data: {json.dumps(final_response)}\n\ndata: [DONE]\n\n"
|
| 372 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
except Exception as e:
|
| 374 |
logger.error(f"Stream generation error: {e}")
|
| 375 |
error_response = {
|
|
@@ -387,6 +370,7 @@ async def chat_completions(request: Request):
|
|
| 387 |
"Cache-Control": "no-cache",
|
| 388 |
"Connection": "keep-alive",
|
| 389 |
"Access-Control-Allow-Origin": "*",
|
|
|
|
| 390 |
}
|
| 391 |
)
|
| 392 |
|
|
|
|
| 150 |
logger.error(f"Error creating prediction: {str(e)}")
|
| 151 |
raise HTTPException(status_code=500, detail=f"Prediction creation error: {str(e)}")
|
| 152 |
|
| 153 |
+
class SSEParser:
    """Incremental parser for a Server-Sent Events (SSE) stream.

    Feed decoded lines one at a time to parse_line(); the parser
    accumulates ``event:``, ``id:`` and ``data:`` fields and emits a
    complete event dict when the terminating blank line arrives.
    """

    def __init__(self):
        # Fields accumulated for the event currently being assembled.
        self.event_type = None
        self.event_id = None
        self.data_buffer = []

    def parse_line(self, line: str):
        """Parse one line of SSE input.

        Returns a dict {'event', 'id', 'data'} when a blank line
        completes an event, otherwise None.

        Per the SSE spec, a field line is "name: value" where the single
        space after the colon is OPTIONAL, and any line whose first
        character is ':' is a comment (the original required the space
        in both cases and silently dropped spec-valid lines).
        """
        if line == '':
            # Blank line terminates the current event: dispatch it if we
            # collected anything, then reset state for the next event.
            if self.data_buffer or self.event_type:
                event = {
                    'event': self.event_type,
                    'id': self.event_id,
                    'data': '\n'.join(self.data_buffer),
                }
                self.event_type = None
                self.event_id = None
                self.data_buffer = []
                return event
            return None

        if line.startswith(':'):
            # Comment / keep-alive line — ignore.
            return None

        field, sep, value = line.partition(':')
        if not sep:
            # A line with no colon is a field name with an empty value.
            value = ''
        elif value.startswith(' '):
            # The spec strips exactly one leading space after the colon.
            value = value[1:]

        if field == 'event':
            self.event_type = value.strip()
        elif field == 'id':
            self.event_id = value.strip()
        elif field == 'data':
            # Multiple data lines are joined with '\n' at dispatch time.
            self.data_buffer.append(value)
        # Unknown fields (e.g. 'retry') are ignored.
        return None
|
| 186 |
+
|
| 187 |
+
def create_openai_chunk(content: str, model: str, prediction_id: str, finish_reason=None):
    """Build one OpenAI-style streaming chat-completion chunk as an SSE line.

    Args:
        content: Text delta for choices[0].delta; ignored when
            finish_reason is set (terminal chunks carry an empty delta).
        model: Model name echoed back in the chunk.
        prediction_id: Used to form the chunk id ("chatcmpl-<id>").
        finish_reason: None for content chunks, "stop" for the final chunk.

    Returns:
        An SSE-formatted string: "data: " + the JSON chunk + two newlines.
    """
    import time  # local import: wall-clock timestamp for "created"

    chunk = {
        "id": f"chatcmpl-{prediction_id}",
        "object": "chat.completion.chunk",
        # Bug fix: asyncio.get_event_loop().time() is the loop's monotonic,
        # loop-relative clock (seconds since loop start) — not a Unix
        # timestamp. OpenAI's "created" field is epoch seconds.
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "delta": {},
            "finish_reason": finish_reason,
        }],
    }

    if content and not finish_reason:
        chunk["choices"][0]["delta"]["content"] = content

    return f"data: {json.dumps(chunk)}\n\n"
|
| 205 |
|
| 206 |
@app.get("/")
|
| 207 |
async def root():
|
|
|
|
| 303 |
"Cache-Control": "no-store"
|
| 304 |
}
|
| 305 |
|
| 306 |
+
sse_parser = SSEParser()
|
| 307 |
|
| 308 |
+
async with session.get(stream_url, headers=headers, timeout=120) as response:
|
| 309 |
if response.status != 200:
|
| 310 |
error_text = await response.text()
|
| 311 |
logger.error(f"Stream error: {response.status} - {error_text}")
|
|
|
|
| 319 |
return
|
| 320 |
|
| 321 |
async for line in response.content:
|
| 322 |
+
line = line.decode('utf-8').rstrip('\r\n')
|
| 323 |
+
|
| 324 |
+
# 跳过超时或错误消息
|
| 325 |
+
if '408' in line or 'timeout' in line.lower():
|
| 326 |
+
logger.info(f"Ignoring timeout message: {line}")
|
| 327 |
+
continue
|
| 328 |
+
|
| 329 |
+
# 解析 SSE 事件
|
| 330 |
+
event = sse_parser.parse_line(line)
|
| 331 |
+
if event:
|
| 332 |
+
event_type = event.get('event')
|
| 333 |
+
data = event.get('data', '')
|
| 334 |
+
|
| 335 |
+
logger.info(f"Parsed SSE event: {event_type}, data: {data[:50]}...")
|
| 336 |
+
|
| 337 |
+
if event_type == 'output' and data.strip():
|
| 338 |
+
# 输出事件,包含实际内容
|
| 339 |
+
yield create_openai_chunk(data, model, prediction_id)
|
| 340 |
+
elif event_type == 'done':
|
| 341 |
+
# 完成事件
|
| 342 |
+
logger.info("Stream completed with done event")
|
| 343 |
+
yield create_openai_chunk("", model, prediction_id, "stop")
|
| 344 |
+
yield "data: [DONE]\n\n"
|
| 345 |
+
return
|
| 346 |
|
| 347 |
+
# 如果没有收到 done 事件,手动发送结束
|
| 348 |
+
logger.info("Stream ended without done event, sending manual completion")
|
| 349 |
+
yield create_openai_chunk("", model, prediction_id, "stop")
|
| 350 |
+
yield "data: [DONE]\n\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 351 |
|
| 352 |
+
except asyncio.TimeoutError:
|
| 353 |
+
logger.error("Stream timeout")
|
| 354 |
+
yield create_openai_chunk("", model, prediction_id or "unknown", "stop")
|
| 355 |
+
yield "data: [DONE]\n\n"
|
| 356 |
except Exception as e:
|
| 357 |
logger.error(f"Stream generation error: {e}")
|
| 358 |
error_response = {
|
|
|
|
| 370 |
"Cache-Control": "no-cache",
|
| 371 |
"Connection": "keep-alive",
|
| 372 |
"Access-Control-Allow-Origin": "*",
|
| 373 |
+
"X-Accel-Buffering": "no", # 禁用 Nginx 缓冲
|
| 374 |
}
|
| 375 |
)
|
| 376 |
|