Spaces:

bibibi12345
/

flowith

Sleeping

App Files Files Community

bibibi12345 commited on May 26, 2025

Commit

f5fea6c

1 Parent(s): 2cdaad6

test real streaming

Browse files

Files changed (1) hide show

main.py +92 -135

main.py CHANGED Viewed

@@ -166,11 +166,11 @@ async def chat_completions(
     # 5. Prepare Headers for Flowith Request
     # Headers exactly matching the curl -H flags provided
     headers = {
-        'accept': '*/*',
         'accept-language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7,zh-TW;q=0.6,ja;q=0.5',
         'authorization': FLOWITH_AUTH_TOKEN, # Send only the token, no "Bearer " prefix
         'content-type': 'application/json',
-        'responsetype': 'stream', # Restore this header
         'origin': 'https://flowith.net',
         'priority': 'u=1, i',
         'referer': 'https://flowith.net/',
@@ -189,147 +189,104 @@ async def chat_completions(
     # Need JSONResponse
     from fastapi.responses import JSONResponse
     async with httpx.AsyncClient(timeout=300.0) as client:
         try:
-            # Serialize payload manually
-            payload_bytes = json.dumps(flowith_payload.dict()).encode('utf-8')
-            # Make a non-streaming POST request to Flowith
-            response = await client.post(
-                FLOWITH_API_URL,
-                content=payload_bytes,
-                headers=headers,
-            )
-            # Check status code after receiving the full response
-            if response.status_code != 200:
                 try:
-                    error_detail = response.text # Use .text for non-streaming
-                    detail_msg = f"Flowith API Error ({response.status_code}): {error_detail}"
-                except Exception:
-                    detail_msg = f"Flowith API Error ({response.status_code})"
-                raise HTTPException(status_code=response.status_code, detail=detail_msg)
-            # Get the plain text response directly
-            flowith_text = response.text
-            print(f"response preview: {flowith_text[:500]}")
-            # 7. Handle response based on *client's* request.stream preference
-            if not request.stream:
-                # Client wants non-streaming: Construct OpenAI-compatible JSON from plain text
-                completion_id = f"chatcmpl-{uuid.uuid4()}"
-                created_timestamp = int(time.time())
-                response_payload = {
-                    "id": completion_id,
-                    "object": "chat.completion",
-                    "created": created_timestamp,
-                    "model": request.model, # Use the model from the original request
-                    "choices": [{
-                        "index": 0,
-                        "message": {
-                            "role": "assistant",
-                            "content": flowith_text # Use the plain text here
-                        },
-                        "finish_reason": "stop" # Assume stop
-                    }],
-                    # "usage": {...} # Usage stats are typically not available/meaningful here
-                }
-                return JSONResponse(content=response_payload)
-            else:
-                # Client wants streaming: Implement keep-alive while fetching, then stream response
-                async def stream_generator():
-                    response_ready_event = asyncio.Event()
-                    fetch_task = None
-                    flowith_text_local = None # Use a local variable to avoid confusion with outer scope
-                    error_occurred = None
-                    sse_model_name = request.model # Use the model requested by the client
-                    # Coroutine to fetch data and signal completion
-                    async def fetch_and_process(event):
-                        nonlocal flowith_text_local, error_occurred
                         try:
-                            # === Logic to prepare request (model_name, flowith_messages, etc.) ===
-                            # These variables are captured from the outer scope:
-                            # FLOWITH_API_URL, headers, flowith_payload
-                            # The actual request is made here now, not before calling stream_generator
-                            async with httpx.AsyncClient(timeout=300.0) as client:
-                                payload_bytes = json.dumps(flowith_payload.dict()).encode('utf-8') # Use outer flowith_payload
-                                response = await client.post(
-                                    FLOWITH_API_URL, headers=headers, content=payload_bytes # Use outer headers
-                                )
-                            response.raise_for_status() # Raise HTTPError for bad responses (4xx or 5xx)
-                            flowith_text_local = response.text # Store in local variable
                         except Exception as e:
-                            # print(f"Error fetching from Flowith: {e}") # Optional debug
-                            error_occurred = e # Store error to yield later
-                        finally:
-                            event.set() # Signal completion or failure
-                    # Start fetching in the background
-                    fetch_task = asyncio.create_task(fetch_and_process(response_ready_event))
-                    try:
-                        # Send keep-alive chunks while waiting
-                        while not response_ready_event.is_set():
-                            keep_alive_data = {"id": "chatcmpl-keepalive", "object": "chat.completion.chunk", "created": int(time.time()), "model": sse_model_name, "choices": [{"delta": {"content": ""}, "index": 0, "finish_reason": None}]}
-                            yield f"data: {json.dumps(keep_alive_data)}\n\n"
-                            # Wait for a short period or until the event is set
-                            try:
-                                await asyncio.wait_for(response_ready_event.wait(), timeout=3.0)
-                            except asyncio.TimeoutError:
-                                pass # Timeout means event not set, continue loop
-                        # Event is set, check if fetch task had an error
-                        if fetch_task.done() and fetch_task.exception():
-                            # If fetch_task failed before setting event (shouldn't happen with finally, but check anyway)
-                            error_occurred = fetch_task.exception()
-                        if error_occurred:
-                            # Yield an error chunk (optional, depends on desired behavior)
-                            # print(f"Yielding error chunk: {error_occurred}") # Optional debug
-                            error_content = f"Error processing request: {type(error_occurred).__name__}"
-                            error_chunk = {"id": f"chatcmpl-error-{uuid.uuid4()}", "object": "chat.completion.chunk", "created": int(time.time()), "model": sse_model_name, "choices": [{"delta": {"content": error_content }, "index": 0, "finish_reason": "error"}]} # Use finish_reason 'error' if possible
-                            yield f"data: {json.dumps(error_chunk)}\n\n"
-                        elif flowith_text_local is not None:
-                            # Fetch succeeded, yield content chunks
-                            chunk_id = f"chatcmpl-{uuid.uuid4()}"
-                            chunk_size = 20
-                            full_content = flowith_text_local
-                            for i in range(0, len(full_content), chunk_size):
-                                content_piece = full_content[i:i + chunk_size]
-                                chunk = {
-                                    "id": chunk_id,
-                                    "object": "chat.completion.chunk",
-                                    "created": int(time.time()),
-                                    "model": sse_model_name,
-                                    "choices": [{"index": 0, "delta": {"content": content_piece}, "finish_reason": None}]
-                                }
-                                yield f"data: {json.dumps(chunk)}\n\n"
-                            # Yield final chunk
-                            final_chunk = {
-                                "id": chunk_id,
-                                "object": "chat.completion.chunk",
-                                "created": int(time.time()),
-                                "model": sse_model_name,
-                                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
-                            }
-                            yield f"data: {json.dumps(final_chunk)}\n\n"
-                    except asyncio.CancelledError:
-                         # print("Stream generator cancelled (client disconnected)") # Optional debug
-                         raise # Re-raise cancellation
-                    finally:
-                        # Ensure fetch task is cancelled if generator exits early
-                        if fetch_task and not fetch_task.done():
-                            fetch_task.cancel()
-                        # Send DONE message regardless of success/failure/cancellation
-                        yield "data: [DONE]\n\n"
-                # The return statement remains the same
-                return StreamingResponse(stream_generator(), media_type="text/event-stream")
         except httpx.RequestError as exc:
             print(f"Error requesting Flowith: {exc}")
             raise HTTPException(status_code=503, detail=f"Error connecting to Flowith service: {exc}")

     # 5. Prepare Headers for Flowith Request
     # Headers exactly matching the curl -H flags provided
     headers = {
+        'accept': 'text/event-stream', # Changed for streaming
         'accept-language': 'en-US,en;q=0.9,zh-CN;q=0.8,zh;q=0.7,zh-TW;q=0.6,ja;q=0.5',
         'authorization': FLOWITH_AUTH_TOKEN, # Send only the token, no "Bearer " prefix
         'content-type': 'application/json',
+        'responsetype': 'stream', # Keep this header
         'origin': 'https://flowith.net',
         'priority': 'u=1, i',
         'referer': 'https://flowith.net/',
     # Need JSONResponse
     from fastapi.responses import JSONResponse
+    # Need JSONResponse
+    from fastapi.responses import JSONResponse
     async with httpx.AsyncClient(timeout=300.0) as client:
         try:
+            # Serialize payload manually for the stream request
+            payload_json = flowith_payload.dict()
+            # Use client.stream for real streaming
+            async with client.stream("POST", FLOWITH_API_URL, headers=headers, json=payload_json, timeout=300.0) as response:
+                # Check status *after* starting stream read or getting headers
+                # It's often better to check after reading the first chunk or headers
+                # For simplicity here, we might check early, but be aware Flowith might send errors mid-stream
+                # Let's check status right away, but handle potential errors during iteration too.
                 try:
+                    response.raise_for_status() # Check initial status
+                except httpx.HTTPStatusError as status_exc:
+                     # Attempt to read body for more details if possible
+                    try:
+                        error_body = await status_exc.response.aread()
+                        detail_msg = f"Flowith API Error ({status_exc.response.status_code}): {error_body.decode('utf-8', errors='replace')}"
+                    except Exception:
+                        detail_msg = f"Flowith API Error ({status_exc.response.status_code})"
+                    raise HTTPException(status_code=status_exc.response.status_code, detail=detail_msg) from status_exc
+                # 7. Handle response based on *client's* request.stream preference
+                if request.stream:
+                    # Client wants streaming: Forward Flowith's stream directly
+                    async def stream_forwarder():
                         try:
+                            async for chunk in response.aiter_bytes():
+                                # Assuming Flowith sends SSE-compatible chunks (or raw data we want to forward)
+                                # If Flowith sends *only* text content per chunk, we might need to format it:
+                                # yield f"data: {json.dumps({'choices': [{'delta': {'content': chunk.decode()}}]})}\n\n"
+                                # For now, let's assume Flowith sends pre-formatted SSE or raw bytes are okay.
+                                yield chunk
+                            # Optionally yield a final [DONE] message if Flowith doesn't
+                            # yield b"data: [DONE]\n\n"
+                        except httpx.RequestError as stream_exc:
+                            print(f"Error during Flowith stream read: {stream_exc}")
+                            # Decide how to signal this error to the client.
+                            # Option 1: Raise an exception (FastAPI might handle it)
+                            # raise HTTPException(status_code=503, detail=f"Stream read error: {stream_exc}")
+                            # Option 2: Yield an error message within the stream (if client supports it)
+                            error_content = f"Stream read error: {type(stream_exc).__name__}"
+                            error_chunk = {"id": f"chatcmpl-streamerror-{uuid.uuid4()}", "object": "chat.completion.chunk", "created": int(time.time()), "model": request.model, "choices": [{"delta": {"content": error_content }, "index": 0, "finish_reason": "error"}]}
+                            yield f"data: {json.dumps(error_chunk)}\n\n".encode('utf-8')
+                            yield b"data: [DONE]\n\n" # Still send DONE after error chunk
                         except Exception as e:
+                            print(f"Unexpected error during stream forward: {e}")
+                            error_content = f"Unexpected stream error: {type(e).__name__}"
+                            error_chunk = {"id": f"chatcmpl-streamerror-{uuid.uuid4()}", "object": "chat.completion.chunk", "created": int(time.time()), "model": request.model, "choices": [{"delta": {"content": error_content }, "index": 0, "finish_reason": "error"}]}
+                            yield f"data: {json.dumps(error_chunk)}\n\n".encode('utf-8')
+                            yield b"data: [DONE]\n\n" # Still send DONE after error chunk
+                    return StreamingResponse(stream_forwarder(), media_type="text/event-stream")
+                else:
+                    # Client wants non-streaming: Accumulate chunks
+                    chunks = []
+                    try:
+                        async for chunk in response.aiter_bytes():
+                            chunks.append(chunk)
+                    except httpx.RequestError as stream_exc:
+                         print(f"Error during Flowith stream read (non-streaming mode): {stream_exc}")
+                         raise HTTPException(status_code=503, detail=f"Stream read error: {stream_exc}")
+                    except Exception as e:
+                         print(f"Unexpected error during stream accumulation: {e}")
+                         raise HTTPException(status_code=500, detail=f"Unexpected stream error: {e}")
+                    full_response_bytes = b"".join(chunks)
+                    flowith_text = full_response_bytes.decode('utf-8', errors='replace')
+                    print(f"Accumulated response preview: {flowith_text[:500]}")
+                    # Construct OpenAI-compatible JSON
+                    completion_id = f"chatcmpl-{uuid.uuid4()}"
+                    created_timestamp = int(time.time())
+                    response_payload = {
+                        "id": completion_id,
+                        "object": "chat.completion",
+                        "created": created_timestamp,
+                        "model": request.model, # Use the model from the original request
+                        "choices": [{
+                            "index": 0,
+                            "message": {
+                                "role": "assistant",
+                                "content": flowith_text
+                            },
+                            "finish_reason": "stop" # Assume stop
+                        }],
+                        # "usage": {...} # Usage stats are typically not available/meaningful here
+                    }
+                    return JSONResponse(content=response_payload)
+        # Keep outer exception handling for initial connection errors etc.
         except httpx.RequestError as exc:
             print(f"Error requesting Flowith: {exc}")
             raise HTTPException(status_code=503, detail=f"Error connecting to Flowith service: {exc}")