Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,7 +13,7 @@ from fastapi import FastAPI, HTTPException
|
|
| 13 |
from fastapi.responses import StreamingResponse
|
| 14 |
from fastapi.middleware.cors import CORSMiddleware
|
| 15 |
from pydantic import BaseModel
|
| 16 |
-
from typing import List, Optional, AsyncGenerator
|
| 17 |
import asyncio
|
| 18 |
import gradio as gr
|
| 19 |
|
|
@@ -361,7 +361,7 @@ print(f"\n🎉 Ready! Default model: {current_backend.get_name()}")
|
|
| 361 |
# Streaming Generator
|
| 362 |
# ==============================================================================
|
| 363 |
|
| 364 |
-
async def generate_stream(prompt: str, backend, temperature: float) -> AsyncGenerator[str]:
|
| 365 |
encoded_prompt = tokenizer.encode(prompt)
|
| 366 |
input_ids = [i for i in encoded_prompt.ids if i != eos_token_id]
|
| 367 |
generated = input_ids.copy()
|
|
@@ -460,7 +460,7 @@ async def chat_completions(request: ChatCompletionRequest):
|
|
| 460 |
}]
|
| 461 |
}
|
| 462 |
yield f"data: {json.dumps(chunk)}\n\n"
|
| 463 |
-
yield "
|
| 464 |
|
| 465 |
if request.stream:
|
| 466 |
return StreamingResponse(event_stream(), media_type="text/event-stream")
|
|
@@ -468,7 +468,7 @@ async def chat_completions(request: ChatCompletionRequest):
|
|
| 468 |
full = ""
|
| 469 |
async for token in event_stream():
|
| 470 |
if "[DONE]" not in token:
|
| 471 |
-
data = json.loads(token.replace("
|
| 472 |
full += data["choices"][0]["delta"]["content"]
|
| 473 |
return {"choices": [{"message": {"content": full}}]}
|
| 474 |
|
|
|
|
| 13 |
from fastapi.responses import StreamingResponse
|
| 14 |
from fastapi.middleware.cors import CORSMiddleware
|
| 15 |
from pydantic import BaseModel
|
| 16 |
+
from typing import List, Optional, AsyncGenerator # ✅ AsyncGenerator import
|
| 17 |
import asyncio
|
| 18 |
import gradio as gr
|
| 19 |
|
|
|
|
| 361 |
# Streaming Generator
|
| 362 |
# ==============================================================================
|
| 363 |
|
| 364 |
+
async def generate_stream(prompt: str, backend, temperature: float) -> AsyncGenerator[str, None]: # ✅ Fixed type hint
|
| 365 |
encoded_prompt = tokenizer.encode(prompt)
|
| 366 |
input_ids = [i for i in encoded_prompt.ids if i != eos_token_id]
|
| 367 |
generated = input_ids.copy()
|
|
|
|
| 460 |
}]
|
| 461 |
}
|
| 462 |
yield f"data: {json.dumps(chunk)}\n\n"
|
| 463 |
+
yield "data: [DONE]\n\n"  # ✅ SSE terminator (the highlighted `data:` prefix was lost in page extraction)
|
| 464 |
|
| 465 |
if request.stream:
|
| 466 |
return StreamingResponse(event_stream(), media_type="text/event-stream")
|
|
|
|
| 468 |
full = ""
|
| 469 |
async for token in event_stream():
|
| 470 |
if "[DONE]" not in token:
|
| 471 |
+
data = json.loads(token.replace("data: ", "").strip())  # strip the SSE `data: ` prefix before parsing (prefix lost in page extraction)
|
| 472 |
full += data["choices"][0]["delta"]["content"]
|
| 473 |
return {"choices": [{"message": {"content": full}}]}
|
| 474 |
|