Spaces:
Runtime error
Runtime error
tech-envision committed on
Commit ·
3e5cdc3
1
Parent(s): 05db9e8
Remove non-stream chat
Browse files- README.md +21 -0
- requirements.txt +1 -0
- run.py +0 -1
- src/api.py +73 -0
- src/chat.py +0 -27
README.md
CHANGED
|
@@ -121,3 +121,24 @@ python run.py
|
|
| 121 |
|
| 122 |
The custom VM includes typical utilities like ``sudo`` and ``curl`` so it behaves
|
| 123 |
more like a standard Ubuntu installation.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
The custom VM includes typical utilities like ``sudo`` and ``curl`` so it behaves
|
| 123 |
more like a standard Ubuntu installation.
|
| 124 |
+
|
| 125 |
+
## REST API
|
| 126 |
+
|
| 127 |
+
Start the API server using ``uvicorn``:
|
| 128 |
+
|
| 129 |
+
```bash
|
| 130 |
+
uvicorn src.api:app --host 0.0.0.0 --port 8000
|
| 131 |
+
```
|
| 132 |
+
|
| 133 |
+
### Endpoints
|
| 134 |
+
|
| 135 |
+
- ``POST /chat/stream`` – Stream the assistant's response as plain text.
|
| 136 |
+
- ``POST /upload`` – Upload a document so it can be referenced in chats.
|
| 137 |
+
|
| 138 |
+
Example request:
|
| 139 |
+
|
| 140 |
+
```bash
|
| 141 |
+
curl -N -X POST http://localhost:8000/chat/stream \
|
| 142 |
+
-H 'Content-Type: application/json' \
|
| 143 |
+
-d '{"user":"demo","session":"default","prompt":"Hello"}'
|
| 144 |
+
```
|
requirements.txt
CHANGED
|
@@ -6,3 +6,4 @@ colorama
|
|
| 6 |
python-dotenv
|
| 7 |
fastapi
|
| 8 |
uvicorn
|
|
|
|
|
|
| 6 |
python-dotenv
|
| 7 |
fastapi
|
| 8 |
uvicorn
|
| 9 |
+
python-multipart
|
run.py
CHANGED
|
@@ -10,7 +10,6 @@ async def _main() -> None:
|
|
| 10 |
async with ChatSession(user="demo_user", session="demo_session") as chat:
|
| 11 |
doc_path = chat.upload_document("note.pdf")
|
| 12 |
# print(f"Document uploaded to VM at: {doc_path}")
|
| 13 |
-
# answer = await chat.chat(f"Remove all contents of test.txt and add the text 'Hello, World!' to it.")
|
| 14 |
# async for resp in chat.chat_stream("Erase the contents of test.txt and write 'Hello, World!' to it."):
|
| 15 |
# async for resp in chat.chat_stream("Verify that the file test.txt exists and contains the text 'Hello, World!'."):
|
| 16 |
# async for resp in chat.chat_stream("Inspect the contents of note.pdf and summarize it."):
|
|
|
|
| 10 |
async with ChatSession(user="demo_user", session="demo_session") as chat:
|
| 11 |
doc_path = chat.upload_document("note.pdf")
|
| 12 |
# print(f"Document uploaded to VM at: {doc_path}")
|
|
|
|
| 13 |
# async for resp in chat.chat_stream("Erase the contents of test.txt and write 'Hello, World!' to it."):
|
| 14 |
# async for resp in chat.chat_stream("Verify that the file test.txt exists and contains the text 'Hello, World!'."):
|
| 15 |
# async for resp in chat.chat_stream("Inspect the contents of note.pdf and summarize it."):
|
src/api.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations

import asyncio
import os
import tempfile
from collections.abc import AsyncIterator
from pathlib import Path

from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

from .chat import ChatSession
from .log import get_logger
| 13 |
+
|
| 14 |
+
|
| 15 |
+
_LOG = get_logger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class ChatRequest(BaseModel):
    """Request payload for ``POST /chat/stream``."""

    # Forwarded to ``ChatSession(user=...)``; defaults allow anonymous use.
    user: str = "default"
    # Forwarded to ``ChatSession(session=...)``; names the conversation.
    session: str = "default"
    # The user's message; required — requests without it are rejected by
    # pydantic validation.
    prompt: str
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def create_app() -> FastAPI:
    """Build the FastAPI application for the LLM backend.

    Endpoints:
        POST /chat/stream -- stream the assistant's reply as plain text.
        POST /upload      -- store an uploaded document for a chat session.
        GET  /health      -- liveness probe.

    Returns:
        The configured :class:`FastAPI` instance.
    """
    app = FastAPI(title="LLM Backend API")

    @app.post("/chat/stream")
    async def chat_stream(req: ChatRequest):
        # FIX: the annotation previously read ``asyncio.AsyncIterator`` —
        # that name does not exist in the asyncio module (it only avoided an
        # error because ``from __future__ import annotations`` defers
        # evaluation). ``AsyncIterator`` comes from ``collections.abc``.
        async def stream() -> AsyncIterator[str]:
            async with ChatSession(user=req.user, session=req.session) as chat:
                try:
                    async for part in chat.chat_stream(req.prompt):
                        yield part
                except Exception as exc:  # pragma: no cover - runtime failures
                    # Boundary handler: report the failure to the client
                    # instead of silently truncating the stream.
                    _LOG.error("Streaming chat failed: %s", exc)
                    yield f"Error: {exc}"

        return StreamingResponse(stream(), media_type="text/plain")

    @app.post("/upload")
    async def upload_document(
        user: str = Form(...),
        session: str = Form("default"),
        file: UploadFile = File(...),
    ):
        """Copy the upload to a local temp dir, hand it to the chat session,
        then remove the local copy."""
        async with ChatSession(user=user, session=session) as chat:
            tmpdir = tempfile.mkdtemp(prefix="upload_")
            # SECURITY FIX: ``file.filename`` is client-controlled; previously
            # it was joined to the temp dir unchanged, so a crafted name such
            # as ``../../x`` could write outside it. Keep only the basename,
            # with a fallback when the client sends no filename.
            safe_name = Path(file.filename or "upload.bin").name
            tmp_path = Path(tmpdir) / safe_name
            try:
                contents = await file.read()
                tmp_path.write_bytes(contents)
                vm_path = chat.upload_document(str(tmp_path))
            finally:
                # Best-effort cleanup; the document now lives with the session.
                try:
                    os.remove(tmp_path)
                    os.rmdir(tmpdir)
                except OSError:
                    pass
        return {"path": vm_path}

    @app.get("/health")
    async def health():
        return {"status": "ok"}

    return app
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
# Module-level instance so ``uvicorn src.api:app`` can locate the application.
app = create_app()

if __name__ == "__main__":  # pragma: no cover - manual start
    import uvicorn

    # Port is configurable via the PORT environment variable (default 8000).
    port = int(os.getenv("PORT", "8000"))
    uvicorn.run(app, host="0.0.0.0", port=port)
|
src/chat.py
CHANGED
|
@@ -294,33 +294,6 @@ class ChatSession:
|
|
| 294 |
async with self._lock:
|
| 295 |
self._state = "idle"
|
| 296 |
|
| 297 |
-
async def _handle_tool_calls(
|
| 298 |
-
self,
|
| 299 |
-
messages: List[Msg],
|
| 300 |
-
response: ChatResponse,
|
| 301 |
-
conversation: Conversation,
|
| 302 |
-
depth: int = 0,
|
| 303 |
-
) -> ChatResponse:
|
| 304 |
-
final = response
|
| 305 |
-
gen = self._handle_tool_calls_stream(messages, response, conversation, depth)
|
| 306 |
-
async for final in gen:
|
| 307 |
-
pass
|
| 308 |
-
return final
|
| 309 |
-
|
| 310 |
-
async def chat(self, prompt: str) -> str:
|
| 311 |
-
DBMessage.create(conversation=self._conversation, role="user", content=prompt)
|
| 312 |
-
self._messages.append({"role": "user", "content": prompt})
|
| 313 |
-
|
| 314 |
-
response = await self.ask(self._messages)
|
| 315 |
-
self._messages.append(response.message.model_dump())
|
| 316 |
-
self._store_assistant_message(self._conversation, response.message)
|
| 317 |
-
|
| 318 |
-
_LOG.info("Thinking:\n%s", response.message.thinking or "<no thinking trace>")
|
| 319 |
-
|
| 320 |
-
final_resp = await self._handle_tool_calls(
|
| 321 |
-
self._messages, response, self._conversation
|
| 322 |
-
)
|
| 323 |
-
return self._format_output(final_resp.message)
|
| 324 |
|
| 325 |
async def chat_stream(self, prompt: str) -> AsyncIterator[str]:
|
| 326 |
async with self._lock:
|
|
|
|
| 294 |
async with self._lock:
|
| 295 |
self._state = "idle"
|
| 296 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
|
| 298 |
async def chat_stream(self, prompt: str) -> AsyncIterator[str]:
|
| 299 |
async with self._lock:
|