# Manus Agent
# Initial commit: Ollama FastAPI Streaming Server
# (commit b86ec86)
import asyncio
import json
import os
import secrets
from typing import Any, Dict, List, Optional

import httpx
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
app = FastAPI()

# Shared secret clients must send in the request body. Read from the
# environment when provided; falls back to the historical hardcoded value so
# existing deployments keep working.
# NOTE(review): override CONNECT_KEY in production — the default is guessable.
CONNECT_KEY = os.getenv("CONNECT_KEY", "connectkey")

# Model tag served by the local Ollama daemon.
MODEL_NAME = "aiasistentworld/Kimi-VL-A3B-Thinking-2506-LLM:latest"

# Ollama's (non-chat) generation endpoint on the default local port.
OLLAMA_URL = "http://localhost:11434/api/generate"
class ChatRequest(BaseModel):
    """Request body for ``POST /``: a prompt plus the shared connect key."""

    # Text prompt forwarded verbatim to the Ollama model.
    prompt: str
    # Must equal CONNECT_KEY or the request is rejected with HTTP 403.
    key: str
@app.get("/")
async def root():
    """Health check: report that the server is up and which model it serves."""
    status_payload = {"status": "running", "model": MODEL_NAME}
    return status_payload
@app.post("/")
async def generate(request: ChatRequest):
    """Proxy *request.prompt* to Ollama and stream the model's text back.

    The response is a ``text/event-stream`` whose chunks are the raw
    ``response`` fragments from Ollama's streaming JSON lines. Errors that
    occur after streaming has started are reported in-band as a JSON object,
    since the HTTP status is already committed by then.

    Raises:
        HTTPException: 403 when the supplied connect key does not match.
    """
    # compare_digest gives a constant-time comparison, avoiding timing
    # attacks on the key check (a plain != short-circuits on first mismatch).
    if not secrets.compare_digest(request.key, CONNECT_KEY):
        raise HTTPException(status_code=403, detail="Invalid connect key")

    async def stream_generator():
        payload = {
            "model": MODEL_NAME,
            "prompt": request.prompt,
            "stream": True,
        }
        # timeout=None: generation can legitimately run for a long time.
        async with httpx.AsyncClient(timeout=None) as client:
            try:
                async with client.stream("POST", OLLAMA_URL, json=payload) as response:
                    if response.status_code != 200:
                        yield json.dumps({"error": "Ollama error"}).encode()
                        return
                    async for line in response.aiter_lines():
                        if not line:
                            continue
                        try:
                            data = json.loads(line)
                        except json.JSONDecodeError:
                            # Skip malformed/partial lines rather than abort.
                            continue
                        yield (data.get("response", "")).encode()
                        if data.get("done"):
                            break
            except Exception as e:
                # Transport/protocol failure mid-stream: surface it in-band.
                yield json.dumps({"error": str(e)}).encode()

    return StreamingResponse(stream_generator(), media_type="text/event-stream")
if __name__ == "__main__":
    # Launched as a script: serve on all interfaces, port 7860.
    import uvicorn

    serve_host, serve_port = "0.0.0.0", 7860
    uvicorn.run(app, host=serve_host, port=serve_port)