"""Minimal FastAPI proxy that streams completions from a local Ollama server.

POST / with a JSON body ``{"prompt": ..., "key": ...}``; after the shared
connect key is validated, the model's text is streamed back to the client
chunk by chunk as it arrives from Ollama.
"""

import asyncio  # noqa: F401  (kept from original file; not used directly)
import json
import os
import secrets
from typing import Any, Dict, List, Optional  # noqa: F401

import httpx
from fastapi import FastAPI, HTTPException, Request  # noqa: F401
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

# Shared secret clients must present. Overridable via the CONNECT_KEY
# environment variable; the default preserves the original hardcoded value
# so existing deployments keep working.
CONNECT_KEY = os.environ.get("CONNECT_KEY", "connectkey")
MODEL_NAME = "aiasistentworld/Kimi-VL-A3B-Thinking-2506-LLM:latest"
OLLAMA_URL = "http://localhost:11434/api/generate"


class ChatRequest(BaseModel):
    """Request body: the user prompt plus the shared connect key."""

    prompt: str
    key: str


@app.get("/")
async def root():
    """Health check: report service status and the configured model name."""
    return {"status": "running", "model": MODEL_NAME}


@app.post("/")
async def generate(request: ChatRequest):
    """Validate the connect key, then stream Ollama's response to the client.

    Args:
        request: Parsed JSON body with ``prompt`` and ``key``.

    Returns:
        StreamingResponse emitting the raw model text (UTF-8 bytes), or a
        JSON error object if Ollama is unreachable or returns non-200.

    Raises:
        HTTPException: 403 when the supplied key does not match CONNECT_KEY.
    """
    # compare_digest avoids leaking key information via comparison timing.
    if not secrets.compare_digest(request.key, CONNECT_KEY):
        raise HTTPException(status_code=403, detail="Invalid connect key")

    async def stream_generator():
        payload = {
            "model": MODEL_NAME,
            "prompt": request.prompt,
            "stream": True,
        }
        # timeout=None: model generation can take arbitrarily long.
        async with httpx.AsyncClient(timeout=None) as client:
            try:
                async with client.stream(
                    "POST", OLLAMA_URL, json=payload
                ) as response:
                    if response.status_code != 200:
                        yield json.dumps({"error": "Ollama error"}).encode()
                        return
                    # Ollama streams one JSON object per line.
                    async for line in response.aiter_lines():
                        if not line:
                            continue
                        try:
                            data = json.loads(line)
                        except json.JSONDecodeError:
                            # Skip malformed lines; keep the stream alive.
                            continue
                        yield (data.get("response", "")).encode()
                        if data.get("done"):
                            break
            except Exception as e:  # network/connection failures
                yield json.dumps({"error": str(e)}).encode()

    return StreamingResponse(stream_generator(), media_type="text/event-stream")


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)