Spaces:
Sleeping
Sleeping
import asyncio
import json
import os
import secrets
from typing import Optional, List, Dict, Any

import httpx
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
app = FastAPI()

# Shared secret required by /generate. Overridable via the CONNECT_KEY
# environment variable so deployments don't ship with the baked-in default;
# falls back to the original hardcoded value for backward compatibility.
CONNECT_KEY = os.environ.get("CONNECT_KEY", "connectkey")
# Ollama model tag this proxy serves.
MODEL_NAME = "aiasistentworld/Kimi-VL-A3B-Thinking-2506-LLM:latest"
# Local Ollama streaming-generate endpoint.
OLLAMA_URL = "http://localhost:11434/api/generate"
| class ChatRequest(BaseModel): | |
| prompt: str | |
| key: str | |
@app.get("/")
async def root():
    """Health check: report that the proxy is up and which model it serves.

    Fix: the handler was never registered with the app — the route
    decorator was missing (or lost), making this dead code.
    """
    return {"status": "running", "model": MODEL_NAME}
| async def generate(request: ChatRequest): | |
| if request.key != CONNECT_KEY: | |
| raise HTTPException(status_code=403, detail="Invalid connect key") | |
| async def stream_generator(): | |
| payload = { | |
| "model": MODEL_NAME, | |
| "prompt": request.prompt, | |
| "stream": True | |
| } | |
| async with httpx.AsyncClient(timeout=None) as client: | |
| try: | |
| async with client.stream("POST", OLLAMA_URL, json=payload) as response: | |
| if response.status_code != 200: | |
| yield json.dumps({"error": "Ollama error"}).encode() | |
| return | |
| async for line in response.aiter_lines(): | |
| if line: | |
| try: | |
| data = json.loads(line) | |
| yield (data.get("response", "")).encode() | |
| if data.get("done"): | |
| break | |
| except json.JSONDecodeError: | |
| continue | |
| except Exception as e: | |
| yield json.dumps({"error": str(e)}).encode() | |
| return StreamingResponse(stream_generator(), media_type="text/event-stream") | |
if __name__ == "__main__":
    # Direct-run entry point: serve on all interfaces at port 7860
    # (the standard HuggingFace Spaces port).
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)