File size: 1,953 Bytes
b86ec86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import asyncio
import json
import os
import secrets
from typing import Any, Dict, List, Optional

import httpx
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

app = FastAPI()

# Hardcoded connect key
# NOTE(review): shared secret committed to source — consider loading it from an
# environment variable (e.g. os.environ["CONNECT_KEY"]) instead.
CONNECT_KEY = "connectkey"
# Ollama model tag to run, and the local Ollama generate endpoint.
MODEL_NAME = "aiasistentworld/Kimi-VL-A3B-Thinking-2506-LLM:latest"
OLLAMA_URL = "http://localhost:11434/api/generate"

class ChatRequest(BaseModel):
    """Body of POST /: the prompt to forward to Ollama plus the connect key
    used for access control."""
    prompt: str
    key: str

@app.get("/")
async def root():
    """Health check: report that the service is up and which model it serves."""
    status_payload = {"status": "running", "model": MODEL_NAME}
    return status_payload

@app.post("/")
async def generate(request: ChatRequest):
    """Stream a completion for ``request.prompt`` from the local Ollama server.

    Validates the shared connect key, then proxies the prompt to Ollama's
    ``/api/generate`` endpoint with streaming enabled, forwarding each
    generated text chunk to the client as it arrives.

    Args:
        request: Parsed request body (prompt + connect key).

    Returns:
        StreamingResponse yielding raw UTF-8 text chunks (or an in-band
        JSON error object if the upstream call fails).

    Raises:
        HTTPException: 403 when the supplied key does not match CONNECT_KEY.
    """
    # Constant-time comparison — a plain `!=` on a secret leaks timing
    # information that can be used to guess the key byte by byte.
    if not secrets.compare_digest(request.key, CONNECT_KEY):
        raise HTTPException(status_code=403, detail="Invalid connect key")

    async def stream_generator():
        payload = {
            "model": MODEL_NAME,
            "prompt": request.prompt,
            "stream": True,
        }

        # No timeout: model generation can legitimately take a long time.
        async with httpx.AsyncClient(timeout=None) as client:
            try:
                async with client.stream("POST", OLLAMA_URL, json=payload) as response:
                    if response.status_code != 200:
                        yield json.dumps({"error": "Ollama error"}).encode()
                        return

                    # Ollama emits one JSON object per line; forward only the
                    # generated text and stop once the "done" marker arrives.
                    async for line in response.aiter_lines():
                        if not line:
                            continue
                        try:
                            data = json.loads(line)
                        except json.JSONDecodeError:
                            # Skip malformed lines rather than aborting the stream.
                            continue
                        yield (data.get("response", "")).encode()
                        if data.get("done"):
                            break
            except Exception as e:
                # Headers (200) are already sent once streaming starts, so
                # failures must be surfaced to the client in-band.
                yield json.dumps({"error": str(e)}).encode()

    return StreamingResponse(stream_generator(), media_type="text/event-stream")

# Script entry point: run the app directly with uvicorn.
if __name__ == "__main__":
    import uvicorn
    # Bind on all interfaces so the API is reachable from outside the host.
    uvicorn.run(app, host="0.0.0.0", port=7860)