gemmaapi / proxy.py
oki692's picture
Upload 4 files
3154e52 verified
raw
history blame
3.95 kB
import os
from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
import httpx
import json
import time
import uuid
# Marker used as the fallback for settings that the deployer must provide.
_PLACEHOLDER = "!TU MUSISZ EDYTOWAC!"

app = FastAPI()
security = HTTPBearer()

# Bearer token clients must present, e.g. "moj-tajny-klucz".
API_KEY = os.environ.get("API_KEY", _PLACEHOLDER)
# Ollama model tag to serve, e.g. "deepseek-r1:14b" or
# "hf.co/unsloth/GLM-4.7-Flash-GGUF:UD-TQ1_0".
MODEL = os.environ.get("MODEL", _PLACEHOLDER)
# Base URL of the Ollama server the proxy forwards to.
OLLAMA_BASE = "http://127.0.0.1:11434"

# Refuse to start while either setting still holds the placeholder value.
if API_KEY == _PLACEHOLDER or MODEL == _PLACEHOLDER:
    raise RuntimeError("Ustaw zmienne API_KEY i MODEL w HF Space Settings -> Variables")
def verify_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """Validate the bearer token of the incoming request.

    Args:
        credentials: Bearer credentials extracted by the ``security`` scheme.

    Returns:
        The presented token, when it matches ``API_KEY``.

    Raises:
        HTTPException: 401 when the token does not match ``API_KEY``.
    """
    # Imported locally so this function does not depend on the file's
    # top-level import block.
    import hmac

    # Compare in constant time: a plain ``!=`` stops at the first differing
    # character, which leaks timing information about the secret.
    # ``compare_digest`` only accepts ASCII ``str``, so compare encoded bytes;
    # "surrogatepass" keeps the encoding total for any ``str`` value.
    supplied = credentials.credentials.encode("utf-8", "surrogatepass")
    expected = API_KEY.encode("utf-8", "surrogatepass")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(status_code=401, detail="Invalid API key")
    return credentials.credentials
@app.get("/v1/models")
async def list_models(key: str = Depends(verify_key)):
    """Return a model listing that contains only the configured ``MODEL``.

    The ``key`` dependency exists solely to enforce authentication; its value
    is not used here.
    """
    model_entry = {
        "id": MODEL,
        "object": "model",
        # Stamped with the time of the request.
        "created": int(time.time()),
        "owned_by": "ollama",
    }
    return {"object": "list", "data": [model_entry]}
@app.post("/v1/chat/completions")
async def chat_completions(request: Request, key: str = Depends(verify_key)):
    """Proxy a chat completion request to Ollama and stream the reply as SSE.

    The JSON request body supplies ``messages`` and, optionally,
    ``temperature``, ``top_p`` and ``max_tokens``. The model is always the
    configured ``MODEL`` and the reply is always streamed: a sequence of
    ``chat.completion.chunk`` events followed by ``data: [DONE]``.

    Upstream failures (HTTP error status, an ``error`` line in the stream, or a
    transport error) are reported to the client as a ``data: {"error": ...}``
    event before the terminator, because the HTTP status line has already been
    sent by the time they can occur.

    Args:
        request: Incoming request whose body is the chat completion payload.
        key: Result of ``verify_key``; present only to enforce authentication.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``.

    Raises:
        HTTPException: 400 when the body is not valid JSON or not a JSON object.
    """
    try:
        body = await request.json()
    except ValueError as exc:  # JSON decoding errors are ValueError subclasses
        raise HTTPException(status_code=400, detail="Request body must be valid JSON") from exc
    if not isinstance(body, dict):
        raise HTTPException(status_code=400, detail="Request body must be a JSON object")

    messages = body.get("messages", [])
    # Defaults apply both when a key is absent and when it is sent as null.
    temperature = body.get("temperature")
    top_p = body.get("top_p")
    options = {
        # Default sampling temperature (0.0-2.0); edit to taste.
        "temperature": 0.6 if temperature is None else temperature,
        # Default nucleus sampling value (0.0-1.0); edit to taste.
        "top_p": 0.95 if top_p is None else top_p,
    }
    if body.get("max_tokens") is not None:
        options["num_predict"] = body["max_tokens"]

    ollama_payload = {
        "model": MODEL,
        "messages": messages,
        "stream": True,
        "options": options,
    }
    completion_id = f"chatcmpl-{uuid.uuid4().hex}"
    created = int(time.time())

    def sse(payload) -> str:
        """Format *payload* as one server-sent event."""
        return f"data: {json.dumps(payload)}\n\n"

    def error_event(message: str) -> str:
        """Build the SSE event used to report an upstream failure."""
        return sse({"error": {"message": message, "type": "upstream_error"}})

    async def generate():
        try:
            async with httpx.AsyncClient(timeout=300.0) as client:
                async with client.stream(
                    "POST", f"{OLLAMA_BASE}/api/chat", json=ollama_payload
                ) as resp:
                    if resp.status_code >= 400:
                        # Surface the upstream error instead of parsing its
                        # body as if it were a completion chunk.
                        detail = (await resp.aread()).decode("utf-8", errors="replace")
                        yield error_event(
                            f"Ollama returned HTTP {resp.status_code}: {detail}"
                        )
                    else:
                        async for line in resp.aiter_lines():
                            if not line:
                                continue
                            try:
                                chunk = json.loads(line)
                            except json.JSONDecodeError:
                                # Best effort: skip lines that are not JSON.
                                continue
                            if not isinstance(chunk, dict):
                                continue
                            if chunk.get("error"):
                                yield error_event(str(chunk["error"]))
                                break

                            # ``or {}`` also covers an explicit null message.
                            msg = chunk.get("message") or {}
                            done = bool(chunk.get("done", False))

                            # The final chunk carries an empty delta.
                            delta = {}
                            if not done:
                                if msg.get("thinking") is not None:
                                    delta["reasoning_content"] = msg["thinking"]
                                if msg.get("content") is not None:
                                    delta["content"] = msg["content"]

                            finish_reason = None
                            if done:
                                # Report truncation when Ollama says the
                                # generation stopped at the token limit.
                                if chunk.get("done_reason") == "length":
                                    finish_reason = "length"
                                else:
                                    finish_reason = "stop"

                            yield sse({
                                "id": completion_id,
                                "object": "chat.completion.chunk",
                                "created": created,
                                "model": MODEL,
                                "choices": [{
                                    "index": 0,
                                    "delta": delta,
                                    "finish_reason": finish_reason,
                                }],
                            })
                            if done:
                                break
        except httpx.HTTPError as exc:
            # Connection failures and timeouts end the stream with an explicit
            # error event rather than an abrupt disconnect.
            yield error_event(f"Ollama request failed: {exc}")
        yield "data: [DONE]\n\n"

    return StreamingResponse(generate(), media_type="text/event-stream")
@app.get("/health")
async def health():
    """Report whether the Ollama server answers its version endpoint.

    Returns a dict with ``status`` set to ``"ok"`` when ``/api/version``
    responds with HTTP 200 and ``"starting"`` otherwise, plus the configured
    ``model``.
    """
    async with httpx.AsyncClient(timeout=5.0) as client:
        try:
            response = await client.get(f"{OLLAMA_BASE}/api/version")
        except Exception:
            # Deliberately broad: any failure to reach Ollama means "not ready".
            reachable = False
        else:
            reachable = response.status_code == 200

    status = "ok" if reachable else "starting"
    return {"status": status, "model": MODEL}