# Hugging Face Space status banner captured along with the page
# ("Spaces: Sleeping") — not part of the program.
import json
import os
import time
import uuid

import httpx
from fastapi import Depends, FastAPI, HTTPException, Request
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer

app = FastAPI()
security = HTTPBearer()

# Both values must be overridden in HF Space Settings -> Variables.
API_KEY = os.environ.get("API_KEY", "!TU MUSISZ EDYTOWAC!")  # e.g. "moj-tajny-klucz"
MODEL = os.environ.get("MODEL", "!TU MUSISZ EDYTOWAC!")  # e.g. "deepseek-r1:14b" or "hf.co/unsloth/GLM-4.7-Flash-GGUF:UD-TQ1_0"
OLLAMA_BASE = "http://127.0.0.1:11434"

# Fail fast at startup if either placeholder was left unconfigured.
if "!TU MUSISZ EDYTOWAC!" in (API_KEY, MODEL):
    raise RuntimeError("Ustaw zmienne API_KEY i MODEL w HF Space Settings -> Variables")
def verify_key(credentials: HTTPAuthorizationCredentials = Depends(security)) -> str:
    """Validate the Bearer token against API_KEY.

    Args:
        credentials: Bearer credentials extracted by the HTTPBearer dependency.

    Returns:
        The validated token string, so routes can depend on it.

    Raises:
        HTTPException: 401 when the presented token does not match API_KEY.
    """
    import secrets  # function-scope import keeps this fix self-contained

    # compare_digest is constant-time; a plain `!=` on a secret leaks
    # matching-prefix length through response timing.
    if not secrets.compare_digest(credentials.credentials, API_KEY):
        raise HTTPException(status_code=401, detail="Invalid API key")
    return credentials.credentials
async def list_models(key: str = Depends(verify_key)):
    """Return an OpenAI-compatible model list containing the single served model.

    NOTE(review): no route decorator is visible in this capture — presumably
    ``@app.get("/v1/models")`` was lost in transit; confirm registration.
    """
    model_entry = {
        "id": MODEL,
        "object": "model",
        "created": int(time.time()),
        "owned_by": "ollama",
    }
    return {"object": "list", "data": [model_entry]}
async def chat_completions(request: Request, key: str = Depends(verify_key)):
    """Proxy an OpenAI-style chat request to Ollama and stream the reply back.

    Translates the request body into an Ollama ``/api/chat`` payload, then
    re-emits Ollama's NDJSON stream as OpenAI SSE ``chat.completion.chunk``
    events, terminated by ``data: [DONE]``.

    NOTE(review): no route decorator is visible in this capture — presumably
    ``@app.post("/v1/chat/completions")`` was lost in transit; confirm.
    """
    body = await request.json()

    # Sampling options; defaults apply when the client omits them.
    options = {
        "temperature": body.get("temperature", 0.6),  # default temperature (0.0-2.0)
        "top_p": body.get("top_p", 0.95),  # default top_p (0.0-1.0)
    }
    if "max_tokens" in body:
        # OpenAI's max_tokens maps to Ollama's num_predict.
        options["num_predict"] = body["max_tokens"]

    ollama_payload = {
        "model": MODEL,
        "messages": body.get("messages", []),
        "stream": True,
        "options": options,
    }

    completion_id = f"chatcmpl-{uuid.uuid4().hex}"
    created = int(time.time())

    async def generate():
        """Yield OpenAI SSE lines built from Ollama's NDJSON stream."""
        async with httpx.AsyncClient(timeout=300.0) as client:
            async with client.stream("POST", f"{OLLAMA_BASE}/api/chat", json=ollama_payload) as resp:
                async for raw in resp.aiter_lines():
                    if not raw:
                        continue
                    try:
                        chunk = json.loads(raw)
                    except Exception:
                        # Skip malformed lines rather than abort the stream.
                        continue
                    finished = chunk.get("done", False)
                    delta = {}
                    if not finished:
                        msg = chunk.get("message", {})
                        # Ollama may emit separate "thinking" and "content" text.
                        if msg.get("thinking") is not None:
                            delta["reasoning_content"] = msg["thinking"]
                        if msg.get("content") is not None:
                            delta["content"] = msg["content"]
                    event = {
                        "id": completion_id,
                        "object": "chat.completion.chunk",
                        "created": created,
                        "model": MODEL,
                        "choices": [{
                            "index": 0,
                            "delta": delta,
                            "finish_reason": "stop" if finished else None,
                        }],
                    }
                    yield f"data: {json.dumps(event)}\n\n"
                    if finished:
                        break
        yield "data: [DONE]\n\n"

    return StreamingResponse(generate(), media_type="text/event-stream")
async def health():
    """Liveness probe: report whether the local Ollama daemon responds.

    Returns ``status: "ok"`` once Ollama answers its /api/version endpoint,
    ``"starting"`` otherwise (e.g. while the daemon is still booting).

    NOTE(review): no route decorator is visible in this capture — presumably
    ``@app.get("/health")``; confirm registration.
    """
    reachable = False
    async with httpx.AsyncClient(timeout=5.0) as client:
        try:
            resp = await client.get(f"{OLLAMA_BASE}/api/version")
        except Exception:
            # Best-effort check: any transport error just means "not up yet".
            reachable = False
        else:
            reachable = resp.status_code == 200
    return {"status": "ok" if reachable else "starting", "model": MODEL}