import json
import os
import time
import uuid

import httpx
from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials

app = FastAPI()
security = HTTPBearer()

# Sentinel default that signals the operator has not configured the Space yet.
_PLACEHOLDER = "!TU MUSISZ EDYTOWAC!"

# Configuration is read from the environment (HF Space Settings -> Variables).
API_KEY = os.environ.get("API_KEY", _PLACEHOLDER)
MODEL = os.environ.get("MODEL", _PLACEHOLDER)
OLLAMA_BASE = "http://127.0.0.1:11434"

# Fail fast at import time when the deployment was left unconfigured.
if _PLACEHOLDER in (API_KEY, MODEL):
    raise RuntimeError("Ustaw zmienne API_KEY i MODEL w HF Space Settings -> Variables")
|
def verify_key(credentials: HTTPAuthorizationCredentials = Depends(security)) -> str:
    """Validate the incoming Bearer token against the configured API_KEY.

    Args:
        credentials: Parsed ``Authorization: Bearer <token>`` header
            supplied by the ``HTTPBearer`` security dependency.

    Returns:
        The validated token string, usable as a dependency value.

    Raises:
        HTTPException: 401 when the presented token does not match.
    """
    # Function-scope import keeps this fix self-contained.
    import secrets

    # Constant-time comparison: a plain `!=` short-circuits on the first
    # differing byte and can leak key material via response timing.
    if not secrets.compare_digest(credentials.credentials, API_KEY):
        raise HTTPException(status_code=401, detail="Invalid API key")
    return credentials.credentials
| |
|
| |
|
@app.get("/v1/models")
async def list_models(key: str = Depends(verify_key)):
    """OpenAI-compatible model listing: exposes the single configured model."""
    model_entry = {
        "id": MODEL,
        "object": "model",
        "created": int(time.time()),
        "owned_by": "ollama",
    }
    return {"object": "list", "data": [model_entry]}
| |
|
| |
|
@app.post("/v1/chat/completions")
async def chat_completions(request: Request, key: str = Depends(verify_key)):
    """OpenAI-compatible streaming chat endpoint proxied to local Ollama.

    Translates the OpenAI-style request body into an Ollama ``/api/chat``
    payload, then re-emits Ollama's NDJSON stream as SSE chunks in the
    OpenAI ``chat.completion.chunk`` format.

    Raises:
        HTTPException: 400 when the request body is not valid JSON.
    """
    # Reject malformed JSON with an explicit 400 instead of letting the
    # decode error surface as an unhandled 500.
    try:
        body = await request.json()
    except Exception:
        raise HTTPException(status_code=400, detail="Request body must be valid JSON")

    messages = body.get("messages", [])
    options = {
        "temperature": body.get("temperature", 0.6),
        "top_p": body.get("top_p", 0.95),
    }
    # OpenAI "max_tokens" maps to Ollama's "num_predict".
    if "max_tokens" in body:
        options["num_predict"] = body["max_tokens"]

    ollama_payload = {
        "model": MODEL,
        "messages": messages,
        "stream": True,
        "options": options,
    }

    completion_id = f"chatcmpl-{uuid.uuid4().hex}"
    created = int(time.time())

    async def generate():
        """Yield OpenAI-style SSE chunks translated from Ollama's NDJSON."""
        async with httpx.AsyncClient(timeout=300.0) as client:
            async with client.stream("POST", f"{OLLAMA_BASE}/api/chat", json=ollama_payload) as resp:
                async for line in resp.aiter_lines():
                    if not line:
                        continue
                    try:
                        chunk = json.loads(line)
                    except Exception:
                        # Skip any non-JSON keep-alive/garbage lines.
                        continue

                    msg = chunk.get("message", {})
                    done = chunk.get("done", False)

                    # The final chunk carries an empty delta; otherwise copy
                    # the reasoning/content fields Ollama provides. (The
                    # original duplicated `delta = {}` in both branches.)
                    delta = {}
                    if not done:
                        if msg.get("thinking") is not None:
                            delta["reasoning_content"] = msg["thinking"]
                        if msg.get("content") is not None:
                            delta["content"] = msg["content"]

                    data = {
                        "id": completion_id,
                        "object": "chat.completion.chunk",
                        "created": created,
                        "model": MODEL,
                        "choices": [{
                            "index": 0,
                            "delta": delta,
                            "finish_reason": "stop" if done else None,
                        }]
                    }
                    yield f"data: {json.dumps(data)}\n\n"

                    if done:
                        break

        yield "data: [DONE]\n\n"

    return StreamingResponse(generate(), media_type="text/event-stream")
| |
|
| |
|
@app.get("/health")
async def health():
    """Report proxy status plus whether local Ollama answers /api/version."""
    ollama_ok = False
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            response = await client.get(f"{OLLAMA_BASE}/api/version")
        ollama_ok = response.status_code == 200
    except Exception:
        ollama_ok = False
    return {"status": "ok" if ollama_ok else "starting", "model": MODEL}
| |
|