# Hugging Face Space status banner captured along with the page
# ("Spaces: Sleeping") — not part of the program.
import json
import os
import time
import uuid

import httpx
from fastapi import Depends, FastAPI, HTTPException, Request
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer

app = FastAPI()
security = HTTPBearer()

# Both values must be overridden in HF Space Settings -> Variables.
API_KEY = os.environ.get("API_KEY", "!TU MUSISZ EDYTOWAC!")  # e.g. "moj-tajny-klucz"
MODEL = os.environ.get("MODEL", "!TU MUSISZ EDYTOWAC!")  # e.g. "deepseek-r1:14b" or "hf.co/unsloth/GLM-4.7-Flash-GGUF:UD-TQ1_0"
OLLAMA_BASE = "http://127.0.0.1:11434"

# Fail fast at startup if either placeholder was left unconfigured.
if "!TU MUSISZ EDYTOWAC!" in (API_KEY, MODEL):
    raise RuntimeError("Ustaw zmienne API_KEY i MODEL w HF Space Settings -> Variables")
def verify_key(credentials: HTTPAuthorizationCredentials = Depends(security)) -> str:
    """Validate the Bearer token against API_KEY.

    Args:
        credentials: Bearer credentials extracted by the HTTPBearer dependency.

    Returns:
        The validated token string, so routes can depend on it.

    Raises:
        HTTPException: 401 when the presented token does not match API_KEY.
    """
    import secrets  # function-scope import keeps this fix self-contained

    # compare_digest is constant-time; a plain `!=` on a secret leaks
    # matching-prefix length through response timing.
    if not secrets.compare_digest(credentials.credentials, API_KEY):
        raise HTTPException(status_code=401, detail="Invalid API key")
    return credentials.credentials
async def list_models(key: str = Depends(verify_key)):
    """Return an OpenAI-compatible model list containing the single served model.

    NOTE(review): no route decorator is visible in this capture — presumably
    ``@app.get("/v1/models")`` was lost in transit; confirm registration.
    """
    model_entry = {
        "id": MODEL,
        "object": "model",
        "created": int(time.time()),
        "owned_by": "ollama",
    }
    return {"object": "list", "data": [model_entry]}
async def chat_completions(request: Request, key: str = Depends(verify_key)):
    """Proxy an OpenAI-style chat request to Ollama and stream the reply back.

    Translates the request body into an Ollama ``/api/chat`` payload, then
    re-emits Ollama's NDJSON stream as OpenAI SSE ``chat.completion.chunk``
    events, terminated by ``data: [DONE]``.

    NOTE(review): no route decorator is visible in this capture — presumably
    ``@app.post("/v1/chat/completions")`` was lost in transit; confirm.
    """
    body = await request.json()

    # Sampling options; defaults apply when the client omits them.
    options = {
        "temperature": body.get("temperature", 0.6),  # default temperature (0.0-2.0)
        "top_p": body.get("top_p", 0.95),  # default top_p (0.0-1.0)
    }
    if "max_tokens" in body:
        # OpenAI's max_tokens maps to Ollama's num_predict.
        options["num_predict"] = body["max_tokens"]

    ollama_payload = {
        "model": MODEL,
        "messages": body.get("messages", []),
        "stream": True,
        "options": options,
    }

    completion_id = f"chatcmpl-{uuid.uuid4().hex}"
    created = int(time.time())

    async def generate():
        """Yield OpenAI SSE lines built from Ollama's NDJSON stream."""
        async with httpx.AsyncClient(timeout=300.0) as client:
            async with client.stream("POST", f"{OLLAMA_BASE}/api/chat", json=ollama_payload) as resp:
                async for raw in resp.aiter_lines():
                    if not raw:
                        continue
                    try:
                        chunk = json.loads(raw)
                    except Exception:
                        # Skip malformed lines rather than abort the stream.
                        continue
                    finished = chunk.get("done", False)
                    delta = {}
                    if not finished:
                        msg = chunk.get("message", {})
                        # Ollama may emit separate "thinking" and "content" text.
                        if msg.get("thinking") is not None:
                            delta["reasoning_content"] = msg["thinking"]
                        if msg.get("content") is not None:
                            delta["content"] = msg["content"]
                    event = {
                        "id": completion_id,
                        "object": "chat.completion.chunk",
                        "created": created,
                        "model": MODEL,
                        "choices": [{
                            "index": 0,
                            "delta": delta,
                            "finish_reason": "stop" if finished else None,
                        }],
                    }
                    yield f"data: {json.dumps(event)}\n\n"
                    if finished:
                        break
        yield "data: [DONE]\n\n"

    return StreamingResponse(generate(), media_type="text/event-stream")
async def health():
    """Liveness probe: report whether the local Ollama daemon responds.

    Returns ``status: "ok"`` once Ollama answers its /api/version endpoint,
    ``"starting"`` otherwise (e.g. while the daemon is still booting).

    NOTE(review): no route decorator is visible in this capture — presumably
    ``@app.get("/health")``; confirm registration.
    """
    reachable = False
    async with httpx.AsyncClient(timeout=5.0) as client:
        try:
            resp = await client.get(f"{OLLAMA_BASE}/api/version")
        except Exception:
            # Best-effort check: any transport error just means "not up yet".
            reachable = False
        else:
            reachable = resp.status_code == 200
    return {"status": "ok" if reachable else "starting", "model": MODEL}