oki692 committed on
Commit
7ff2724
·
verified ·
1 Parent(s): b534ef7

Delete proxy.py

Browse files
Files changed (1) hide show
  1. proxy.py +0 -114
proxy.py DELETED
@@ -1,114 +0,0 @@
1
import json
import os
import secrets
import time
import uuid

import httpx
from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.responses import StreamingResponse
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
-
10
- app = FastAPI()
11
- security = HTTPBearer()
12
-
13
- API_KEY = os.environ.get("API_KEY", "!TU MUSISZ EDYTOWAC!") # np. "moj-tajny-klucz"
14
- MODEL = os.environ.get("MODEL", "!TU MUSISZ EDYTOWAC!") # np. "deepseek-r1:14b" albo "hf.co/unsloth/GLM-4.7-Flash-GGUF:UD-TQ1_0"
15
- OLLAMA_BASE = "http://127.0.0.1:11434"
16
-
17
- if "!TU MUSISZ EDYTOWAC!" in (API_KEY, MODEL):
18
- raise RuntimeError("Ustaw zmienne API_KEY i MODEL w HF Space Settings -> Variables")
19
-
20
-
21
- def verify_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
22
- if credentials.credentials != API_KEY:
23
- raise HTTPException(status_code=401, detail="Invalid API key")
24
- return credentials.credentials
25
-
26
-
27
- @app.get("/v1/models")
28
- async def list_models(key: str = Depends(verify_key)):
29
- return {
30
- "object": "list",
31
- "data": [{
32
- "id": MODEL,
33
- "object": "model",
34
- "created": int(time.time()),
35
- "owned_by": "ollama",
36
- }]
37
- }
38
-
39
-
40
- @app.post("/v1/chat/completions")
41
- async def chat_completions(request: Request, key: str = Depends(verify_key)):
42
- body = await request.json()
43
-
44
- messages = body.get("messages", [])
45
- temperature = body.get("temperature", 0.6) # !TU MUSISZ EDYTOWAC! domyślna temperatura (0.0-2.0)
46
- top_p = body.get("top_p", 0.95) # !TU MUSISZ EDYTOWAC! domyślne top_p (0.0-1.0)
47
-
48
- options = {"temperature": temperature, "top_p": top_p}
49
- if "max_tokens" in body:
50
- options["num_predict"] = body["max_tokens"]
51
-
52
- ollama_payload = {
53
- "model": MODEL,
54
- "messages": messages,
55
- "stream": True,
56
- "options": options,
57
- }
58
-
59
- completion_id = f"chatcmpl-{uuid.uuid4().hex}"
60
- created = int(time.time())
61
-
62
- async def generate():
63
- async with httpx.AsyncClient(timeout=300.0) as client:
64
- async with client.stream("POST", f"{OLLAMA_BASE}/api/chat", json=ollama_payload) as resp:
65
- async for line in resp.aiter_lines():
66
- if not line:
67
- continue
68
- try:
69
- chunk = json.loads(line)
70
- except Exception:
71
- continue
72
-
73
- msg = chunk.get("message", {})
74
- done = chunk.get("done", False)
75
-
76
- if done:
77
- delta = {}
78
- else:
79
- delta = {}
80
- if msg.get("thinking") is not None:
81
- delta["reasoning_content"] = msg["thinking"]
82
- if msg.get("content") is not None:
83
- delta["content"] = msg["content"]
84
-
85
- data = {
86
- "id": completion_id,
87
- "object": "chat.completion.chunk",
88
- "created": created,
89
- "model": MODEL,
90
- "choices": [{
91
- "index": 0,
92
- "delta": delta,
93
- "finish_reason": "stop" if done else None,
94
- }]
95
- }
96
- yield f"data: {json.dumps(data)}\n\n"
97
-
98
- if done:
99
- break
100
-
101
- yield "data: [DONE]\n\n"
102
-
103
- return StreamingResponse(generate(), media_type="text/event-stream")
104
-
105
-
106
- @app.get("/health")
107
- async def health():
108
- async with httpx.AsyncClient(timeout=5.0) as client:
109
- try:
110
- r = await client.get(f"{OLLAMA_BASE}/api/version")
111
- ollama_ok = r.status_code == 200
112
- except Exception:
113
- ollama_ok = False
114
- return {"status": "ok" if ollama_ok else "starting", "model": MODEL}