jarvis0852 committed on
Commit
85e6d30
·
verified ·
1 Parent(s): 49b4fd0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py CHANGED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+ import os
3
+ import json
4
+ import time
5
+ from typing import Dict
6
+ from fastapi import FastAPI, Request, HTTPException
7
+ from fastapi.middleware.cors import CORSMiddleware
8
+ import httpx
9
+
10
# Hugging Face access token, read from the environment (set it as a Space
# secret). When unset, requests to the inference endpoint carry no
# Authorization header.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Model repository to proxy. Can be overridden with the optional HF_REPO_ID
# environment variable; an unset or empty value falls back to the default.
REPO_ID = os.environ.get("HF_REPO_ID") or "deepseek-ai/DeepSeek-V3.2-Exp-Base"
INFERENCE_URL = f"https://api-inference.huggingface.co/models/{REPO_ID}"

# Optional: require a simple API key header from the frontend (set as the
# secret API_KEY). An unset or empty value disables the check.
API_KEY = os.environ.get("API_KEY")
15
+
16
app = FastAPI(title="HF Space Model Proxy")

# Allow CORS from your frontend OR allow all for testing.
# Replace '*' with your origin in production.
origins = ["*"]  # <-- replace with ["https://your-frontend-domain.com"] in production

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    # Credentialed CORS must not be combined with a wildcard origin: it would
    # let any website make cookie-bearing requests. Credentials are therefore
    # only enabled once `origins` lists explicit origins. The optional API key
    # travels in the "x-api-key" header, which does not need credentials mode.
    allow_credentials="*" not in origins,
    allow_methods=["POST", "OPTIONS"],
    allow_headers=["*"],
)
27
+
28
# Liveness probe.
@app.get("/health")
async def health():
    """Report that the service is up, together with the server's clock.

    Returns:
        A dict with "status" fixed to "ok" and "time" set to the value of
        ``time.time()`` at the moment of the call.
    """
    now = time.time()
    return {"status": "ok", "time": now}
32
+
33
def _extract_generated_text(data):
    """Reduce an inference response to a single string where possible.

    Handles a non-empty list whose first item is either a dict holding
    "generated_text" or a plain string, and a dict holding "generated_text".
    Any other shape is returned as its JSON serialisation.
    """
    if isinstance(data, list) and data:
        first = data[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]
        if isinstance(first, str):
            return first
        return json.dumps(data)
    if isinstance(data, dict) and "generated_text" in data:
        return data["generated_text"]
    return json.dumps(data)


@app.post("/predict")
async def predict(request: Request):
    """Forward a generation request to the configured inference endpoint.

    The JSON body must be an object carrying the input under "messages",
    "inputs" or "prompt" (first non-empty one wins) and may carry
    "max_new_tokens" (default 256), e.g.
    {"messages": [{"role": "user", "content": "Who are you?"}],
     "max_new_tokens": 256}.

    Returns:
        {"output": <normalised text>, "raw": <decoded upstream JSON>}.

    Raises:
        HTTPException 401: API_KEY is configured and the "x-api-key" header
            does not match it.
        HTTPException 400: the body is not a JSON object, the input is
            missing or empty, or "max_new_tokens" is not a positive integer.
        HTTPException 502: the upstream call failed, returned a non-200
            status, or returned a body that is not JSON.
        HTTPException 504: the upstream call timed out.
    """
    import hmac  # local import keeps this block self-contained

    # Optional API key check (frontend should send header "x-api-key").
    if API_KEY:
        supplied = request.headers.get("x-api-key") or ""
        # Constant-time comparison so response timing does not reveal how
        # much of the key matched; bytes avoid the ASCII-only str limitation.
        if not hmac.compare_digest(supplied.encode("utf-8"), API_KEY.encode("utf-8")):
            raise HTTPException(status_code=401, detail="Invalid API key")

    # A malformed body is a client error, not a server crash.
    try:
        payload = await request.json()
    except ValueError as exc:
        raise HTTPException(status_code=400, detail="Request body must be valid JSON") from exc
    if not isinstance(payload, dict):
        raise HTTPException(status_code=400, detail="JSON body must be an object")

    messages = payload.get("messages") or payload.get("inputs") or payload.get("prompt")
    # Reject empty values as well as absent ones, so an empty "prompt" string
    # is treated the same way as an empty "messages" list.
    if not messages:
        raise HTTPException(status_code=400, detail="Missing 'messages' or 'inputs' or 'prompt' in JSON body")

    try:
        max_new_tokens = int(payload.get("max_new_tokens", 256))
    except (TypeError, ValueError, OverflowError) as exc:
        raise HTTPException(status_code=400, detail="'max_new_tokens' must be an integer") from exc
    if max_new_tokens < 1:
        raise HTTPException(status_code=400, detail="'max_new_tokens' must be a positive integer")

    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    # NOTE(review): `messages` is forwarded verbatim as "inputs"; whether the
    # upstream endpoint accepts a chat-style list here is not visible from
    # this file - confirm against the model's API.
    body = {
        "inputs": messages,
        "parameters": {"max_new_tokens": max_new_tokens},
        "options": {"wait_for_model": True},
    }

    # Network failures are mapped to gateway errors instead of a bare 500.
    try:
        async with httpx.AsyncClient(timeout=120) as client:
            resp = await client.post(INFERENCE_URL, headers=headers, json=body)
    except httpx.TimeoutException as exc:
        raise HTTPException(status_code=504, detail="Upstream inference request timed out") from exc
    except httpx.HTTPError as exc:
        raise HTTPException(status_code=502, detail=f"Upstream inference request failed: {exc}") from exc

    if resp.status_code != 200:
        # Bubble up the upstream error with enough detail to diagnose it.
        detail = {"hf_status": resp.status_code, "content": resp.text}
        raise HTTPException(status_code=502, detail=detail)

    try:
        data = resp.json()
    except ValueError as exc:
        # A 200 with a non-JSON body is still an upstream fault.
        detail = {"hf_status": resp.status_code, "content": resp.text}
        raise HTTPException(status_code=502, detail=detail) from exc

    # Normalize output to a simple string if possible.
    out_text = _extract_generated_text(data)
    return {"output": out_text, "raw": data}