Alibrown committed on
Commit
78e15c9
·
verified ·
1 Parent(s): 12e4ca0

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +42 -24
main.py CHANGED
@@ -6,20 +6,25 @@
6
  # Apache License V2 + ESOL 1.1
7
  # =============================================================================
8
  # Hub connects via:
9
- # base_url = "https://codey-lab-smollm-service.hf.space/v1"
10
  # → POST /v1/chat/completions (OpenAI-compatible)
11
  # → GET /v1/health (status check)
 
 
 
 
 
12
  # =============================================================================
13
 
14
  import logging
 
15
  import time
16
  import uuid
17
  from contextlib import asynccontextmanager
 
18
 
19
- from fastapi import FastAPI, HTTPException, Request
20
- from fastapi.responses import JSONResponse
21
  from pydantic import BaseModel
22
- from typing import List, Optional
23
 
24
  import smollm
25
  import model as model_module
@@ -34,13 +39,28 @@ logger = logging.getLogger("main")
34
  # ── ADI ───────────────────────────────────────────────────────────────────────
35
  adi_analyzer = DumpindexAnalyzer(enable_logging=False)
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  # ── Startup ───────────────────────────────────────────────────────────────────
39
  @asynccontextmanager
40
  async def lifespan(app: FastAPI):
41
  logger.info("=== SmolLM2 Service starting ===")
42
  logger.info(f"Model config: {model_module.status()}")
43
- smollm.load() # preload model on startup
44
  yield
45
  logger.info("=== SmolLM2 Service stopped ===")
46
 
@@ -56,19 +76,11 @@ class Message(BaseModel):
56
  content: str
57
 
58
  class ChatCompletionRequest(BaseModel):
59
- model: Optional[str] = "smollm2-360m"
60
  messages: List[Message]
61
- max_tokens: Optional[int] = 150
62
  temperature: Optional[float] = 0.2
63
- stream: Optional[bool] = False
64
-
65
- class ChatCompletionResponse(BaseModel):
66
- id: str
67
- object: str = "chat.completion"
68
- created: int
69
- model: str
70
- choices: List[dict]
71
- adi: Optional[dict] = None # ADI result attached to response
72
 
73
 
74
  # =============================================================================
@@ -81,21 +93,29 @@ async def root():
81
  "service": "SmolLM2 Service",
82
  "model": smollm.device_info(),
83
  "ready": smollm.is_ready(),
 
84
  "docs": "/docs",
85
  }
86
 
87
 
88
  @app.get("/v1/health")
89
- async def health():
 
90
  return {
91
- "status": "ok" if smollm.is_ready() else "loading",
92
- "device": smollm.device_info(),
93
- "model": model_module.status(),
 
94
  }
95
 
96
 
97
  @app.post("/v1/chat/completions")
98
- async def chat_completions(req: ChatCompletionRequest):
 
 
 
 
 
99
  if not req.messages:
100
  raise HTTPException(status_code=400, detail="messages cannot be empty")
101
 
@@ -124,7 +144,6 @@ async def chat_completions(req: ChatCompletionRequest):
124
  "Your request needs more detail before I can help. "
125
  "Suggestions: " + " | ".join(adi_result["recommendations"])
126
  )
127
- # Log to dataset
128
  model_module.push_log({
129
  "prompt": user_prompt,
130
  "system_prompt": system_prompt,
@@ -150,7 +169,6 @@ async def chat_completions(req: ChatCompletionRequest):
150
 
151
  except Exception as e:
152
  logger.warning(f"SmolLM2 failed: {type(e).__name__} οΏ½οΏ½ triggering hub fallback")
153
- # Return 503 so hub's fallback chain kicks in
154
  raise HTTPException(
155
  status_code=503,
156
  detail={
@@ -195,4 +213,4 @@ def _build_response(model: str, content: str, adi_result: dict) -> dict:
195
  "decision": adi_result["decision"],
196
  "metrics": adi_result["metrics"],
197
  }
198
- }
 
6
  # Apache License V2 + ESOL 1.1
7
  # =============================================================================
8
  # Hub connects via:
9
+ # base_url = "https://codey-lab-smollm2-customs.hf.space/v1"
10
  # → POST /v1/chat/completions (OpenAI-compatible)
11
  # → GET /v1/health (status check)
12
+ #
13
+ # AUTH:
14
+ # Set API_KEY in HF Space Secrets to lock down the endpoint.
15
+ # Hub sends it as: Authorization: Bearer <API_KEY>
16
+ # If API_KEY not set → open access (dev mode, log warning)
17
  # =============================================================================
18
 
19
  import logging
20
+ import os
21
  import time
22
  import uuid
23
  from contextlib import asynccontextmanager
24
+ from typing import List, Optional
25
 
26
+ from fastapi import FastAPI, Header, HTTPException
 
27
  from pydantic import BaseModel
 
28
 
29
  import smollm
30
  import model as model_module
 
39
  # ── ADI ───────────────────────────────────────────────────────────────────────
40
  adi_analyzer = DumpindexAnalyzer(enable_logging=False)
41
 
42
+ # ── API Key Auth ──────────────────────────────────────────────────────────────
43
+ _API_KEY = os.environ.get("API_KEY", "")
44
+ if not _API_KEY:
45
+ logger.warning("API_KEY not set — running in open access mode!")
46
+ else:
47
+ logger.info("API_KEY set — endpoint is protected")
48
+
49
+ def _check_auth(authorization: Optional[str]) -> None:
50
+ """Validate Bearer token. Skipped if API_KEY secret not set (dev mode)."""
51
+ if not _API_KEY:
52
+ return
53
+ if authorization != f"Bearer {_API_KEY}":
54
+ logger.warning("Unauthorized request — invalid or missing token")
55
+ raise HTTPException(status_code=401, detail="Unauthorized")
56
+
57
 
58
  # ── Startup ───────────────────────────────────────────────────────────────────
59
  @asynccontextmanager
60
  async def lifespan(app: FastAPI):
61
  logger.info("=== SmolLM2 Service starting ===")
62
  logger.info(f"Model config: {model_module.status()}")
63
+ smollm.load()
64
  yield
65
  logger.info("=== SmolLM2 Service stopped ===")
66
 
 
76
  content: str
77
 
78
  class ChatCompletionRequest(BaseModel):
79
+ model: Optional[str] = "smollm2-360m"
80
  messages: List[Message]
81
+ max_tokens: Optional[int] = 150
82
  temperature: Optional[float] = 0.2
83
+ stream: Optional[bool] = False
 
 
 
 
 
 
 
 
84
 
85
 
86
  # =============================================================================
 
93
  "service": "SmolLM2 Service",
94
  "model": smollm.device_info(),
95
  "ready": smollm.is_ready(),
96
+ "auth": "protected" if _API_KEY else "open",
97
  "docs": "/docs",
98
  }
99
 
100
 
101
  @app.get("/v1/health")
102
+ async def health(authorization: Optional[str] = Header(None)):
103
+ _check_auth(authorization)
104
  return {
105
+ "status": "ok" if smollm.is_ready() else "loading",
106
+ "device": smollm.device_info(),
107
+ "model": model_module.status(),
108
+ "auth": "protected" if _API_KEY else "open",
109
  }
110
 
111
 
112
  @app.post("/v1/chat/completions")
113
+ async def chat_completions(
114
+ req: ChatCompletionRequest,
115
+ authorization: Optional[str] = Header(None),
116
+ ):
117
+ _check_auth(authorization)
118
+
119
  if not req.messages:
120
  raise HTTPException(status_code=400, detail="messages cannot be empty")
121
 
 
144
  "Your request needs more detail before I can help. "
145
  "Suggestions: " + " | ".join(adi_result["recommendations"])
146
  )
 
147
  model_module.push_log({
148
  "prompt": user_prompt,
149
  "system_prompt": system_prompt,
 
169
 
170
  except Exception as e:
171
  logger.warning(f"SmolLM2 failed: {type(e).__name__} οΏ½οΏ½ triggering hub fallback")
 
172
  raise HTTPException(
173
  status_code=503,
174
  detail={
 
213
  "decision": adi_result["decision"],
214
  "metrics": adi_result["metrics"],
215
  }
216
+ }