triflix committed on
Commit
d14886f
·
verified ·
1 Parent(s): 2a263c0

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +25 -43
main.py CHANGED
@@ -1,4 +1,3 @@
1
- # app.py
2
  from fastapi import FastAPI, HTTPException
3
  from pydantic import BaseModel, Field
4
  from typing import List, Dict, Any
@@ -7,32 +6,10 @@ import datetime
7
 
8
  import torch
9
  from transformers import AutoTokenizer, AutoModelForCausalLM
10
- from huggingface_hub import login, HfHubHTTPError
11
 
12
  # ==========================================
13
- # 1. CONFIGURATION (Secure Defaults)
14
- # ==========================================
15
-
16
- MODEL_ID = "google/functiongemma-270m-it"
17
- HF_TOKEN_ENV = "HF_TOKEN"
18
-
19
- def get_hf_token() -> str:
20
- """
21
- Fetch Hugging Face token from environment.
22
-
23
- Raises:
24
- RuntimeError: if token is missing
25
- """
26
- token = os.getenv(HF_TOKEN_ENV)
27
- if not token:
28
- raise RuntimeError(
29
- f"Missing required environment variable: {HF_TOKEN_ENV}"
30
- )
31
- return token
32
-
33
-
34
- # ==========================================
35
- # 2. APP SETUP
36
  # ==========================================
37
 
38
  app = FastAPI(
@@ -40,45 +17,50 @@ app = FastAPI(
40
  version="1.0.0",
41
  )
42
 
 
 
43
  tokenizer = None
44
  model = None
45
 
46
-
47
  # ==========================================
48
- # 3. DATA MODELS
49
  # ==========================================
50
 
51
  class ChatRequest(BaseModel):
52
- """
53
- Request schema for function-call generation.
54
- """
55
  query: str = Field(..., min_length=1, max_length=4096)
56
  tools: List[Dict[str, Any]]
57
  include_date: bool = True
58
 
59
-
60
  class HealthResponse(BaseModel):
61
  status: str
62
  model: str
63
- auth: str
64
-
65
 
66
  # ==========================================
67
- # 4. STARTUP (Auth + Load Model)
68
  # ==========================================
69
 
70
  @app.on_event("startup")
71
  async def startup():
72
  global tokenizer, model
73
 
74
- # A. Authenticate (fail-fast)
 
 
 
 
 
 
 
75
  try:
76
- hf_token = get_hf_token()
77
  login(token=hf_token)
78
- except (RuntimeError, HfHubHTTPError) as e:
79
- raise RuntimeError(f"Hugging Face authentication failed: {e}")
 
 
80
 
81
  # B. Load Model
 
82
  try:
83
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
84
  model = AutoModelForCausalLM.from_pretrained(
@@ -86,12 +68,13 @@ async def startup():
86
  device_map="cpu",
87
  torch_dtype=torch.float32,
88
  )
 
89
  except Exception as e:
 
90
  raise RuntimeError(f"Model load failed: {e}")
91
 
92
-
93
  # ==========================================
94
- # 5. API ENDPOINT
95
  # ==========================================
96
 
97
  @app.post("/generate")
@@ -137,11 +120,10 @@ async def generate_function_call(request: ChatRequest):
137
  except Exception as e:
138
  raise HTTPException(status_code=500, detail=str(e))
139
 
140
-
141
  @app.get("/", response_model=HealthResponse)
142
  def health_check():
143
  return {
144
  "status": "running",
145
  "model": MODEL_ID,
146
- "auth": "env",
147
- }
 
 
1
  from fastapi import FastAPI, HTTPException
2
  from pydantic import BaseModel, Field
3
  from typing import List, Dict, Any
 
6
 
7
  import torch
8
  from transformers import AutoTokenizer, AutoModelForCausalLM
9
+ from huggingface_hub import login
10
 
11
  # ==========================================
12
+ # 1. APP SETUP
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  # ==========================================
14
 
15
  app = FastAPI(
 
17
  version="1.0.0",
18
  )
19
 
20
# Global variables
# Hugging Face model id; used by startup() to fetch the tokenizer/weights
# and echoed back by the health check.
MODEL_ID = "google/functiongemma-270m-it"
# Populated exactly once by the startup() event handler (via `global`);
# None until the model has finished loading.
tokenizer = None
model = None
24
 
 
25
  # ==========================================
26
+ # 2. DATA MODELS
27
  # ==========================================
28
 
29
class ChatRequest(BaseModel):
    """Request schema for the /generate endpoint: a user query plus the
    tool/function definitions the model may choose from."""
    # Natural-language user query; length-bounded to keep prompts sane.
    query: str = Field(..., min_length=1, max_length=4096)
    # Tool specs as free-form dicts (JSON-schema-like function definitions).
    tools: List[Dict[str, Any]]
    # NOTE(review): presumably tells the handler to inject the current date
    # into the prompt — the /generate body is elided in this diff; confirm.
    include_date: bool = True
33
 
 
34
class HealthResponse(BaseModel):
    """Response schema for the GET / health check."""
    status: str       # "running" once the app is serving
    model: str        # the configured MODEL_ID
    auth_status: str  # "secure_env": HF token is read from the environment
 
38
 
39
  # ==========================================
40
+ # 3. STARTUP (Auth + Load Model)
41
  # ==========================================
42
 
43
@app.on_event("startup")
async def startup():
    """Authenticate with the Hugging Face Hub and load the model/tokenizer.

    Runs once when the FastAPI app starts. Fails fast with RuntimeError if
    the HF_TOKEN environment variable is missing, the Hub login fails, or
    the model cannot be loaded, so the service never starts half-initialized.

    Raises:
        RuntimeError: on missing token, failed login, or failed model load.
    """
    global tokenizer, model

    # A. Authenticate using the HF_TOKEN environment variable (fail-fast).
    print("🔍 Checking for HF_TOKEN...")
    hf_token = os.getenv("HF_TOKEN")

    if not hf_token:
        print("❌ Error: HF_TOKEN environment variable is missing.")
        raise RuntimeError("HF_TOKEN environment variable is missing in Space Settings.")

    try:
        login(token=hf_token)
        print("✅ Authentication successful.")
    except Exception as e:
        print(f"❌ Authentication Failed: {e}")
        # Chain the cause so the Hub error's traceback is preserved.
        raise RuntimeError(f"Hugging Face login failed: {e}") from e

    # B. Load model weights on CPU in float32 (no GPU assumed in this Space).
    print(f"🧠 Loading Model: {MODEL_ID}...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            device_map="cpu",
            torch_dtype=torch.float32,
        )
        print("✅ Model Loaded Successfully.")
    except Exception as e:
        print(f"❌ Model Load Failed: {e}")
        # Chain the cause so the underlying load error is not lost.
        raise RuntimeError(f"Model load failed: {e}") from e
75
 
 
76
  # ==========================================
77
+ # 4. API ENDPOINT
78
  # ==========================================
79
 
80
  @app.post("/generate")
 
120
  except Exception as e:
121
  raise HTTPException(status_code=500, detail=str(e))
122
 
 
123
@app.get("/", response_model=HealthResponse)
def health_check():
    """Liveness probe: report service state, configured model id, and
    how the Hub token is sourced (environment variable)."""
    payload = dict(status="running", model=MODEL_ID, auth_status="secure_env")
    return payload