bigbossmonster committed on
Commit
d678cf7
·
verified ·
1 Parent(s): 1aa3002

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -65
app.py CHANGED
@@ -1,12 +1,16 @@
1
  import os
2
  import requests
3
  import json
 
4
  from fastapi import FastAPI, HTTPException
5
  from pydantic import BaseModel
6
 
7
  # --- CONFIGURATION ---
8
- # Load token from Hugging Face Secrets (Environment Variables)
9
- AI_SERVICE_TOKEN = os.environ.get("AI_SERVICE_TOKEN")
 
 
 
10
  API_URL = "https://models.inference.ai.azure.com/chat/completions"
11
  MODEL_NAME = "gpt-4o-mini"
12
 
@@ -19,67 +23,73 @@ app = FastAPI(
19
  class AnalyzeRequest(BaseModel):
20
  filename: str
21
 
 
 
 
 
 
 
 
22
  # --- ENDPOINTS ---
23
 
24
  @app.get("/")
25
  def home():
26
  """Health check endpoint."""
27
- return {"status": "active", "platform": "Hugging Face Spaces"}
28
 
29
  @app.get("/check-limit")
30
  def check_limit():
31
  """
32
- Checks the rate limit status of the configured AI Service Token.
33
  """
34
- if not AI_SERVICE_TOKEN:
35
- raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing.")
36
 
37
- headers = {
38
- "Authorization": f"Bearer {AI_SERVICE_TOKEN}",
39
- "Content-Type": "application/json"
40
- }
 
 
 
 
 
 
 
41
 
42
- # Minimal payload to trigger headers without consuming many tokens
43
- payload = {
44
- "model": MODEL_NAME,
45
- "messages": [{"role": "user", "content": "Ping."}],
46
- "temperature": 0.1,
47
- "max_tokens": 1
48
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- try:
51
- response = requests.post(API_URL, headers=headers, json=payload, timeout=10)
52
-
53
- # Extract standard rate limit headers
54
- remaining = response.headers.get('x-ratelimit-remaining-requests') or response.headers.get('x-ratelimit-remaining') or 'N/A'
55
- limit = response.headers.get('x-ratelimit-limit-requests') or response.headers.get('x-ratelimit-limit') or 'N/A'
56
- reset = response.headers.get('x-ratelimit-reset-requests') or response.headers.get('x-ratelimit-reset') or 'N/A'
57
-
58
- return {
59
- "status_code": response.status_code,
60
- "rate_limit_info": {
61
- "remaining": remaining,
62
- "limit": limit,
63
- "reset_time": reset
64
- },
65
- "message": "Token is valid." if response.status_code == 200 else f"Request failed: {response.status_code}"
66
- }
67
- except Exception as e:
68
- return {"error": str(e)}
69
 
70
  @app.post("/analyze")
71
  def analyze_filename(request: AnalyzeRequest):
72
  """
73
- Main endpoint to analyze filenames.
74
  """
75
- if not AI_SERVICE_TOKEN:
76
  raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing.")
77
 
78
- headers = {
79
- "Authorization": f"Bearer {AI_SERVICE_TOKEN}",
80
- "Content-Type": "application/json"
81
- }
82
-
83
  payload = {
84
  "model": MODEL_NAME,
85
  "messages": [
@@ -95,27 +105,60 @@ def analyze_filename(request: AnalyzeRequest):
95
  "temperature": 0.1
96
  }
97
 
98
- try:
99
- # 30-second timeout for analysis requests
100
- response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
101
-
102
- if response.status_code == 429:
103
- raise HTTPException(status_code=429, detail="Rate limit exceeded (429)")
104
-
105
- response.raise_for_status()
106
-
107
- data = response.json()
108
- content = data.get('choices', [{}])[0].get('message', {}).get('content')
109
-
110
- if content:
111
- # Clean up markdown if present to ensure valid JSON
112
- clean_content = content.replace("```json", "").replace("```", "").strip()
113
- try:
114
- return json.loads(clean_content)
115
- except json.JSONDecodeError:
116
- return {"error": "AI returned malformed JSON", "raw_content": clean_content}
117
 
118
- return {"error": "No content returned"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
- except Exception as e:
121
- raise HTTPException(status_code=500, detail=str(e))
 
1
  import os
2
  import requests
3
  import json
4
+ import random
5
  from fastapi import FastAPI, HTTPException
6
  from pydantic import BaseModel
7
 
8
  # --- CONFIGURATION ---
9
+ # Load tokens from Hugging Face Secrets (Environment Variables)
10
+ # Supports a single token or a comma-separated list of tokens for rotation
11
+ AI_SERVICE_TOKENS_RAW = os.environ.get("AI_SERVICE_TOKEN", "")
12
+ AI_SERVICE_TOKENS = [t.strip() for t in AI_SERVICE_TOKENS_RAW.split(",") if t.strip()]
13
+
14
  API_URL = "https://models.inference.ai.azure.com/chat/completions"
15
  MODEL_NAME = "gpt-4o-mini"
16
 
 
23
  class AnalyzeRequest(BaseModel):
24
  filename: str
25
 
26
+ # --- HELPERS ---
27
def get_headers(token):
    """Build the HTTP headers for an authenticated JSON API request.

    Args:
        token: Bearer token placed in the ``Authorization`` header.

    Returns:
        dict: Headers carrying the bearer token and a JSON content type.
    """
    auth_value = "Bearer " + token
    return {"Authorization": auth_value, "Content-Type": "application/json"}
32
+
33
  # --- ENDPOINTS ---
34
 
35
  @app.get("/")
36
  def home():
37
  """Health check endpoint."""
38
+ return {"status": "active", "platform": "Hugging Face Spaces", "tokens_loaded": len(AI_SERVICE_TOKENS)}
39
 
40
@app.get("/check-limit")
def check_limit():
    """
    Check the rate-limit status of every configured AI Service Token.

    Sends a minimal 1-token "Ping." completion with each token and reports
    the standard ``x-ratelimit-*`` response headers for each one.

    Returns:
        dict: ``{"tokens_checked": int, "results": [per-token status dicts]}``.

    Raises:
        HTTPException: 500 if no tokens are configured.
    """
    if not AI_SERVICE_TOKENS:
        raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing or empty.")

    # The probe payload is identical for every token, so build it once
    # instead of re-creating it on each loop iteration.
    payload = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "Ping."}],
        "temperature": 0.1,
        "max_tokens": 1
    }

    results = []

    # Check each token individually
    for i, token in enumerate(AI_SERVICE_TOKENS):
        try:
            response = requests.post(API_URL, headers=get_headers(token), json=payload, timeout=10)

            # Extract standard rate limit headers (provider header naming varies,
            # so fall back through the known variants).
            remaining = response.headers.get('x-ratelimit-remaining-requests') or response.headers.get('x-ratelimit-remaining') or 'N/A'
            limit = response.headers.get('x-ratelimit-limit-requests') or response.headers.get('x-ratelimit-limit') or 'N/A'
            reset = response.headers.get('x-ratelimit-reset-requests') or response.headers.get('x-ratelimit-reset') or 'N/A'

            results.append({
                "token_index": i,
                "status_code": response.status_code,
                "rate_limit_info": {
                    "remaining": remaining,
                    "limit": limit,
                    "reset_time": reset
                },
                "message": "Token is valid." if response.status_code == 200 else f"Request failed: {response.status_code}"
            })

        except requests.RequestException as e:
            # Narrowed from a bare `except Exception`: only network/HTTP-layer
            # failures are expected here; anything else would be a programming
            # error that should surface rather than be recorded as a token error.
            results.append({"token_index": i, "error": str(e)})

    return {"tokens_checked": len(results), "results": results}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  @app.post("/analyze")
86
  def analyze_filename(request: AnalyzeRequest):
87
  """
88
+ Main endpoint to analyze filenames with token rotation on 429.
89
  """
90
+ if not AI_SERVICE_TOKENS:
91
  raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing.")
92
 
 
 
 
 
 
93
  payload = {
94
  "model": MODEL_NAME,
95
  "messages": [
 
105
  "temperature": 0.1
106
  }
107
 
108
+ # Try each token until one works or all fail
109
+ # Shuffle simply to distribute load if we have multiple valid tokens,
110
+ # though deterministic iteration is also fine.
111
+ tokens_to_try = list(AI_SERVICE_TOKENS)
112
+ # random.shuffle(tokens_to_try) # Optional: Randomize order
113
+
114
+ last_error_detail = "Unknown error"
115
+
116
+ for token in tokens_to_try:
117
+ headers = get_headers(token)
 
 
 
 
 
 
 
 
 
118
 
119
+ try:
120
+ # 30-second timeout for analysis requests
121
+ response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
122
+
123
+ # If rate limited, log it and continue to the next token
124
+ if response.status_code == 429:
125
+ print(f"Token ending in ...{token[-4:]} hit rate limit (429). Trying next token.")
126
+ last_error_detail = "Rate limit exceeded (429)"
127
+ continue
128
+
129
+ # If 401/403 (Auth error), also try next token
130
+ if response.status_code in [401, 403]:
131
+ print(f"Token ending in ...{token[-4:]} failed auth ({response.status_code}). Trying next token.")
132
+ last_error_detail = f"Auth failed ({response.status_code})"
133
+ continue
134
+
135
+ response.raise_for_status()
136
+
137
+ data = response.json()
138
+ content = data.get('choices', [{}])[0].get('message', {}).get('content')
139
+
140
+ if content:
141
+ # Clean up markdown if present to ensure valid JSON
142
+ clean_content = content.replace("```json", "").replace("```", "").strip()
143
+ try:
144
+ return json.loads(clean_content)
145
+ except json.JSONDecodeError:
146
+ return {"error": "AI returned malformed JSON", "raw_content": clean_content}
147
+
148
+ return {"error": "No content returned"}
149
+
150
+ except requests.exceptions.RequestException as e:
151
+ # Network errors might be transient, could retry or fail.
152
+ # Here we treat it as a failure for this token and try next.
153
+ print(f"Network error with token ...{token[-4:]}: {e}")
154
+ last_error_detail = str(e)
155
+ continue
156
+ except Exception as e:
157
+ print(f"Unexpected error with token ...{token[-4:]}: {e}")
158
+ last_error_detail = str(e)
159
+ # Depending on severity, might want to break or continue.
160
+ # We'll continue to be safe.
161
+ continue
162
 
163
+ # If we exit the loop, all tokens failed
164
+ raise HTTPException(status_code=429, detail=f"All tokens failed. Last error: {last_error_detail}")