bigbossmonster committed on
Commit
4004dc2
·
verified ·
1 Parent(s): 09af3ee

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +164 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import json
import os
import random

# Third-party
import requests
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# --- CONFIGURATION ---
# Secrets arrive via environment variables (Hugging Face Spaces Secrets).
# AI_SERVICE_TOKEN holds either a single token or a comma-separated list,
# which enables round-robin-style rotation when one token is exhausted.
AI_SERVICE_TOKENS_RAW = os.environ.get("AI_SERVICE_TOKEN", "")
AI_SERVICE_TOKENS = [t.strip() for t in AI_SERVICE_TOKENS_RAW.split(",") if t.strip()]

# Upstream chat-completions endpoint and the model requested from it.
API_URL = "https://models.inference.ai.azure.com/chat/completions"
MODEL_NAME = "gpt-4o"

app = FastAPI(
    title="AI Backend Service",
    description="Running on Hugging Face Spaces (Docker SDK)"
)
21
+
22
+ # --- MODELS ---
23
class AnalyzeRequest(BaseModel):
    """Request body for POST /analyze."""
    # Media file name to parse (e.g. a movie or episode file name).
    filename: str
25
+
26
+ # --- HELPERS ---
27
def get_headers(token):
    """Return the HTTP headers for an authenticated JSON call to the AI API.

    Args:
        token: Bearer token to place in the Authorization header.
    """
    headers = {"Content-Type": "application/json"}
    headers["Authorization"] = "Bearer " + token
    return headers
32
+
33
+ # --- ENDPOINTS ---
34
+
35
+ @app.get("/")
36
+ def home():
37
+ """Health check endpoint."""
38
+ return {"status": "active", "platform": "Hugging Face Spaces", "tokens_loaded": len(AI_SERVICE_TOKENS)}
39
+
40
+ @app.get("/check-limit")
41
+ def check_limit():
42
+ """
43
+ Checks the rate limit status of the configured AI Service Tokens.
44
+ """
45
+ if not AI_SERVICE_TOKENS:
46
+ raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing or empty.")
47
+
48
+ results = []
49
+
50
+ # Check each token individually
51
+ for i, token in enumerate(AI_SERVICE_TOKENS):
52
+ headers = get_headers(token)
53
+ payload = {
54
+ "model": MODEL_NAME,
55
+ "messages": [{"role": "user", "content": "Ping."}],
56
+ "temperature": 0.1,
57
+ "max_tokens": 1
58
+ }
59
+
60
+ try:
61
+ response = requests.post(API_URL, headers=headers, json=payload, timeout=10)
62
+
63
+ # Extract standard rate limit headers
64
+ remaining = response.headers.get('x-ratelimit-remaining-requests') or response.headers.get('x-ratelimit-remaining') or 'N/A'
65
+ limit = response.headers.get('x-ratelimit-limit-requests') or response.headers.get('x-ratelimit-limit') or 'N/A'
66
+ reset = response.headers.get('x-ratelimit-reset-requests') or response.headers.get('x-ratelimit-reset') or 'N/A'
67
+
68
+ token_status = {
69
+ "token_index": i,
70
+ "status_code": response.status_code,
71
+ "rate_limit_info": {
72
+ "remaining": remaining,
73
+ "limit": limit,
74
+ "reset_time": reset
75
+ },
76
+ "message": "Token is valid." if response.status_code == 200 else f"Request failed: {response.status_code}"
77
+ }
78
+ results.append(token_status)
79
+
80
+ except Exception as e:
81
+ results.append({"token_index": i, "error": str(e)})
82
+
83
+ return {"tokens_checked": len(results), "results": results}
84
+
85
+ @app.post("/analyze")
86
+ def analyze_filename(request: AnalyzeRequest):
87
+ """
88
+ Main endpoint to analyze filenames with token rotation on 429.
89
+ """
90
+ if not AI_SERVICE_TOKENS:
91
+ raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing.")
92
+
93
+ payload = {
94
+ "model": MODEL_NAME,
95
+ "messages": [
96
+ {
97
+ "role": "system",
98
+ "content": "You are an expert Movie and TV metadata analyst. Return ONLY raw JSON in the format: {\"title\": \"...\", \"year\": \"...\", \"isSeries\": false/true}. Analyze the following filename and extract the data."
99
+ },
100
+ {
101
+ "role": "user",
102
+ "content": f"Analyze: \"{request.filename}\""
103
+ }
104
+ ],
105
+ "temperature": 0.1
106
+ }
107
+
108
+ # Try each token until one works or all fail
109
+ # Shuffle simply to distribute load if we have multiple valid tokens,
110
+ # though deterministic iteration is also fine.
111
+ tokens_to_try = list(AI_SERVICE_TOKENS)
112
+ # random.shuffle(tokens_to_try) # Optional: Randomize order
113
+
114
+ last_error_detail = "Unknown error"
115
+
116
+ for token in tokens_to_try:
117
+ headers = get_headers(token)
118
+
119
+ try:
120
+ # 30-second timeout for analysis requests
121
+ response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
122
+
123
+ # If rate limited, log it and continue to the next token
124
+ if response.status_code == 429:
125
+ print(f"Token ending in ...{token[-4:]} hit rate limit (429). Trying next token.")
126
+ last_error_detail = "Rate limit exceeded (429)"
127
+ continue
128
+
129
+ # If 401/403 (Auth error), also try next token
130
+ if response.status_code in [401, 403]:
131
+ print(f"Token ending in ...{token[-4:]} failed auth ({response.status_code}). Trying next token.")
132
+ last_error_detail = f"Auth failed ({response.status_code})"
133
+ continue
134
+
135
+ response.raise_for_status()
136
+
137
+ data = response.json()
138
+ content = data.get('choices', [{}])[0].get('message', {}).get('content')
139
+
140
+ if content:
141
+ # Clean up markdown if present to ensure valid JSON
142
+ clean_content = content.replace("```json", "").replace("```", "").strip()
143
+ try:
144
+ return json.loads(clean_content)
145
+ except json.JSONDecodeError:
146
+ return {"error": "AI returned malformed JSON", "raw_content": clean_content}
147
+
148
+ return {"error": "No content returned"}
149
+
150
+ except requests.exceptions.RequestException as e:
151
+ # Network errors might be transient, could retry or fail.
152
+ # Here we treat it as a failure for this token and try next.
153
+ print(f"Network error with token ...{token[-4:]}: {e}")
154
+ last_error_detail = str(e)
155
+ continue
156
+ except Exception as e:
157
+ print(f"Unexpected error with token ...{token[-4:]}: {e}")
158
+ last_error_detail = str(e)
159
+ # Depending on severity, might want to break or continue.
160
+ # We'll continue to be safe.
161
+ continue
162
+
163
+ # If we exit the loop, all tokens failed
164
+ raise HTTPException(status_code=429, detail=f"All tokens failed. Last error: {last_error_detail}")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ requests
4
+ pydantic