bigbossmonster committed on
Commit
e4d3a9b
·
verified ·
1 Parent(s): 4004dc2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -95
app.py CHANGED
@@ -1,18 +1,21 @@
1
  import os
2
  import requests
3
  import json
4
- import random
5
  from fastapi import FastAPI, HTTPException
6
  from pydantic import BaseModel
7
 
8
  # --- CONFIGURATION ---
9
- # Load tokens from Hugging Face Secrets (Environment Variables)
10
- # Supports a single token or a comma-separated list of tokens for rotation
11
  AI_SERVICE_TOKENS_RAW = os.environ.get("AI_SERVICE_TOKEN", "")
12
  AI_SERVICE_TOKENS = [t.strip() for t in AI_SERVICE_TOKENS_RAW.split(",") if t.strip()]
 
 
13
 
14
- API_URL = "https://models.inference.ai.azure.com/chat/completions"
15
- MODEL_NAME = "gpt-4o"
 
 
 
16
 
17
  app = FastAPI(
18
  title="AI Backend Service",
@@ -22,6 +25,7 @@ app = FastAPI(
22
  # --- MODELS ---
23
  class AnalyzeRequest(BaseModel):
24
  filename: str
 
25
 
26
  # --- HELPERS ---
27
  def get_headers(token):
@@ -35,130 +39,141 @@ def get_headers(token):
35
  @app.get("/")
36
  def home():
37
  """Health check endpoint."""
38
- return {"status": "active", "platform": "Hugging Face Spaces", "tokens_loaded": len(AI_SERVICE_TOKENS)}
 
 
 
 
 
39
 
40
  @app.get("/check-limit")
41
  def check_limit():
42
  """
43
- Checks the rate limit status of the configured AI Service Tokens.
44
  """
45
  if not AI_SERVICE_TOKENS:
46
- raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing or empty.")
47
 
48
  results = []
49
 
50
- # Check each token individually
51
  for i, token in enumerate(AI_SERVICE_TOKENS):
52
  headers = get_headers(token)
53
  payload = {
54
- "model": MODEL_NAME,
55
  "messages": [{"role": "user", "content": "Ping."}],
56
  "temperature": 0.1,
57
- "max_tokens": 1
58
  }
59
 
60
  try:
61
- response = requests.post(API_URL, headers=headers, json=payload, timeout=10)
62
-
63
- # Extract standard rate limit headers
64
- remaining = response.headers.get('x-ratelimit-remaining-requests') or response.headers.get('x-ratelimit-remaining') or 'N/A'
65
- limit = response.headers.get('x-ratelimit-limit-requests') or response.headers.get('x-ratelimit-limit') or 'N/A'
66
- reset = response.headers.get('x-ratelimit-reset-requests') or response.headers.get('x-ratelimit-reset') or 'N/A'
67
-
68
  token_status = {
69
  "token_index": i,
70
  "status_code": response.status_code,
71
- "rate_limit_info": {
72
- "remaining": remaining,
73
- "limit": limit,
74
- "reset_time": reset
75
- },
76
- "message": "Token is valid." if response.status_code == 200 else f"Request failed: {response.status_code}"
77
  }
78
  results.append(token_status)
79
-
80
  except Exception as e:
81
- results.append({"token_index": i, "error": str(e)})
82
 
83
  return {"tokens_checked": len(results), "results": results}
84
 
85
- @app.post("/analyze")
86
- def analyze_filename(request: AnalyzeRequest):
87
- """
88
- Main endpoint to analyze filenames with token rotation on 429.
89
- """
90
- if not AI_SERVICE_TOKENS:
91
- raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing.")
92
-
93
  payload = {
94
- "model": MODEL_NAME,
95
  "messages": [
96
- {
97
- "role": "system",
98
- "content": "You are an expert Movie and TV metadata analyst. Return ONLY raw JSON in the format: {\"title\": \"...\", \"year\": \"...\", \"isSeries\": false/true}. Analyze the following filename and extract the data."
99
- },
100
- {
101
- "role": "user",
102
- "content": f"Analyze: \"{request.filename}\""
103
- }
104
  ],
105
- "temperature": 0.1
 
106
  }
107
 
108
- # Try each token until one works or all fail
109
- # Shuffle simply to distribute load if we have multiple valid tokens,
110
- # though deterministic iteration is also fine.
111
- tokens_to_try = list(AI_SERVICE_TOKENS)
112
- # random.shuffle(tokens_to_try) # Optional: Randomize order
113
-
114
- last_error_detail = "Unknown error"
115
-
116
- for token in tokens_to_try:
117
- headers = get_headers(token)
118
-
119
  try:
120
- # 30-second timeout for analysis requests
121
- response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
122
-
123
- # If rate limited, log it and continue to the next token
124
- if response.status_code == 429:
125
- print(f"Token ending in ...{token[-4:]} hit rate limit (429). Trying next token.")
126
- last_error_detail = "Rate limit exceeded (429)"
127
- continue
128
-
129
- # If 401/403 (Auth error), also try next token
130
- if response.status_code in [401, 403]:
131
- print(f"Token ending in ...{token[-4:]} failed auth ({response.status_code}). Trying next token.")
132
- last_error_detail = f"Auth failed ({response.status_code})"
133
  continue
 
 
 
 
 
 
134
 
135
- response.raise_for_status()
136
-
137
- data = response.json()
138
- content = data.get('choices', [{}])[0].get('message', {}).get('content')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- if content:
141
- # Clean up markdown if present to ensure valid JSON
142
- clean_content = content.replace("```json", "").replace("```", "").strip()
143
- try:
144
- return json.loads(clean_content)
145
- except json.JSONDecodeError:
146
- return {"error": "AI returned malformed JSON", "raw_content": clean_content}
147
 
148
- return {"error": "No content returned"}
149
-
150
- except requests.exceptions.RequestException as e:
151
- # Network errors might be transient, could retry or fail.
152
- # Here we treat it as a failure for this token and try next.
153
- print(f"Network error with token ...{token[-4:]}: {e}")
154
- last_error_detail = str(e)
155
- continue
156
- except Exception as e:
157
- print(f"Unexpected error with token ...{token[-4:]}: {e}")
158
- last_error_detail = str(e)
159
- # Depending on severity, might want to break or continue.
160
- # We'll continue to be safe.
161
- continue
162
 
163
- # If we exit the loop, all tokens failed
164
- raise HTTPException(status_code=429, detail=f"All tokens failed. Last error: {last_error_detail}")
 
 
1
  import os
2
  import requests
3
  import json
 
4
  from fastapi import FastAPI, HTTPException
5
  from pydantic import BaseModel
6
 
7
# --- CONFIGURATION ---
# 1. OpenAI/Azure Configuration
# Tokens come from the AI_SERVICE_TOKEN secret; a comma-separated value
# yields multiple tokens that the service rotates through on failures.
AI_SERVICE_TOKENS_RAW = os.environ.get("AI_SERVICE_TOKEN", "")
AI_SERVICE_TOKENS = [t.strip() for t in AI_SERVICE_TOKENS_RAW.split(",") if t.strip()]
OPENAI_API_URL = "https://models.inference.ai.azure.com/chat/completions"
OPENAI_MODEL_NAME = "gpt-4o-mini"

# 2. Google Gemini Configuration (Direct Google API)
# You need to set GOOGLE_API_KEY in your HF Space secrets
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
# NOTE(review): despite the GEMINI_* naming, this endpoint targets the
# gemma-3-27b-it model on the Generative Language API, not Gemini 1.5 Flash.
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemma-3-27b-it:generateContent?key={GOOGLE_API_KEY}"
19
 
20
  app = FastAPI(
21
  title="AI Backend Service",
 
25
# --- MODELS ---
class AnalyzeRequest(BaseModel):
    """Request body for the /analyze endpoint."""

    # Name of the media file to analyze.
    filename: str
    # Backend selector: 'openai' (default) or 'gemma' (routed to Gemini).
    model_provider: str = "openai"
29
 
30
  # --- HELPERS ---
31
  def get_headers(token):
 
39
@app.get("/")
def home():
    """Health check endpoint: reports service status and which credentials are loaded."""
    report = {"status": "active", "platform": "Hugging Face Spaces"}
    report["tokens_loaded"] = len(AI_SERVICE_TOKENS)
    report["google_api_enabled"] = bool(GOOGLE_API_KEY)
    return report
48
 
49
@app.get("/check-limit")
def check_limit():
    """
    Checks the rate limit status of ALL configured AI Service Tokens (OpenAI only).

    Sends a 1-token "Ping." probe per token and reports, for each:
    HTTP status code, a validity flag, and the remaining-requests header
    when the endpoint provides one. Returns an empty summary (not an
    error) when no tokens are configured.
    """
    if not AI_SERVICE_TOKENS:
        return {"tokens_checked": 0, "results": [], "note": "OpenAI tokens missing"}

    # The probe payload is identical for every token, so build it once
    # instead of rebuilding it on each loop iteration.
    payload = {
        "model": OPENAI_MODEL_NAME,
        "messages": [{"role": "user", "content": "Ping."}],
        "temperature": 0.1,
        "max_tokens": 1
    }

    results = []
    for i, token in enumerate(AI_SERVICE_TOKENS):
        headers = get_headers(token)
        try:
            response = requests.post(OPENAI_API_URL, headers=headers, json=payload, timeout=10)
            results.append({
                "token_index": i,
                "status_code": response.status_code,
                "valid": response.status_code == 200,
                "remaining": response.headers.get('x-ratelimit-remaining-requests', 'N/A')
            })
        except Exception as e:
            # A timeout/network failure for one token must not abort the scan.
            results.append({"token_index": i, "status_code": "ERROR", "error": str(e)})

    return {"tokens_checked": len(results), "results": results}
81
 
82
def call_openai_gpt4o(filename, tokens):
    """Ask the OpenAI-compatible endpoint to analyze *filename*.

    Iterates over *tokens*, rotating to the next one on rate-limit (429)
    or auth (401/403) responses. Returns the raw assistant message
    content from the first successful response (may be None if the
    response lacks content).

    Raises:
        Exception: when every token fails; the message carries the last error.
    """
    payload = {
        "model": OPENAI_MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are an expert Movie and TV metadata analyst. Return ONLY raw JSON in the format: {\"title\": \"...\", \"year\": \"...\", \"isSeries\": false/true}. Analyze the following filename and extract the data."},
            # BUG FIX: the filename argument was never interpolated — the
            # prompt previously contained a literal placeholder string.
            {"role": "user", "content": f"Analyze: \"{filename}\""}
        ],
        "temperature": 0.1,
        "max_tokens": 500
    }

    last_error = ""
    for i, token in enumerate(tokens):
        try:
            response = requests.post(OPENAI_API_URL, headers=get_headers(token), json=payload, timeout=30)
            if response.status_code == 200:
                # Defensive extraction: missing keys degrade to None
                # rather than raising.
                content = response.json().get('choices', [{}])[0].get('message', {}).get('content')
                return content
            elif response.status_code in [429, 401, 403]:
                # Rate-limited or bad credentials: rotate to the next token.
                last_error = f"Token {i}: {response.status_code}"
                continue
            else:
                last_error = f"Token {i} Error: {response.text}"
        except Exception as e:
            last_error = str(e)
            continue
    raise Exception(f"OpenAI All tokens failed. Last: {last_error}")
109
 
110
def call_google_gemini(filename):
    """Ask the Google Generative Language API to analyze *filename*.

    Returns the raw model text; the caller is responsible for extracting
    and parsing the JSON object out of it.

    Raises:
        Exception: if GOOGLE_API_KEY is not configured, the HTTP call
            returns a non-200 status, or the response structure is
            unexpected.
    """
    if not GOOGLE_API_KEY:
        raise Exception("GOOGLE_API_KEY not configured.")

    # Construct the Gemini payload.
    # BUG FIX: the filename argument was never interpolated — the prompt
    # previously contained a literal placeholder string.
    prompt = f"""
    You are an expert Movie and TV metadata analyst.
    Analyze the filename: "{filename}"
    Identify the title, year, and whether it is a series.
    Return ONLY a raw JSON object with this exact format:
    {{"title": "Movie Title", "year": "2024", "isSeries": false}}
    """

    payload = {
        "contents": [{
            "parts": [{"text": prompt}]
        }],
        "generationConfig": {
            "temperature": 0.1,
            "maxOutputTokens": 100,
            # Removed strict responseMimeType to avoid 400 error on some models
            # "responseMimeType": "application/json"
        }
    }

    response = requests.post(GEMINI_API_URL, headers={"Content-Type": "application/json"}, json=payload, timeout=30)

    if response.status_code != 200:
        raise Exception(f"Google Gemini API Error {response.status_code}: {response.text}")

    result = response.json()
    # Extract text from the Gemini response structure.
    try:
        return result['candidates'][0]['content']['parts'][0]['text']
    except (KeyError, IndexError):
        raise Exception(f"Unexpected response structure from Gemini: {str(result)}")
146
+
147
@app.post("/analyze")
def analyze_filename(request: AnalyzeRequest):
    """
    Analyze a filename using the selected provider (openai or gemma/gemini).

    Returns the parsed JSON metadata object produced by the model, or an
    error payload when the model returns no content.

    Raises:
        HTTPException: 500 when OpenAI tokens are missing or analysis fails.
    """
    raw_content = ""
    provider_used = request.model_provider

    try:
        if provider_used == "gemma":
            # Although the frontend sends "gemma", we map this to our Google Gemini function
            raw_content = call_google_gemini(request.filename)
        else:
            # Default to OpenAI
            if not AI_SERVICE_TOKENS:
                raise HTTPException(500, "OpenAI tokens missing.")
            raw_content = call_openai_gpt4o(request.filename, AI_SERVICE_TOKENS)

        # Parse JSON output from either provider
        if raw_content:
            clean_content = raw_content.replace("```json", "").replace("```", "").strip()
            # Simple extraction of the JSON object if surrounded by text.
            start = clean_content.find('{')
            end = clean_content.rfind('}') + 1
            # BUG FIX: rfind() returns -1 when '}' is absent, so after the
            # +1 `end` is 0 and the old `end != -1` guard was always true,
            # silently slicing to an empty string. Require a real span.
            if start != -1 and end > start:
                clean_content = clean_content[start:end]

            return json.loads(clean_content)

        return {"error": "No content returned", "provider": provider_used}

    except HTTPException:
        # BUG FIX: preserve deliberate HTTP errors (e.g. missing tokens)
        # instead of rewrapping them into a generic 500 detail below.
        raise
    except Exception as e:
        print(f"Analysis Error ({provider_used}): {e}")
        raise HTTPException(status_code=500, detail=f"Analysis failed ({provider_used}): {str(e)}")