bigbossmonster committed on
Commit
e23b777
·
verified ·
1 Parent(s): 89f7ea1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -96
app.py CHANGED
@@ -1,18 +1,22 @@
1
  import os
2
  import requests
3
  import json
4
- import random
5
  from fastapi import FastAPI, HTTPException
6
  from pydantic import BaseModel
7
 
8
  # --- CONFIGURATION ---
9
- # Load tokens from Hugging Face Secrets (Environment Variables)
10
- # Supports a single token or a comma-separated list of tokens for rotation
11
  AI_SERVICE_TOKENS_RAW = os.environ.get("AI_SERVICE_TOKEN", "")
12
  AI_SERVICE_TOKENS = [t.strip() for t in AI_SERVICE_TOKENS_RAW.split(",") if t.strip()]
 
 
13
 
14
- API_URL = "https://models.inference.ai.azure.com/chat/completions"
15
- MODEL_NAME = "gpt-4o"
 
 
 
16
 
17
  app = FastAPI(
18
  title="AI Backend Service",
@@ -22,6 +26,7 @@ app = FastAPI(
22
  # --- MODELS ---
23
  class AnalyzeRequest(BaseModel):
24
  filename: str
 
25
 
26
  # --- HELPERS ---
27
  def get_headers(token):
@@ -35,130 +40,136 @@ def get_headers(token):
35
  @app.get("/")
36
  def home():
37
  """Health check endpoint."""
38
- return {"status": "active", "platform": "Hugging Face Spaces", "tokens_loaded": len(AI_SERVICE_TOKENS)}
 
 
 
 
 
39
 
40
  @app.get("/check-limit")
41
  def check_limit():
42
  """
43
- Checks the rate limit status of the configured AI Service Tokens.
 
44
  """
45
  if not AI_SERVICE_TOKENS:
46
- raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing or empty.")
 
47
 
48
  results = []
49
 
50
- # Check each token individually
51
  for i, token in enumerate(AI_SERVICE_TOKENS):
52
  headers = get_headers(token)
53
  payload = {
54
- "model": MODEL_NAME,
55
  "messages": [{"role": "user", "content": "Ping."}],
56
  "temperature": 0.1,
57
- "max_tokens": 1
58
  }
59
 
60
  try:
61
- response = requests.post(API_URL, headers=headers, json=payload, timeout=10)
62
-
63
- # Extract standard rate limit headers
64
- remaining = response.headers.get('x-ratelimit-remaining-requests') or response.headers.get('x-ratelimit-remaining') or 'N/A'
65
- limit = response.headers.get('x-ratelimit-limit-requests') or response.headers.get('x-ratelimit-limit') or 'N/A'
66
- reset = response.headers.get('x-ratelimit-reset-requests') or response.headers.get('x-ratelimit-reset') or 'N/A'
67
-
68
  token_status = {
69
  "token_index": i,
70
  "status_code": response.status_code,
71
- "rate_limit_info": {
72
- "remaining": remaining,
73
- "limit": limit,
74
- "reset_time": reset
75
- },
76
- "message": "Token is valid." if response.status_code == 200 else f"Request failed: {response.status_code}"
77
  }
78
  results.append(token_status)
79
-
80
  except Exception as e:
81
- results.append({"token_index": i, "error": str(e)})
82
 
83
  return {"tokens_checked": len(results), "results": results}
84
 
85
- @app.post("/analyze")
86
- def analyze_filename(request: AnalyzeRequest):
87
- """
88
- Main endpoint to analyze filenames with token rotation on 429.
89
- """
90
- if not AI_SERVICE_TOKENS:
91
- raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing.")
92
-
93
  payload = {
94
- "model": MODEL_NAME,
95
  "messages": [
96
- {
97
- "role": "system",
98
- "content": "You are an expert Movie and TV metadata analyst. Return ONLY raw JSON in the format: {\"title\": \"...\", \"year\": \"...\", \"isSeries\": false/true}. Analyze the following filename and extract the data."
99
- },
100
- {
101
- "role": "user",
102
- "content": f"Analyze: \"{request.filename}\""
103
- }
104
  ],
105
- "temperature": 0.1
 
106
  }
107
 
108
- # Try each token until one works or all fail
109
- # Shuffle simply to distribute load if we have multiple valid tokens,
110
- # though deterministic iteration is also fine.
111
- tokens_to_try = list(AI_SERVICE_TOKENS)
112
- # random.shuffle(tokens_to_try) # Optional: Randomize order
113
-
114
- last_error_detail = "Unknown error"
115
-
116
- for token in tokens_to_try:
117
- headers = get_headers(token)
118
-
119
  try:
120
- # 30-second timeout for analysis requests
121
- response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
122
-
123
- # If rate limited, log it and continue to the next token
124
- if response.status_code == 429:
125
- print(f"Token ending in ...{token[-4:]} hit rate limit (429). Trying next token.")
126
- last_error_detail = "Rate limit exceeded (429)"
127
  continue
128
-
129
- # If 401/403 (Auth error), also try next token
130
- if response.status_code in [401, 403]:
131
- print(f"Token ending in ...{token[-4:]} failed auth ({response.status_code}). Trying next token.")
132
- last_error_detail = f"Auth failed ({response.status_code})"
133
- continue
134
-
135
- response.raise_for_status()
136
-
137
- data = response.json()
138
- content = data.get('choices', [{}])[0].get('message', {}).get('content')
139
-
140
- if content:
141
- # Clean up markdown if present to ensure valid JSON
142
- clean_content = content.replace("```json", "").replace("```", "").strip()
143
- try:
144
- return json.loads(clean_content)
145
- except json.JSONDecodeError:
146
- return {"error": "AI returned malformed JSON", "raw_content": clean_content}
147
-
148
- return {"error": "No content returned"}
149
-
150
- except requests.exceptions.RequestException as e:
151
- # Network errors might be transient, could retry or fail.
152
- # Here we treat it as a failure for this token and try next.
153
- print(f"Network error with token ...{token[-4:]}: {e}")
154
- last_error_detail = str(e)
155
- continue
156
  except Exception as e:
157
- print(f"Unexpected error with token ...{token[-4:]}: {e}")
158
- last_error_detail = str(e)
159
- # Depending on severity, might want to break or continue.
160
- # We'll continue to be safe.
161
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
162
 
163
- # If we exit the loop, all tokens failed
164
- raise HTTPException(status_code=429, detail=f"All tokens failed. Last error: {last_error_detail}")
 
 
1
  import os
2
  import requests
3
  import json
4
+ import time
5
  from fastapi import FastAPI, HTTPException
6
  from pydantic import BaseModel
7
 
8
  # --- CONFIGURATION ---
9
+ # 1. OpenAI/Azure Configuration
 
10
  AI_SERVICE_TOKENS_RAW = os.environ.get("AI_SERVICE_TOKEN", "")
11
  AI_SERVICE_TOKENS = [t.strip() for t in AI_SERVICE_TOKENS_RAW.split(",") if t.strip()]
12
+ OPENAI_API_URL = "https://models.inference.ai.azure.com/chat/completions"
13
+ OPENAI_MODEL_NAME = "gpt-4o-mini"
14
 
15
+ # 2. Google Gemini Configuration (Direct Google API)
16
+ # You need to set GOOGLE_API_KEY in your HF Space secrets
17
+ GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
18
+ # Targets the gemma-3-27b-it model served through the Generative Language API
19
+ GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemma-3-27b-it:generateContent?key={GOOGLE_API_KEY}"
20
 
21
  app = FastAPI(
22
  title="AI Backend Service",
 
26
  # --- MODELS ---
27
  class AnalyzeRequest(BaseModel):
28
  filename: str
29
+ model_provider: str = "openai" # 'openai' or 'gemma' (maps to Gemini)
30
 
31
  # --- HELPERS ---
32
  def get_headers(token):
 
40
  @app.get("/")
41
  def home():
42
  """Health check endpoint."""
43
+ return {
44
+ "status": "active",
45
+ "platform": "Hugging Face Spaces",
46
+ "tokens_loaded": len(AI_SERVICE_TOKENS),
47
+ "google_api_enabled": bool(GOOGLE_API_KEY)
48
+ }
49
 
50
  @app.get("/check-limit")
51
  def check_limit():
52
  """
53
+ Checks the rate limit status of OpenAI tokens.
54
+ (Google API doesn't provide easy rate limit headers in the same way, skipped for now).
55
  """
56
  if not AI_SERVICE_TOKENS:
57
+ # Just return empty if no OpenAI tokens, but don't crash if Google is used
58
+ return {"tokens_checked": 0, "results": [], "note": "OpenAI tokens missing"}
59
 
60
  results = []
61
 
 
62
  for i, token in enumerate(AI_SERVICE_TOKENS):
63
  headers = get_headers(token)
64
  payload = {
65
+ "model": OPENAI_MODEL_NAME,
66
  "messages": [{"role": "user", "content": "Ping."}],
67
  "temperature": 0.1,
68
+ "max_tokens": 1
69
  }
70
 
71
  try:
72
+ response = requests.post(OPENAI_API_URL, headers=headers, json=payload, timeout=10)
 
 
 
 
 
 
73
  token_status = {
74
  "token_index": i,
75
  "status_code": response.status_code,
76
+ "valid": response.status_code == 200,
77
+ "remaining": response.headers.get('x-ratelimit-remaining-requests', 'N/A')
 
 
 
 
78
  }
79
  results.append(token_status)
 
80
  except Exception as e:
81
+ results.append({"token_index": i, "status_code": "ERROR", "error": str(e)})
82
 
83
  return {"tokens_checked": len(results), "results": results}
84
 
85
+ def call_openai_gpt4o(filename, tokens):
 
 
 
 
 
 
 
86
  payload = {
87
+ "model": OPENAI_MODEL_NAME,
88
  "messages": [
89
+ {"role": "system", "content": "You are an expert Movie and TV metadata analyst. Return ONLY raw JSON in the format: {\"title\": \"...\", \"year\": \"...\", \"isSeries\": false/true}. Analyze the following filename and extract the data."},
90
+ {"role": "user", "content": f"Analyze: \"{filename}\""}
 
 
 
 
 
 
91
  ],
92
+ "temperature": 0.1,
93
+ "max_tokens": 500
94
  }
95
 
96
+ last_error = ""
97
+ for i, token in enumerate(tokens):
 
 
 
 
 
 
 
 
 
98
  try:
99
+ response = requests.post(OPENAI_API_URL, headers=get_headers(token), json=payload, timeout=30)
100
+ if response.status_code == 200:
101
+ content = response.json().get('choices', [{}])[0].get('message', {}).get('content')
102
+ return content
103
+ elif response.status_code in [429, 401, 403]:
104
+ last_error = f"Token {i}: {response.status_code}"
 
105
  continue
106
+ else:
107
+ last_error = f"Token {i} Error: {response.text}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  except Exception as e:
109
+ last_error = str(e)
 
 
 
110
  continue
111
+ raise Exception(f"OpenAI All tokens failed. Last: {last_error}")
112
+
113
+ def call_google_gemini(filename):
114
+ if not GOOGLE_API_KEY:
115
+ raise Exception("GOOGLE_API_KEY not configured.")
116
+
117
+ # Construct the Gemini payload
118
+ prompt = f"""
119
+ You are an expert Movie and TV metadata analyst.
120
+ Analyze the filename: "{filename}"
121
+ Identify the title, year, and whether it is a series.
122
+ Return ONLY a raw JSON object with this exact format:
123
+ {{"title": "Movie Title", "year": "2024", "isSeries": false}}
124
+ """
125
+
126
+ payload = {
127
+ "contents": [{
128
+ "parts": [{"text": prompt}]
129
+ }],
130
+ "generationConfig": {
131
+ "temperature": 0.1,
132
+ "maxOutputTokens": 100,
133
+ "responseMimeType": "application/json" # Gemini supports JSON mode natively
134
+ }
135
+ }
136
+
137
+ response = requests.post(GEMINI_API_URL, headers={"Content-Type": "application/json"}, json=payload, timeout=30)
138
+
139
+ if response.status_code != 200:
140
+ raise Exception(f"Google Gemini API Error {response.status_code}: {response.text}")
141
+
142
+ result = response.json()
143
+ # Extract text from Gemini response structure
144
+ try:
145
+ return result['candidates'][0]['content']['parts'][0]['text']
146
+ except (KeyError, IndexError):
147
+ raise Exception(f"Unexpected response structure from Gemini: {str(result)}")
148
+
149
+ @app.post("/analyze")
150
+ def analyze_filename(request: AnalyzeRequest):
151
+ """
152
+ Analyze filename using selected provider (openai or gemma/gemini).
153
+ """
154
+ raw_content = ""
155
+ provider_used = request.model_provider
156
+
157
+ try:
158
+ if provider_used == "gemma":
159
+ # Although the frontend sends "gemma", we map this to our Google Gemini function
160
+ raw_content = call_google_gemini(request.filename)
161
+ else:
162
+ # Default to OpenAI
163
+ if not AI_SERVICE_TOKENS: raise HTTPException(500, "OpenAI tokens missing.")
164
+ raw_content = call_openai_gpt4o(request.filename, AI_SERVICE_TOKENS)
165
+
166
+ # Parse JSON output from either provider
167
+ if raw_content:
168
+ clean_content = raw_content.replace("```json", "").replace("```", "").strip()
169
+ return json.loads(clean_content)
170
+
171
+ return {"error": "No content returned", "provider": provider_used}
172
 
173
+ except Exception as e:
174
+ print(f"Analysis Error ({provider_used}): {e}")
175
+ raise HTTPException(status_code=500, detail=f"Analysis failed ({provider_used}): {str(e)}")