bigbossmonster committed on
Commit
4004dc2
·
verified ·
1 Parent(s): 09af3ee

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +164 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Standard library
import json
import os
import random

# Third-party
import requests
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# --- CONFIGURATION ---
# Secrets arrive via environment variables (Hugging Face Spaces Secrets).
# AI_SERVICE_TOKEN holds either a single token or a comma-separated list,
# which enables round-robin-style rotation when one token is exhausted.
AI_SERVICE_TOKENS_RAW = os.environ.get("AI_SERVICE_TOKEN", "")
AI_SERVICE_TOKENS = [t.strip() for t in AI_SERVICE_TOKENS_RAW.split(",") if t.strip()]

# Upstream chat-completions endpoint and the model requested from it.
API_URL = "https://models.inference.ai.azure.com/chat/completions"
MODEL_NAME = "gpt-4o"

app = FastAPI(
    title="AI Backend Service",
    description="Running on Hugging Face Spaces (Docker SDK)"
)
21
+
22
+ # --- MODELS ---
23
class AnalyzeRequest(BaseModel):
    """Request body for POST /analyze."""
    # Media file name to parse (e.g. a movie or episode file name).
    filename: str
25
+
26
+ # --- HELPERS ---
27
def get_headers(token):
    """Return the HTTP headers for an authenticated JSON call to the AI API.

    Args:
        token: Bearer token to place in the Authorization header.
    """
    headers = {"Content-Type": "application/json"}
    headers["Authorization"] = "Bearer " + token
    return headers
32
+
33
+ # --- ENDPOINTS ---
34
+
35
+ @app.get("/")
36
+ def home():
37
+ """Health check endpoint."""
38
+ return {"status": "active", "platform": "Hugging Face Spaces", "tokens_loaded": len(AI_SERVICE_TOKENS)}
39
+
40
+ @app.get("/check-limit")
41
+ def check_limit():
42
+ """
43
+ Checks the rate limit status of the configured AI Service Tokens.
44
+ """
45
+ if not AI_SERVICE_TOKENS:
46
+ raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing or empty.")
47
+
48
+ results = []
49
+
50
+ # Check each token individually
51
+ for i, token in enumerate(AI_SERVICE_TOKENS):
52
+ headers = get_headers(token)
53
+ payload = {
54
+ "model": MODEL_NAME,
55
+ "messages": [{"role": "user", "content": "Ping."}],
56
+ "temperature": 0.1,
57
+ "max_tokens": 1
58
+ }
59
+
60
+ try:
61
+ response = requests.post(API_URL, headers=headers, json=payload, timeout=10)
62
+
63
+ # Extract standard rate limit headers
64
+ remaining = response.headers.get('x-ratelimit-remaining-requests') or response.headers.get('x-ratelimit-remaining') or 'N/A'
65
+ limit = response.headers.get('x-ratelimit-limit-requests') or response.headers.get('x-ratelimit-limit') or 'N/A'
66
+ reset = response.headers.get('x-ratelimit-reset-requests') or response.headers.get('x-ratelimit-reset') or 'N/A'
67
+
68
+ token_status = {
69
+ "token_index": i,
70
+ "status_code": response.status_code,
71
+ "rate_limit_info": {
72
+ "remaining": remaining,
73
+ "limit": limit,
74
+ "reset_time": reset
75
+ },
76
+ "message": "Token is valid." if response.status_code == 200 else f"Request failed: {response.status_code}"
77
+ }
78
+ results.append(token_status)
79
+
80
+ except Exception as e:
81
+ results.append({"token_index": i, "error": str(e)})
82
+
83
+ return {"tokens_checked": len(results), "results": results}
84
+
85
+ @app.post("/analyze")
86
+ def analyze_filename(request: AnalyzeRequest):
87
+ """
88
+ Main endpoint to analyze filenames with token rotation on 429.
89
+ """
90
+ if not AI_SERVICE_TOKENS:
91
+ raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing.")
92
+
93
+ payload = {
94
+ "model": MODEL_NAME,
95
+ "messages": [
96
+ {
97
+ "role": "system",
98
+ "content": "You are an expert Movie and TV metadata analyst. Return ONLY raw JSON in the format: {\"title\": \"...\", \"year\": \"...\", \"isSeries\": false/true}. Analyze the following filename and extract the data."
99
+ },
100
+ {
101
+ "role": "user",
102
+ "content": f"Analyze: \"{request.filename}\""
103
+ }
104
+ ],
105
+ "temperature": 0.1
106
+ }
107
+
108
+ # Try each token until one works or all fail
109
+ # Shuffle simply to distribute load if we have multiple valid tokens,
110
+ # though deterministic iteration is also fine.
111
+ tokens_to_try = list(AI_SERVICE_TOKENS)
112
+ # random.shuffle(tokens_to_try) # Optional: Randomize order
113
+
114
+ last_error_detail = "Unknown error"
115
+
116
+ for token in tokens_to_try:
117
+ headers = get_headers(token)
118
+
119
+ try:
120
+ # 30-second timeout for analysis requests
121
+ response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
122
+
123
+ # If rate limited, log it and continue to the next token
124
+ if response.status_code == 429:
125
+ print(f"Token ending in ...{token[-4:]} hit rate limit (429). Trying next token.")
126
+ last_error_detail = "Rate limit exceeded (429)"
127
+ continue
128
+
129
+ # If 401/403 (Auth error), also try next token
130
+ if response.status_code in [401, 403]:
131
+ print(f"Token ending in ...{token[-4:]} failed auth ({response.status_code}). Trying next token.")
132
+ last_error_detail = f"Auth failed ({response.status_code})"
133
+ continue
134
+
135
+ response.raise_for_status()
136
+
137
+ data = response.json()
138
+ content = data.get('choices', [{}])[0].get('message', {}).get('content')
139
+
140
+ if content:
141
+ # Clean up markdown if present to ensure valid JSON
142
+ clean_content = content.replace("```json", "").replace("```", "").strip()
143
+ try:
144
+ return json.loads(clean_content)
145
+ except json.JSONDecodeError:
146
+ return {"error": "AI returned malformed JSON", "raw_content": clean_content}
147
+
148
+ return {"error": "No content returned"}
149
+
150
+ except requests.exceptions.RequestException as e:
151
+ # Network errors might be transient, could retry or fail.
152
+ # Here we treat it as a failure for this token and try next.
153
+ print(f"Network error with token ...{token[-4:]}: {e}")
154
+ last_error_detail = str(e)
155
+ continue
156
+ except Exception as e:
157
+ print(f"Unexpected error with token ...{token[-4:]}: {e}")
158
+ last_error_detail = str(e)
159
+ # Depending on severity, might want to break or continue.
160
+ # We'll continue to be safe.
161
+ continue
162
+
163
+ # If we exit the loop, all tokens failed
164
+ raise HTTPException(status_code=429, detail=f"All tokens failed. Last error: {last_error_detail}")
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ requests
4
+ pydantic