Spaces:

bigbossmonster
/

ffastapi

Sleeping

App Files Files Community

bigbossmonster committed 20 days ago

Commit

d678cf7

verified ·

1 Parent(s): 1aa3002

Update app.py

Browse files

Files changed (1) hide show

app.py +108 -65

app.py CHANGED Viewed

@@ -1,12 +1,16 @@
 import os
 import requests
 import json
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 # --- CONFIGURATION ---
-# Load token from Hugging Face Secrets (Environment Variables)
-AI_SERVICE_TOKEN = os.environ.get("AI_SERVICE_TOKEN")
 API_URL = "https://models.inference.ai.azure.com/chat/completions"
 MODEL_NAME = "gpt-4o-mini"
@@ -19,67 +23,73 @@ app = FastAPI(
 class AnalyzeRequest(BaseModel):
     filename: str
 # --- ENDPOINTS ---
 @app.get("/")
 def home():
     """Health check endpoint."""
-    return {"status": "active", "platform": "Hugging Face Spaces"}
 @app.get("/check-limit")
 def check_limit():
     """
-    Checks the rate limit status of the configured AI Service Token.
     """
-    if not AI_SERVICE_TOKEN:
-        raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing.")
-    headers = {
-        "Authorization": f"Bearer {AI_SERVICE_TOKEN}",
-        "Content-Type": "application/json"
-    }
-    # Minimal payload to trigger headers without consuming many tokens
-    payload = {
-        "model": MODEL_NAME,
-        "messages": [{"role": "user", "content": "Ping."}],
-        "temperature": 0.1,
-        "max_tokens": 1
-    }
-    try:
-        response = requests.post(API_URL, headers=headers, json=payload, timeout=10)
-        # Extract standard rate limit headers
-        remaining = response.headers.get('x-ratelimit-remaining-requests') or response.headers.get('x-ratelimit-remaining') or 'N/A'
-        limit = response.headers.get('x-ratelimit-limit-requests') or response.headers.get('x-ratelimit-limit') or 'N/A'
-        reset = response.headers.get('x-ratelimit-reset-requests') or response.headers.get('x-ratelimit-reset') or 'N/A'
-        return {
-            "status_code": response.status_code,
-            "rate_limit_info": {
-                "remaining": remaining,
-                "limit": limit,
-                "reset_time": reset
-            },
-            "message": "Token is valid." if response.status_code == 200 else f"Request failed: {response.status_code}"
-        }
-    except Exception as e:
-        return {"error": str(e)}
 @app.post("/analyze")
 def analyze_filename(request: AnalyzeRequest):
     """
-    Main endpoint to analyze filenames.
     """
-    if not AI_SERVICE_TOKEN:
         raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing.")
-    headers = {
-        "Authorization": f"Bearer {AI_SERVICE_TOKEN}",
-        "Content-Type": "application/json"
-    }
     payload = {
         "model": MODEL_NAME,
         "messages": [
@@ -95,27 +105,60 @@ def analyze_filename(request: AnalyzeRequest):
         "temperature": 0.1
     }
-    try:
-        # 30-second timeout for analysis requests
-        response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
-        if response.status_code == 429:
-             raise HTTPException(status_code=429, detail="Rate limit exceeded (429)")
-        response.raise_for_status()
-        data = response.json()
-        content = data.get('choices', [{}])[0].get('message', {}).get('content')
-        if content:
-            # Clean up markdown if present to ensure valid JSON
-            clean_content = content.replace("```json", "").replace("```", "").strip()
-            try:
-                return json.loads(clean_content)
-            except json.JSONDecodeError:
-                return {"error": "AI returned malformed JSON", "raw_content": clean_content}
-        return {"error": "No content returned"}
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=str(e))

 import os
 import requests
 import json
+import random
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 # --- CONFIGURATION ---
+# Load tokens from Hugging Face Secrets (Environment Variables)
+# Supports a single token or a comma-separated list of tokens for rotation
+AI_SERVICE_TOKENS_RAW = os.environ.get("AI_SERVICE_TOKEN", "")
+AI_SERVICE_TOKENS = [t.strip() for t in AI_SERVICE_TOKENS_RAW.split(",") if t.strip()]
 API_URL = "https://models.inference.ai.azure.com/chat/completions"
 MODEL_NAME = "gpt-4o-mini"
 class AnalyzeRequest(BaseModel):
     filename: str
+# --- HELPERS ---
+def get_headers(token):
+    return {
+        "Authorization": f"Bearer {token}",
+        "Content-Type": "application/json"
+    }
 # --- ENDPOINTS ---
 @app.get("/")
 def home():
     """Health check endpoint."""
+    return {"status": "active", "platform": "Hugging Face Spaces", "tokens_loaded": len(AI_SERVICE_TOKENS)}
 @app.get("/check-limit")
 def check_limit():
     """
+    Checks the rate limit status of the configured AI Service Tokens.
     """
+    if not AI_SERVICE_TOKENS:
+        raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing or empty.")
+    results = []
+    # Check each token individually
+    for i, token in enumerate(AI_SERVICE_TOKENS):
+        headers = get_headers(token)
+        payload = {
+            "model": MODEL_NAME,
+            "messages": [{"role": "user", "content": "Ping."}],
+            "temperature": 0.1,
+            "max_tokens": 1
+        }
+        try:
+            response = requests.post(API_URL, headers=headers, json=payload, timeout=10)
+            # Extract standard rate limit headers
+            remaining = response.headers.get('x-ratelimit-remaining-requests') or response.headers.get('x-ratelimit-remaining') or 'N/A'
+            limit = response.headers.get('x-ratelimit-limit-requests') or response.headers.get('x-ratelimit-limit') or 'N/A'
+            reset = response.headers.get('x-ratelimit-reset-requests') or response.headers.get('x-ratelimit-reset') or 'N/A'
+            token_status = {
+                "token_index": i,
+                "status_code": response.status_code,
+                "rate_limit_info": {
+                    "remaining": remaining,
+                    "limit": limit,
+                    "reset_time": reset
+                },
+                "message": "Token is valid." if response.status_code == 200 else f"Request failed: {response.status_code}"
+            }
+            results.append(token_status)
+        except Exception as e:
+            results.append({"token_index": i, "error": str(e)})
+    return {"tokens_checked": len(results), "results": results}
 @app.post("/analyze")
 def analyze_filename(request: AnalyzeRequest):
     """
+    Main endpoint to analyze filenames with token rotation on 429.
     """
+    if not AI_SERVICE_TOKENS:
         raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing.")
     payload = {
         "model": MODEL_NAME,
         "messages": [
         "temperature": 0.1
     }
+    # Try each token until one works or all fail
+    # Shuffle simply to distribute load if we have multiple valid tokens,
+    # though deterministic iteration is also fine.
+    tokens_to_try = list(AI_SERVICE_TOKENS)
+    # random.shuffle(tokens_to_try) # Optional: Randomize order
+    last_error_detail = "Unknown error"
+    for token in tokens_to_try:
+        headers = get_headers(token)
+        try:
+            # 30-second timeout for analysis requests
+            response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
+            # If rate limited, log it and continue to the next token
+            if response.status_code == 429:
+                print(f"Token ending in ...{token[-4:]} hit rate limit (429). Trying next token.")
+                last_error_detail = "Rate limit exceeded (429)"
+                continue
+            # If 401/403 (Auth error), also try next token
+            if response.status_code in [401, 403]:
+                print(f"Token ending in ...{token[-4:]} failed auth ({response.status_code}). Trying next token.")
+                last_error_detail = f"Auth failed ({response.status_code})"
+                continue
+            response.raise_for_status()
+            data = response.json()
+            content = data.get('choices', [{}])[0].get('message', {}).get('content')
+            if content:
+                # Clean up markdown if present to ensure valid JSON
+                clean_content = content.replace("```json", "").replace("```", "").strip()
+                try:
+                    return json.loads(clean_content)
+                except json.JSONDecodeError:
+                    return {"error": "AI returned malformed JSON", "raw_content": clean_content}
+            return {"error": "No content returned"}
+        except requests.exceptions.RequestException as e:
+            # Network errors might be transient, could retry or fail.
+            # Here we treat it as a failure for this token and try next.
+            print(f"Network error with token ...{token[-4:]}: {e}")
+            last_error_detail = str(e)
+            continue
+        except Exception as e:
+            print(f"Unexpected error with token ...{token[-4:]}: {e}")
+            last_error_detail = str(e)
+            # Depending on severity, might want to break or continue.
+            # We'll continue to be safe.
+            continue
+    # If we exit the loop, all tokens failed
+    raise HTTPException(status_code=429, detail=f"All tokens failed. Last error: {last_error_detail}")