Spaces:

bigbossmonster
/

ffastapi

Sleeping

App Files Files Community

bigbossmonster committed 18 days ago

Commit

e23b777

verified ·

1 Parent(s): 89f7ea1

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -96

app.py CHANGED Viewed

@@ -1,18 +1,22 @@
 import os
 import requests
 import json
-import random
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 # --- CONFIGURATION ---
-# Load tokens from Hugging Face Secrets (Environment Variables)
-# Supports a single token or a comma-separated list of tokens for rotation
 AI_SERVICE_TOKENS_RAW = os.environ.get("AI_SERVICE_TOKEN", "")
 AI_SERVICE_TOKENS = [t.strip() for t in AI_SERVICE_TOKENS_RAW.split(",") if t.strip()]
-API_URL = "https://models.inference.ai.azure.com/chat/completions"
-MODEL_NAME = "gpt-4o"
 app = FastAPI(
     title="AI Backend Service",
@@ -22,6 +26,7 @@ app = FastAPI(
 # --- MODELS ---
 class AnalyzeRequest(BaseModel):
     filename: str
 # --- HELPERS ---
 def get_headers(token):
@@ -35,130 +40,136 @@ def get_headers(token):
 @app.get("/")
 def home():
     """Health check endpoint."""
-    return {"status": "active", "platform": "Hugging Face Spaces", "tokens_loaded": len(AI_SERVICE_TOKENS)}
 @app.get("/check-limit")
 def check_limit():
     """
-    Checks the rate limit status of the configured AI Service Tokens.
     """
     if not AI_SERVICE_TOKENS:
-        raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing or empty.")
     results = []
-    # Check each token individually
     for i, token in enumerate(AI_SERVICE_TOKENS):
         headers = get_headers(token)
         payload = {
-            "model": MODEL_NAME,
             "messages": [{"role": "user", "content": "Ping."}],
             "temperature": 0.1,
-            "max_tokens": 1
         }
         try:
-            response = requests.post(API_URL, headers=headers, json=payload, timeout=10)
-            # Extract standard rate limit headers
-            remaining = response.headers.get('x-ratelimit-remaining-requests') or response.headers.get('x-ratelimit-remaining') or 'N/A'
-            limit = response.headers.get('x-ratelimit-limit-requests') or response.headers.get('x-ratelimit-limit') or 'N/A'
-            reset = response.headers.get('x-ratelimit-reset-requests') or response.headers.get('x-ratelimit-reset') or 'N/A'
             token_status = {
                 "token_index": i,
                 "status_code": response.status_code,
-                "rate_limit_info": {
-                    "remaining": remaining,
-                    "limit": limit,
-                    "reset_time": reset
-                },
-                "message": "Token is valid." if response.status_code == 200 else f"Request failed: {response.status_code}"
             }
             results.append(token_status)
         except Exception as e:
-            results.append({"token_index": i, "error": str(e)})
     return {"tokens_checked": len(results), "results": results}
-@app.post("/analyze")
-def analyze_filename(request: AnalyzeRequest):
-    """
-    Main endpoint to analyze filenames with token rotation on 429.
-    """
-    if not AI_SERVICE_TOKENS:
-        raise HTTPException(status_code=500, detail="AI_SERVICE_TOKEN secret is missing.")
     payload = {
-        "model": MODEL_NAME,
         "messages": [
-            {
-                "role": "system",
-                "content": "You are an expert Movie and TV metadata analyst. Return ONLY raw JSON in the format: {\"title\": \"...\", \"year\": \"...\", \"isSeries\": false/true}. Analyze the following filename and extract the data."
-            },
-            {
-                "role": "user",
-                "content": f"Analyze: \"{request.filename}\""
-            }
         ],
-        "temperature": 0.1
     }
-    # Try each token until one works or all fail
-    # Shuffle simply to distribute load if we have multiple valid tokens,
-    # though deterministic iteration is also fine.
-    tokens_to_try = list(AI_SERVICE_TOKENS)
-    # random.shuffle(tokens_to_try) # Optional: Randomize order
-    last_error_detail = "Unknown error"
-    for token in tokens_to_try:
-        headers = get_headers(token)
         try:
-            # 30-second timeout for analysis requests
-            response = requests.post(API_URL, headers=headers, json=payload, timeout=30)
-            # If rate limited, log it and continue to the next token
-            if response.status_code == 429:
-                print(f"Token ending in ...{token[-4:]} hit rate limit (429). Trying next token.")
-                last_error_detail = "Rate limit exceeded (429)"
                 continue
-            # If 401/403 (Auth error), also try next token
-            if response.status_code in [401, 403]:
-                print(f"Token ending in ...{token[-4:]} failed auth ({response.status_code}). Trying next token.")
-                last_error_detail = f"Auth failed ({response.status_code})"
-                continue
-            response.raise_for_status()
-            data = response.json()
-            content = data.get('choices', [{}])[0].get('message', {}).get('content')
-            if content:
-                # Clean up markdown if present to ensure valid JSON
-                clean_content = content.replace("```json", "").replace("```", "").strip()
-                try:
-                    return json.loads(clean_content)
-                except json.JSONDecodeError:
-                    return {"error": "AI returned malformed JSON", "raw_content": clean_content}
-            return {"error": "No content returned"}
-        except requests.exceptions.RequestException as e:
-            # Network errors might be transient, could retry or fail.
-            # Here we treat it as a failure for this token and try next.
-            print(f"Network error with token ...{token[-4:]}: {e}")
-            last_error_detail = str(e)
-            continue
         except Exception as e:
-            print(f"Unexpected error with token ...{token[-4:]}: {e}")
-            last_error_detail = str(e)
-            # Depending on severity, might want to break or continue.
-            # We'll continue to be safe.
             continue
-    # If we exit the loop, all tokens failed
-    raise HTTPException(status_code=429, detail=f"All tokens failed. Last error: {last_error_detail}")

 import os
 import requests
 import json
+import time
 from fastapi import FastAPI, HTTPException
 from pydantic import BaseModel
 # --- CONFIGURATION ---
+# 1. OpenAI/Azure Configuration
 AI_SERVICE_TOKENS_RAW = os.environ.get("AI_SERVICE_TOKEN", "")
 AI_SERVICE_TOKENS = [t.strip() for t in AI_SERVICE_TOKENS_RAW.split(",") if t.strip()]
+OPENAI_API_URL = "https://models.inference.ai.azure.com/chat/completions"
+OPENAI_MODEL_NAME = "gpt-4o-mini"
+# 2. Google Gemini Configuration (Direct Google API)
+# You need to set GOOGLE_API_KEY in your HF Space secrets
+GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
+# Uses the gemma-3-27b-it model, served through Google's Generative Language API
+GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemma-3-27b-it:generateContent?key={GOOGLE_API_KEY}"
 app = FastAPI(
     title="AI Backend Service",
 # --- MODELS ---
 class AnalyzeRequest(BaseModel):
+    # Request body for POST /analyze.
+    # filename: the file name whose title/year/series info should be extracted.
     filename: str
+    # model_provider: "gemma" selects the Google path; any other value
+    # (including the default) is handled by the OpenAI path.
+    model_provider: str = "openai" # 'openai' or 'gemma' (maps to Gemini)
 # --- HELPERS ---
 def get_headers(token):
 @app.get("/")
 def home():
     """Health check endpoint."""
+    return {
+        "status": "active",
+        "platform": "Hugging Face Spaces",
+        "tokens_loaded": len(AI_SERVICE_TOKENS),
+        "google_api_enabled": bool(GOOGLE_API_KEY)
+    }
 @app.get("/check-limit")
 def check_limit():
     """
+    Checks the rate limit status of OpenAI tokens.
+    (Google API doesn't provide easy rate limit headers in the same way, skipped for now).
     """
     if not AI_SERVICE_TOKENS:
+        # Just return empty if no OpenAI tokens, but don't crash if Google is used
+        return {"tokens_checked": 0, "results": [], "note": "OpenAI tokens missing"}
     results = []
     for i, token in enumerate(AI_SERVICE_TOKENS):
         headers = get_headers(token)
         payload = {
+            "model": OPENAI_MODEL_NAME,
             "messages": [{"role": "user", "content": "Ping."}],
             "temperature": 0.1,
+            "max_tokens": 1
         }
         try:
+            response = requests.post(OPENAI_API_URL, headers=headers, json=payload, timeout=10)
             token_status = {
                 "token_index": i,
                 "status_code": response.status_code,
+                "valid": response.status_code == 200,
+                "remaining": response.headers.get('x-ratelimit-remaining-requests', 'N/A')
             }
             results.append(token_status)
         except Exception as e:
+            results.append({"token_index": i, "status_code": "ERROR", "error": str(e)})
     return {"tokens_checked": len(results), "results": results}
+def call_openai_gpt4o(filename, tokens):
     payload = {
+        "model": OPENAI_MODEL_NAME,
         "messages": [
+            {"role": "system", "content": "You are an expert Movie and TV metadata analyst. Return ONLY raw JSON in the format: {\"title\": \"...\", \"year\": \"...\", \"isSeries\": false/true}. Analyze the following filename and extract the data."},
+            {"role": "user", "content": f"Analyze: \"{filename}\""}
         ],
+        "temperature": 0.1,
+        "max_tokens": 500
     }
+    last_error = ""
+    for i, token in enumerate(tokens):
         try:
+            response = requests.post(OPENAI_API_URL, headers=get_headers(token), json=payload, timeout=30)
+            if response.status_code == 200:
+                content = response.json().get('choices', [{}])[0].get('message', {}).get('content')
+                return content
+            elif response.status_code in [429, 401, 403]:
+                last_error = f"Token {i}: {response.status_code}"
                 continue
+            else:
+                last_error = f"Token {i} Error: {response.text}"
         except Exception as e:
+            last_error = str(e)
             continue
+    raise Exception(f"OpenAI All tokens failed. Last: {last_error}")
+def call_google_gemini(filename):
+    if not GOOGLE_API_KEY:
+        raise Exception("GOOGLE_API_KEY not configured.")
+    # Construct the Gemini payload
+    prompt = f"""
+    You are an expert Movie and TV metadata analyst.
+    Analyze the filename: "{filename}"
+    Identify the title, year, and whether it is a series.
+    Return ONLY a raw JSON object with this exact format:
+    {{"title": "Movie Title", "year": "2024", "isSeries": false}}
+    """
+    payload = {
+        "contents": [{
+            "parts": [{"text": prompt}]
+        }],
+        "generationConfig": {
+            "temperature": 0.1,
+            "maxOutputTokens": 100,
+            "responseMimeType": "application/json" # Gemini supports JSON mode natively
+        }
+    }
+    response = requests.post(GEMINI_API_URL, headers={"Content-Type": "application/json"}, json=payload, timeout=30)
+    if response.status_code != 200:
+        raise Exception(f"Google Gemini API Error {response.status_code}: {response.text}")
+    result = response.json()
+    # Extract text from Gemini response structure
+    try:
+        return result['candidates'][0]['content']['parts'][0]['text']
+    except (KeyError, IndexError):
+        raise Exception(f"Unexpected response structure from Gemini: {str(result)}")
+@app.post("/analyze")
+def analyze_filename(request: AnalyzeRequest):
+    """
+    Analyze filename using selected provider (openai or gemma/gemini).
+    """
+    raw_content = ""
+    provider_used = request.model_provider
+    try:
+        if provider_used == "gemma":
+            # Although the frontend sends "gemma", we map this to our Google Gemini function
+            raw_content = call_google_gemini(request.filename)
+        else:
+            # Default to OpenAI
+            if not AI_SERVICE_TOKENS: raise HTTPException(500, "OpenAI tokens missing.")
+            raw_content = call_openai_gpt4o(request.filename, AI_SERVICE_TOKENS)
+        # Parse JSON output from either provider
+        if raw_content:
+            clean_content = raw_content.replace("```json", "").replace("```", "").strip()
+            return json.loads(clean_content)
+        return {"error": "No content returned", "provider": provider_used}
+    except Exception as e:
+        print(f"Analysis Error ({provider_used}): {e}")
+        raise HTTPException(status_code=500, detail=f"Analysis failed ({provider_used}): {str(e)}")