# ffastapi / app.py — FastAPI AI backend for Hugging Face Spaces (Docker SDK).
# Provenance (web-page residue converted to a comment so the file parses):
# author "bigbossmonster", commit 9e8b7fe verified, "Update app.py".
import os
import requests
import json
import time
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
# --- CONFIGURATION ---
# 1. OpenAI/Azure Configuration
# AI_SERVICE_TOKEN may hold several comma-separated API tokens; whitespace is
# trimmed and empty entries are dropped, yielding a fallback list of tokens.
AI_SERVICE_TOKENS_RAW = os.environ.get("AI_SERVICE_TOKEN", "")
AI_SERVICE_TOKENS = [t.strip() for t in AI_SERVICE_TOKENS_RAW.split(",") if t.strip()]
OPENAI_API_URL = "https://models.inference.ai.azure.com/chat/completions"
OPENAI_MODEL_NAME = "gpt-4o-mini"
# 2. Google Gemini Configuration (Direct Google API)
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
# Targets the gemma-3-27b-it model on the generativelanguage API.
# NOTE(review): the API key is embedded in the URL query string at import time;
# the previous comment said "gemma-2" but the URL clearly uses gemma-3.
GEMINI_API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/gemma-3-27b-it:generateContent?key={GOOGLE_API_KEY}"
app = FastAPI(
    title="AI Backend Service",
    description="Running on Hugging Face Spaces (Docker SDK)"
)
# --- MODELS ---
class AnalyzeRequest(BaseModel):
    """Request body for the /analyze endpoint."""
    # Media filename to analyze (e.g. a movie or episode file name).
    filename: str
    # Backend selector: 'gemma' routes to Google; anything else uses OpenAI.
    model_provider: str = "openai"  # 'openai' or 'gemma'
# --- HELPERS ---
def get_headers(token):
    """Build JSON request headers carrying *token* as a Bearer credential."""
    headers = {"Content-Type": "application/json"}
    headers["Authorization"] = f"Bearer {token}"
    return headers
# --- ENDPOINTS ---
@app.get("/")
def home():
    """Health check: report configured token count and Google-key presence."""
    info = {"status": "active", "platform": "Hugging Face Spaces"}
    info["tokens_loaded"] = len(AI_SERVICE_TOKENS)
    info["google_api_enabled"] = bool(GOOGLE_API_KEY)
    return info
@app.get("/check-limit")
def check_limit():
    """Probe each configured OpenAI token with a 1-token ping and report its status."""
    if not AI_SERVICE_TOKENS:
        return {"tokens_checked": 0, "results": [], "note": "OpenAI tokens missing"}
    # The probe payload is identical for every token, so build it once.
    probe = {
        "model": OPENAI_MODEL_NAME,
        "messages": [{"role": "user", "content": "Ping."}],
        "temperature": 0.1,
        "max_tokens": 1,
    }
    results = []
    for idx, token in enumerate(AI_SERVICE_TOKENS):
        try:
            resp = requests.post(OPENAI_API_URL, headers=get_headers(token), json=probe, timeout=10)
            results.append({
                "token_index": idx,
                "status_code": resp.status_code,
                "valid": resp.status_code == 200,
                "remaining": resp.headers.get('x-ratelimit-remaining-requests', 'N/A'),
            })
        except Exception as exc:
            # Network/timeout failures are recorded per token, not raised.
            results.append({"token_index": idx, "status_code": "ERROR", "error": str(exc)})
    return {"tokens_checked": len(results), "results": results}
def call_openai_gpt4o(filename, tokens):
    """Extract movie/TV metadata for *filename* via the OpenAI-compatible endpoint.

    Tries each token in *tokens* in order, skipping tokens that are
    rate-limited or rejected (429/401/403). Returns the raw model content
    string from the first successful response. Raises Exception when every
    token fails, including the last observed error for diagnosis.
    """
    payload = {
        "model": OPENAI_MODEL_NAME,
        "messages": [
            {"role": "system", "content": "You are an expert Movie and TV metadata analyst. Return ONLY raw JSON in the format: {\"title\": \"...\", \"year\": \"...\", \"isSeries\": false/true}. Analyze the following filename and extract the data."},
            # BUG FIX: the user message previously contained a hard-coded
            # placeholder instead of the actual argument, so the model never
            # saw the filename being analyzed.
            {"role": "user", "content": f"Analyze: \"{filename}\""}
        ],
        "temperature": 0.1,
        "max_tokens": 500
    }
    last_error = ""
    for i, token in enumerate(tokens):
        try:
            response = requests.post(OPENAI_API_URL, headers=get_headers(token), json=payload, timeout=30)
            if response.status_code == 200:
                # Defensive navigation: missing keys yield None rather than raising.
                return response.json().get('choices', [{}])[0].get('message', {}).get('content')
            elif response.status_code in [429, 401, 403]:
                # Quota or auth problem on this token — fall through to the next.
                last_error = f"Token {i}: {response.status_code}"
                continue
            else:
                last_error = f"Token {i} Error: {response.text}"
        except Exception as e:
            last_error = str(e)
            continue
    raise Exception(f"OpenAI All tokens failed. Last: {last_error}")
def call_google_gemini(filename):
    """Extract movie/TV metadata for *filename* via the Google generativelanguage API.

    Returns the raw text of the first candidate. Raises Exception when the
    API key is missing, the HTTP call fails, or the response shape is not the
    expected candidates/content/parts structure.
    """
    if not GOOGLE_API_KEY:
        raise Exception("GOOGLE_API_KEY not configured.")
    # Gemma does not support JSON response mode, so the prompt itself must
    # demand raw JSON with no markdown fences.
    # BUG FIX: the prompt previously embedded a hard-coded placeholder instead
    # of the *filename* argument, so every request analyzed the same string.
    prompt = f"""
You are an expert Movie and TV metadata analyst.
Analyze the filename: "{filename}"
Identify the title, year, and whether it is a series.
Return ONLY a raw JSON object with this exact format (no markdown, no backticks):
{{"title": "Movie Title", "year": "2024", "isSeries": false}}
"""
    payload = {
        "contents": [{
            "parts": [{"text": prompt}]
        }],
        "generationConfig": {
            "temperature": 0.1,
            "maxOutputTokens": 100
            # "responseMimeType": "application/json" is intentionally omitted:
            # it is not supported by the Gemma models.
        }
    }
    response = requests.post(GEMINI_API_URL, headers={"Content-Type": "application/json"}, json=payload, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Google Gemini API Error {response.status_code}: {response.text}")
    result = response.json()
    try:
        return result['candidates'][0]['content']['parts'][0]['text']
    except (KeyError, IndexError):
        raise Exception(f"Unexpected response structure from Gemini: {str(result)}")
@app.post("/analyze")
def analyze_filename(request: AnalyzeRequest):
    """
    Analyze a filename using the selected provider.

    'gemma' routes to the Google API; any other value uses the OpenAI path.
    Returns the model's parsed JSON metadata dict, or an error dict when the
    model returned no content. Raises HTTPException(500) on any failure.
    """
    raw_content = ""
    provider_used = request.model_provider
    try:
        if provider_used == "gemma":
            raw_content = call_google_gemini(request.filename)
        else:
            if not AI_SERVICE_TOKENS:
                raise HTTPException(500, "OpenAI tokens missing.")
            raw_content = call_openai_gpt4o(request.filename, AI_SERVICE_TOKENS)
        if raw_content:
            # Strip markdown code fences the model may wrap around the JSON.
            clean_content = raw_content.replace("```json", "").replace("```", "").strip()
            return json.loads(clean_content)
        return {"error": "No content returned", "provider": provider_used}
    except HTTPException:
        # BUG FIX: HTTPException is an Exception subclass, so the inner
        # "OpenAI tokens missing." error was previously swallowed by the
        # generic handler below and re-raised with a misleading detail.
        raise
    except Exception as e:
        print(f"Analysis Error ({provider_used}): {e}")
        raise HTTPException(status_code=500, detail=f"Analysis failed ({provider_used}): {str(e)}")