Spaces:
Sleeping
Sleeping
| import os | |
| import time | |
| import random | |
| import asyncio | |
| import json | |
| from fastapi import FastAPI, HTTPException, Depends | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.security.api_key import APIKeyHeader | |
| from pydantic import BaseModel | |
| from typing import List, Optional | |
| from dotenv import load_dotenv | |
| from starlette.responses import StreamingResponse | |
| from openai import OpenAI | |
| from typing import List, Optional, Dict, Any | |
# Load environment variables (python-dotenv) before the os.getenv lookups below.
load_dotenv()

# Base URL handed to the OpenAI client: Google's OpenAI-compatible endpoint.
BASE_URL = "https://generativelanguage.googleapis.com/v1beta/openai/"

# Token callers must present, read from the API_HUGGINGFACE variable.
# os.getenv returns None when the variable is not set.
EXPECTED_API_KEY = os.getenv("API_HUGGINGFACE")

# Name of the HTTP header that carries the caller's token.
API_KEY_NAME = "Authorization"

# Pool of upstream API keys, one per environment variable, in declaration
# order. Entries are None for variables that are not set.
API_KEYS = [
    os.getenv(env_name)
    for env_name in (
        "API_GEMINI_1",
        "API_GEMINI_2",
        "API_GEMINI_3",
        "API_GEMINI_4",
        "API_GEMINI_5",
    )
]
# Pydantic models used to validate the request body
class Message(BaseModel):
    """One chat message; both fields are typed ``Any``, so any value is accepted."""

    role: Any
    content: Any
class ChatCompletionRequest(BaseModel):
    """Request body for a chat completion.

    Fields not declared here are kept on the model (``extra="allow"``), so
    they are included in ``model_dump()`` output.
    """

    # Pydantic v2 configuration. The file already relies on v2
    # (``model_dump``), where the inner ``class Config`` form is deprecated.
    model_config = {"extra": "allow"}

    model: str = "gemini-2.0-flash"
    messages: List[Message]
    # NOTE(review): 8196 may be a typo for 8192 - confirm against the
    # upstream model's output-token limit before changing the default.
    max_tokens: Optional[int] = 8196
    temperature: Optional[float] = 0.8
    stream: Optional[bool] = False
    stream_options: Optional[Dict[str, Any]] = None
# FastAPI server
app = FastAPI(
    title="OpenAI-SDK-compatible API",
    version="1.0.0",
    description="Un wrapper FastAPI compatibile con le specifiche dell'API OpenAI.",
)

# CORS settings: every origin, method and header is allowed.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive - confirm this is intended for the deployment.
_CORS_OPTIONS = dict(
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
app.add_middleware(CORSMiddleware, **_CORS_OPTIONS)
# API key validation
# auto_error=False: a missing header reaches verify_api_key as a falsy value
# instead of being rejected by the security scheme itself.
api_key_header = APIKeyHeader(name=API_KEY_NAME, auto_error=False)


def verify_api_key(api_key: str = Depends(api_key_header)):
    """Validate the caller's ``Authorization`` header.

    The header must equal ``"Bearer <EXPECTED_API_KEY>"``.

    Args:
        api_key: Raw header value injected by ``api_key_header``.

    Returns:
        The header value, unchanged, when it is valid.

    Raises:
        HTTPException: 403 when the header is missing, when it does not
            match, or when no expected key is configured on the server.
    """
    # Function-scope import keeps this block self-contained.
    from secrets import compare_digest

    if not api_key:
        raise HTTPException(status_code=403, detail="API key mancante")

    # Fail closed when the expected key is not configured: otherwise the
    # expected value would be the literal string "Bearer None", which any
    # caller could send.
    if not EXPECTED_API_KEY:
        raise HTTPException(status_code=403, detail="API key non valida")

    expected_header = f"Bearer {EXPECTED_API_KEY}"
    # Constant-time comparison; bytes are used because compare_digest rejects
    # non-ASCII str arguments.
    if not compare_digest(api_key.encode("utf-8"), expected_header.encode("utf-8")):
        raise HTTPException(status_code=403, detail="API key non valida")
    return api_key
# OpenAI client
def get_openai_client():
    """Build an OpenAI client using a randomly chosen upstream API key.

    Picking a different key per call spreads requests over several keys, to
    work around "Quota Exceeded" limits.

    Returns:
        An ``OpenAI`` client pointed at ``BASE_URL``.
    """
    # Entries of API_KEYS are None/empty for unset environment variables;
    # choosing one of those would make the request fail at random.
    configured_keys = [key for key in API_KEYS if key]
    # With no key configured at all, pass None as before and let the client
    # decide how to report it.
    api_key = random.choice(configured_keys) if configured_keys else None
    return OpenAI(api_key=api_key, base_url=BASE_URL)
# API call (no streaming)
def call_api_sync(
    params: ChatCompletionRequest,
    max_retries: Optional[int] = None,
    retry_delay: float = 2.0,
):
    """Run a non-streaming chat completion, retrying on HTTP 429.

    Each attempt obtains a fresh client from ``get_openai_client``.

    Args:
        params: Validated request; ``params.model_dump()`` is passed as
            keyword arguments to the client call.
        max_retries: Maximum number of retries after a 429. ``None`` (the
            default) retries indefinitely, as the original code did.
        retry_delay: Seconds to sleep before each retry.

    Returns:
        The response object returned by the OpenAI client.

    Raises:
        Exception: Any non-429 error from the client, or the last 429 error
            once ``max_retries`` is exhausted.
    """
    attempts = 0
    # Iterative retry: the previous recursive form grew the call stack on
    # every 429 and eventually ended in RecursionError.
    while True:
        try:
            client = get_openai_client()
            response_format = getattr(params, "response_format", None)
            # NOTE(review): with extra="allow" this extra field presumably
            # arrives as a plain dict, in which case getattr(..., "type")
            # yields None and the parse branch is never taken - confirm.
            if response_format and getattr(response_format, "type", None) == "json_schema":
                return client.beta.chat.completions.parse(**params.model_dump())
            return client.chat.completions.create(**params.model_dump())
        except Exception as exc:
            # Prefer the status code when the exception exposes one; keep the
            # original message check as a fallback.
            rate_limited = getattr(exc, "status_code", None) == 429 or "429" in str(exc)
            out_of_retries = max_retries is not None and attempts >= max_retries
            if not rate_limited or out_of_retries:
                raise  # bare raise keeps the original traceback
            attempts += 1
            time.sleep(retry_delay)
# API call (streaming)
async def _resp_async_generator(
    params: ChatCompletionRequest,
    max_retries: Optional[int] = None,
    retry_delay: float = 2.0,
):
    """Yield a chat completion as ``data: ...`` frames, retrying on HTTP 429.

    Each chunk is serialized to JSON and emitted as ``"data: <json>\\n\\n"``;
    a final ``"data: [DONE]\\n\\n"`` frame marks the end. On a non-retryable
    error a single ``{"error": ...}`` frame is emitted instead.

    Args:
        params: Validated request; ``params.model_dump()`` is passed as
            keyword arguments to the client call.
        max_retries: Maximum number of retries after a 429. ``None`` (the
            default) retries indefinitely, as the original code did.
        retry_delay: Seconds to wait before each retry.

    Yields:
        str: One frame per chunk, then the ``[DONE]`` or error frame.
    """
    attempts = 0
    # Iterative retry instead of re-entering the generator recursively.
    while True:
        client = get_openai_client()
        sent_any = False
        try:
            response = client.chat.completions.create(**params.model_dump())
            # NOTE(review): get_openai_client returns the synchronous client,
            # so this loop blocks the event loop while waiting for each chunk.
            for chunk in response:
                chunk_data = chunk.to_dict() if hasattr(chunk, "to_dict") else chunk
                frame = f"data: {json.dumps(chunk_data)}\n\n"
                sent_any = True
                yield frame
                # Brief pause that hands control back to the event loop.
                await asyncio.sleep(0.01)
            yield "data: [DONE]\n\n"
            return
        except Exception as exc:
            rate_limited = getattr(exc, "status_code", None) == 429 or "429" in str(exc)
            out_of_retries = max_retries is not None and attempts >= max_retries
            # Retry only if nothing was sent yet: restarting after partial
            # output would replay already-delivered chunks to the client.
            if rate_limited and not sent_any and not out_of_retries:
                attempts += 1
                await asyncio.sleep(retry_delay)
                continue
            error_data = {"error": str(exc)}
            yield f"data: {json.dumps(error_data)}\n\n"
            return
# ---------------------------------- API methods ---------------------------------------
# NOTE(review): no route decorator is visible on this function in this view -
# confirm how it is registered with the app.
def read_general():
    """Return the fixed welcome payload."""
    welcome = {"response": "Benvenuto"}
    return welcome
# NOTE(review): no route decorator is visible on this function in this view -
# confirm how it is registered with the app.
async def health_check():
    """Return a fixed payload signalling that the service is up."""
    status = {"message": "success"}
    return status
# NOTE(review): no route decorator or API-key dependency is visible on this
# handler in this view - confirm how it is registered and protected.
async def chat_completions(req: ChatCompletionRequest):
    """Handle a chat-completion request, streaming or not.

    Args:
        req: Validated request body.

    Returns:
        The client response for non-streaming requests, or a
        ``StreamingResponse`` of ``data: ...`` frames when ``req.stream`` is
        truthy.

    Raises:
        HTTPException: 400 when ``req.messages`` is empty; 500 for any other
            failure, with the error text as detail.
    """
    print(req)

    # Validate before the try block: inside it, this 400 was caught by the
    # generic handler below and re-raised as a 500.
    if not req.messages:
        raise HTTPException(status_code=400, detail="Nessun messaggio fornito")

    try:
        if req.stream:
            # The generator emits server-sent-event frames ("data: ...\n\n"),
            # so label the response as an event stream.
            return StreamingResponse(
                _resp_async_generator(req), media_type="text/event-stream"
            )
        # call_api_sync blocks (network I/O and time.sleep on retry); run it
        # in the default executor so the event loop stays responsive.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, call_api_sync, req)
    except HTTPException:
        # Keep deliberate HTTP errors intact instead of wrapping them.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e