import typing as t
import os
# from dotenv import load_dotenv

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import logging

from src.utils import (
    OpenAIClient,
    TogetherAIClient,
    GeminiClient,
    GroqClient,
    MistralClient,
)
from src.models_enums import ModelProvider

# load_dotenv()

# Fail fast at import time when the Together API key is absent. An explicit
# check is used instead of `assert`: assertions are stripped under
# `python -O`, and `os.environ[...]` raises KeyError rather than returning
# None, so the previous `is not None` assertion could never fire on its own.
if "TOGETHER_API_KEY" not in os.environ:
    raise RuntimeError(
        "The TOGETHER_API_KEY environment variable must be set."
    )

# Configure basic logging to see messages in stdout (and thus in HF Space logs)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class RequestData(BaseModel):
    """Request body accepted by the ``/generate`` endpoint."""

    # Prompt text forwarded to the selected client.
    prompt: str
    # Forwarded to the client as `max_tokens`; defaults to 50.
    max_tokens: int = 50
    # Optional system prompt forwarded to the client; None when omitted.
    system_prompt: t.Optional[str] = None


# Maps the provider string taken from the URL path to the client used to
# serve it. Keys are the `.value` of the corresponding ModelProvider member.
MODEL_PROVIDER2CLIENT = {
    ModelProvider.OPENAI.value: OpenAIClient,
    ModelProvider.GEMINI.value: GeminiClient,
    ModelProvider.TOGETHERAI.value: TogetherAIClient,
    ModelProvider.GROQ.value: GroqClient,
    ModelProvider.MISTRAL.value: MistralClient,
}

app = FastAPI()
logger.info("FastAPI app initialized.")

# The application now starts without initializing a specific LLM,
# which makes it more flexible.


@app.post("/generate/{model_provider}/{model_name:path}")
async def generate_text(
    model_provider: str,
    model_name: str,
    request: RequestData
):
    """
    Generates text using a specified LLM provider and model.

    Example: POST /generate/togetherai/meta-llama/Llama-3.3-70B-Instruct-Turbo-Free
    with body: {"prompt": "...", "max_tokens": 100}

    Args:
        model_provider: Provider key from the URL; must be a key of
            MODEL_PROVIDER2CLIENT.
        model_name: Model identifier from the URL. It is declared as a
            ``:path`` parameter, so it may contain slashes.
        request: Request body carrying the prompt, the optional system
            prompt and the token limit.

    Returns:
        Whatever the awaited client call returns, passed through unchanged.

    Raises:
        HTTPException: 400 when the provider is unknown; 500 when creating
            or calling the client raises any exception.
    """
    logger.info(
        "Received POST request to /generate/%s/%s.", model_provider, model_name
    )

    # Check if the requested model provider exists
    llm_client_class = MODEL_PROVIDER2CLIENT.get(model_provider)
    if llm_client_class is None:
        logger.error("Invalid model provider: %s", model_provider)
        # Advertise the keys that are actually accepted above, so the message
        # cannot list a provider that has no registered client.
        raise HTTPException(
            status_code=400,
            detail=f"Invalid model provider: {model_provider}. "
                   f"Available providers: {list(MODEL_PROVIDER2CLIENT)}"
        )

    try:
        # Instantiate the client inside the try block so that constructor
        # failures are reported as a 500 like any other generation error.
        llm_client = llm_client_class(model=model_name)

        # Call the client's async method
        output = await llm_client(
            prompt=request.prompt,
            system_prompt=request.system_prompt,
            max_tokens=request.max_tokens
        )
        return output
    except Exception as e:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception(
            "Error during text generation for %s/%s: %s",
            model_provider,
            model_name,
            e,
        )
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.get("/health")
async def health_check():
    """Liveness probe: log the request and return a fixed ``ok`` status."""
    logger.info("Received GET request to /health.")
    return {"status": "ok"}