""" DealFlow AI — Configuration Loads env vars and constructs the LLM clients for CrewAI. AMD Fallback Status: LOCKED at T+63h (2026-05-06). Primary backend: HuggingFace Serverless Inference API (Qwen/Qwen2.5-72B-Instruct). AMD MI300X bonus track abandoned per PRE-FLIGHT SPEC #3. """ from __future__ import annotations import os from enum import Enum from pathlib import Path from typing import Optional from dotenv import load_dotenv from loguru import logger from pydantic import BaseModel, Field load_dotenv() class Backend(str, Enum): VLLM = "vllm" HF = "hf" OPENAI = "openai" # HF Serverless Inference — featherless-ai provider (supports Qwen2.5-72B) # Verified working endpoint for Qwen/Qwen2.5-72B-Instruct HF_ROUTER_BASE_URL = "https://router.huggingface.co/featherless-ai/v1" def hf_base_url_for(model: str) -> str: # noqa: ARG001 — kept for future provider routing return HF_ROUTER_BASE_URL # Primary model locked at T+63h (AMD credits never arrived) HF_PRIMARY_MODEL = "Qwen/Qwen2.5-72B-Instruct" HF_FALLBACK_MODEL = "Qwen/Qwen2.5-32B-Instruct" # if 72B latency is an issue class AppConfig(BaseModel): # Inference backend — default is now "hf" (AMD locked out at T+63h) llm_backend: Backend = Field( default_factory=lambda: Backend(os.getenv("LLM_BACKEND", "hf")) ) # HuggingFace Serverless (primary) hf_token: str = Field( default_factory=lambda: ( os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACE_TOKEN") or "" ) ) hf_model: str = Field( default_factory=lambda: os.getenv("HF_MODEL", HF_PRIMARY_MODEL) ) # Computed dynamically from hf_model if not explicitly overridden hf_base_url: Optional[str] = Field( default_factory=lambda: os.getenv("HF_BASE_URL") ) # vLLM (AMD MI300X — kept for future use, currently inactive) vllm_base_url: str = Field( default_factory=lambda: os.getenv("VLLM_BASE_URL", "http://localhost:8000/v1") ) vllm_api_key: str = Field( default_factory=lambda: os.environ.get("VLLM_API_KEY", "") ) vllm_model: str = Field( default_factory=lambda: os.getenv("VLLM_MODEL", "Qwen/Qwen3-VL-32B-Instruct-FP8") ) # Search serper_api_key: Optional[str] = Field( default_factory=lambda: os.environ.get("SERPER_API_KEY") ) # Memory mem0_api_key: Optional[str] = Field( default_factory=lambda: os.getenv("MEM0_API_KEY") ) mem0_base_url: Optional[str] = Field( default_factory=lambda: os.getenv("MEM0_BASE_URL") ) # App output_dir: Path = Field( default_factory=lambda: Path(os.getenv("OUTPUT_DIR", "./outputs")) ) max_crew_iterations: int = Field( default_factory=lambda: int(os.getenv("MAX_CREW_ITERATIONS", "3")) ) verbose_agents: bool = Field( default_factory=lambda: os.getenv("VERBOSE_AGENTS", "false").lower() == "true" ) model_config = {"arbitrary_types_allowed": True} def get_config() -> AppConfig: return AppConfig() def get_llm(config: Optional[AppConfig] = None): """Return a CrewAI-compatible LLM instance based on active backend.""" if config is None: config = get_config() from crewai import LLM if config.llm_backend == Backend.HF: if not config.hf_token: raise ValueError("HF_TOKEN (or HUGGINGFACE_TOKEN) must be set for HF backend") # HF Serverless uses model-specific OpenAI-compatible paths: # https://api-inference.huggingface.co/models/{model}/v1/chat/completions base_url = config.hf_base_url or hf_base_url_for(config.hf_model) logger.info( f"Using HF Serverless Inference: model={config.hf_model} base_url={base_url}" ) return LLM( model=f"openai/{config.hf_model}", base_url=base_url, api_key=config.hf_token, temperature=0.1, max_tokens=4096, ) elif config.llm_backend == Backend.VLLM: logger.info( f"Using vLLM backend: {config.vllm_base_url} model={config.vllm_model}" ) return LLM( model=f"openai/{config.vllm_model}", base_url=config.vllm_base_url, api_key=config.vllm_api_key, temperature=0.1, max_tokens=4096, ) elif config.llm_backend == Backend.OPENAI: api_key = os.getenv("OPENAI_API_KEY") if not api_key: raise ValueError("OPENAI_API_KEY not set for openai backend") return LLM( model="gpt-4o", api_key=api_key, temperature=0.1, max_tokens=4096, ) else: raise ValueError(f"Unknown LLM_BACKEND: {config.llm_backend}")