Spaces:
Running
Running
"""
DealFlow AI — Configuration

Loads env vars and constructs the LLM clients for CrewAI.

AMD Fallback Status: LOCKED at T+63h (2026-05-06).
Primary backend: HuggingFace Serverless Inference API (Qwen/Qwen2.5-72B-Instruct).
AMD MI300X bonus track abandoned per PRE-FLIGHT SPEC #3.
"""
| from __future__ import annotations | |
| import os | |
| from enum import Enum | |
| from pathlib import Path | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| from loguru import logger | |
| from pydantic import BaseModel, Field | |
| load_dotenv() | |
class Backend(str, Enum):
    """Inference backends that can be selected for the LLM client.

    Members are also plain strings, so they compare equal to their values.
    """

    # Self-hosted vLLM server
    VLLM = "vllm"
    # HuggingFace Serverless Inference
    HF = "hf"
    # OpenAI hosted API
    OPENAI = "openai"
# HF Serverless Inference — featherless-ai provider (supports Qwen2.5-72B).
# Verified working endpoint for Qwen/Qwen2.5-72B-Instruct.
HF_ROUTER_BASE_URL = "https://router.huggingface.co/featherless-ai/v1"


def hf_base_url_for(model: str) -> str:  # noqa: ARG001 — kept for future provider routing
    """Return the OpenAI-compatible base URL to use for ``model``.

    The ``model`` argument is currently ignored: every model is served from
    the single router endpoint in ``HF_ROUTER_BASE_URL``.
    """
    endpoint = HF_ROUTER_BASE_URL
    return endpoint
# Primary model, locked at T+63h (AMD credits never arrived).
HF_PRIMARY_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# Smaller alternative to switch to if 72B latency is an issue.
HF_FALLBACK_MODEL = "Qwen/Qwen2.5-32B-Instruct"
class AppConfig(BaseModel):
    """Application settings whose defaults are read from environment variables.

    Every field uses a ``default_factory``, so the environment is read each
    time an instance is created rather than once at import time. Values passed
    explicitly to the constructor take precedence over the environment.

    Environment variables that are defined but empty are treated as unset for
    fields that have a non-empty default, so a blank entry does not replace a
    working default with ``""``.

    Raises:
        ValueError: On instantiation, if ``LLM_BACKEND`` names an unknown
            backend or ``MAX_CREW_ITERATIONS`` is not an integer.
    """

    # Inference backend — default is "hf" (AMD locked out at T+63h).
    # Normalised so "HF", " hf " and an empty value all resolve as expected.
    llm_backend: Backend = Field(
        default_factory=lambda: Backend(
            os.getenv("LLM_BACKEND", "").strip().lower() or "hf"
        )
    )

    # HuggingFace Serverless (primary). HF_TOKEN wins over HUGGINGFACE_TOKEN;
    # an empty string means "no token configured".
    hf_token: str = Field(
        default_factory=lambda: (
            os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACE_TOKEN") or ""
        )
    )
    hf_model: str = Field(
        default_factory=lambda: os.getenv("HF_MODEL") or HF_PRIMARY_MODEL
    )
    # Optional endpoint override; None unless HF_BASE_URL is set.
    hf_base_url: Optional[str] = Field(
        default_factory=lambda: os.getenv("HF_BASE_URL")
    )

    # vLLM (AMD MI300X — kept for future use, currently inactive)
    vllm_base_url: str = Field(
        default_factory=lambda: os.getenv("VLLM_BASE_URL") or "http://localhost:8000/v1"
    )
    vllm_api_key: str = Field(
        default_factory=lambda: os.getenv("VLLM_API_KEY", "")
    )
    vllm_model: str = Field(
        default_factory=lambda: os.getenv("VLLM_MODEL") or "Qwen/Qwen3-VL-32B-Instruct-FP8"
    )

    # Search
    serper_api_key: Optional[str] = Field(
        default_factory=lambda: os.getenv("SERPER_API_KEY")
    )

    # Memory
    mem0_api_key: Optional[str] = Field(
        default_factory=lambda: os.getenv("MEM0_API_KEY")
    )
    mem0_base_url: Optional[str] = Field(
        default_factory=lambda: os.getenv("MEM0_BASE_URL")
    )

    # App
    output_dir: Path = Field(
        default_factory=lambda: Path(os.getenv("OUTPUT_DIR") or "./outputs")
    )
    # A blank MAX_CREW_ITERATIONS falls back to 3 instead of failing in int("").
    max_crew_iterations: int = Field(
        default_factory=lambda: int(os.getenv("MAX_CREW_ITERATIONS", "").strip() or "3")
    )
    # Accepts the common truthy spellings, case-insensitively; anything else is False.
    verbose_agents: bool = Field(
        default_factory=lambda: (
            os.getenv("VERBOSE_AGENTS", "").strip().lower() in {"1", "true", "yes", "on"}
        )
    )

    model_config = {"arbitrary_types_allowed": True}
def get_config() -> AppConfig:
    """Build and return a new ``AppConfig`` instance using its field defaults."""
    settings = AppConfig()
    return settings
def get_llm(config: Optional[AppConfig] = None):
    """Return a CrewAI-compatible LLM instance based on the active backend.

    Args:
        config: Settings to use. When ``None``, a fresh ``AppConfig`` is
            obtained from ``get_config()``.

    Returns:
        A ``crewai.LLM`` configured for ``config.llm_backend``.

    Raises:
        ValueError: If the HF backend is selected without a token, the OpenAI
            backend is selected without ``OPENAI_API_KEY``, or the backend is
            not one of the handled ``Backend`` members.
    """
    if config is None:
        config = get_config()

    # Imported inside the function, so importing this module does not import crewai.
    from crewai import LLM

    # Generation settings shared by every backend.
    common = {"temperature": 0.1, "max_tokens": 4096}

    if config.llm_backend == Backend.HF:
        if not config.hf_token:
            raise ValueError("HF_TOKEN (or HUGGINGFACE_TOKEN) must be set for HF backend")
        # An explicit base-URL override wins; otherwise use the default
        # OpenAI-compatible endpoint returned by hf_base_url_for().
        base_url = config.hf_base_url or hf_base_url_for(config.hf_model)
        logger.info(
            f"Using HF Serverless Inference: model={config.hf_model} base_url={base_url}"
        )
        return LLM(
            model=f"openai/{config.hf_model}",
            base_url=base_url,
            api_key=config.hf_token,
            **common,
        )

    if config.llm_backend == Backend.VLLM:
        logger.info(
            f"Using vLLM backend: {config.vllm_base_url} model={config.vllm_model}"
        )
        return LLM(
            model=f"openai/{config.vllm_model}",
            base_url=config.vllm_base_url,
            # An empty key may be rejected by OpenAI-compatible clients or make
            # them fall back to OPENAI_API_KEY; send the placeholder that
            # vLLM's own examples use when no key is configured.
            api_key=config.vllm_api_key or "EMPTY",
            **common,
        )

    if config.llm_backend == Backend.OPENAI:
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY not set for openai backend")
        return LLM(
            model="gpt-4o",
            api_key=api_key,
            **common,
        )

    # Defensive: reached only if a backend value is not handled above.
    raise ValueError(f"Unknown LLM_BACKEND: {config.llm_backend}")