# dealflow-ai/src/config.py
# PeterBot22's picture
# feat: DealFlow AI MVP — 3-agent CrewAI due diligence system on HF Spaces
# 8dcf472 verified
"""
DealFlow AI — Configuration
Loads env vars and constructs the LLM clients for CrewAI.
AMD Fallback Status: LOCKED at T+63h (2026-05-06).
Primary backend: HuggingFace Serverless Inference API (Qwen/Qwen2.5-72B-Instruct).
AMD MI300X bonus track abandoned per PRE-FLIGHT SPEC #3.
"""
from __future__ import annotations
import os
from enum import Enum
from pathlib import Path
from typing import Optional
from dotenv import load_dotenv
from loguru import logger
from pydantic import BaseModel, Field
# Load a local .env file (if one is present) into the process environment at
# import time, so the os.getenv / os.environ lookups in this module see its values.
load_dotenv()
class Backend(str, Enum):
    """Inference backends that can be selected via the ``LLM_BACKEND`` setting.

    The ``str`` mixin makes each member compare equal to its raw string value,
    so ``Backend("hf")`` and ``Backend.HF == "hf"`` both work.
    """

    # OpenAI-compatible vLLM server.
    VLLM = "vllm"
    # Hugging Face Serverless Inference.
    HF = "hf"
    # OpenAI hosted API.
    OPENAI = "openai"
# Hugging Face router endpoint for the featherless-ai provider (supports
# Qwen2.5-72B). Recorded by the author as the verified working endpoint for
# Qwen/Qwen2.5-72B-Instruct.
HF_ROUTER_BASE_URL = "https://router.huggingface.co/featherless-ai/v1"


def hf_base_url_for(model: str) -> str:  # noqa: ARG001 — kept for future provider routing
    """Return the inference base URL to use for *model*.

    Every model currently resolves to the same router endpoint; ``model`` is
    accepted but ignored so per-model routing can be added later without
    changing callers.
    """
    endpoint = HF_ROUTER_BASE_URL
    return endpoint
# Default Hugging Face model id: AppConfig.hf_model falls back to this value
# when the HF_MODEL environment variable is not set.
# Primary model locked at T+63h (AMD credits never arrived).
HF_PRIMARY_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# Smaller alternative model id.
# NOTE(review): nothing in the visible part of this module reads this constant —
# presumably it is selected manually by setting HF_MODEL; confirm against callers.
HF_FALLBACK_MODEL = "Qwen/Qwen2.5-32B-Instruct" # if 72B latency is an issue
# Case-insensitive spellings accepted as "enabled" for boolean env vars.
_TRUTHY_ENV_VALUES = frozenset({"1", "true", "yes", "on"})


def _optional_env(name: str) -> Optional[str]:
    """Return env var *name* stripped of whitespace, or None if unset or blank."""
    value = os.getenv(name, "").strip()
    return value or None


def _backend_from_env() -> Backend:
    """Parse ``LLM_BACKEND`` (case-insensitive) into a Backend; default is HF."""
    raw = os.getenv("LLM_BACKEND", "")
    name = raw.strip().lower() or Backend.HF.value
    try:
        return Backend(name)
    except ValueError:
        choices = ", ".join(member.value for member in Backend)
        raise ValueError(
            f"Invalid LLM_BACKEND {raw!r}; expected one of: {choices}"
        ) from None


def _max_iterations_from_env() -> int:
    """Parse ``MAX_CREW_ITERATIONS`` as an int; unset or blank means 3."""
    raw = _optional_env("MAX_CREW_ITERATIONS") or "3"
    try:
        return int(raw)
    except ValueError:
        raise ValueError(
            f"MAX_CREW_ITERATIONS must be an integer, got {raw!r}"
        ) from None


class AppConfig(BaseModel):
    """Application settings whose defaults are read from environment variables.

    Each field's default factory runs when an instance is created, so the
    environment is sampled at construction time; values passed explicitly as
    keyword arguments take precedence over the environment.

    Blank or whitespace-only environment values are treated as "not set":
    string fields fall back to their defaults and optional fields become None.
    Fields that hold credentials are excluded from ``repr()`` so that logging
    a config object does not leak them.

    Raises:
        ValueError: if ``LLM_BACKEND`` is not a known backend or
            ``MAX_CREW_ITERATIONS`` is not an integer.
    """

    # Inference backend — default is "hf" (AMD locked out at T+63h).
    llm_backend: Backend = Field(default_factory=_backend_from_env)

    # HuggingFace Serverless (primary). HF_TOKEN wins over HUGGINGFACE_TOKEN;
    # an empty string means "no token configured".
    hf_token: str = Field(
        default_factory=lambda: (
            _optional_env("HF_TOKEN") or _optional_env("HUGGINGFACE_TOKEN") or ""
        ),
        repr=False,
    )
    hf_model: str = Field(
        default_factory=lambda: _optional_env("HF_MODEL") or HF_PRIMARY_MODEL
    )
    # Explicit endpoint override from HF_BASE_URL; None when not overridden.
    hf_base_url: Optional[str] = Field(
        default_factory=lambda: _optional_env("HF_BASE_URL")
    )

    # vLLM (AMD MI300X — kept for future use, currently inactive)
    vllm_base_url: str = Field(
        default_factory=lambda: (
            _optional_env("VLLM_BASE_URL") or "http://localhost:8000/v1"
        )
    )
    vllm_api_key: str = Field(
        default_factory=lambda: _optional_env("VLLM_API_KEY") or "",
        repr=False,
    )
    vllm_model: str = Field(
        default_factory=lambda: (
            _optional_env("VLLM_MODEL") or "Qwen/Qwen3-VL-32B-Instruct-FP8"
        )
    )

    # Search
    serper_api_key: Optional[str] = Field(
        default_factory=lambda: _optional_env("SERPER_API_KEY"),
        repr=False,
    )

    # Memory
    mem0_api_key: Optional[str] = Field(
        default_factory=lambda: _optional_env("MEM0_API_KEY"),
        repr=False,
    )
    mem0_base_url: Optional[str] = Field(
        default_factory=lambda: _optional_env("MEM0_BASE_URL")
    )

    # App
    output_dir: Path = Field(
        default_factory=lambda: Path(_optional_env("OUTPUT_DIR") or "./outputs")
    )
    max_crew_iterations: int = Field(default_factory=_max_iterations_from_env)
    # Enabled by any of: 1 / true / yes / on (case-insensitive).
    verbose_agents: bool = Field(
        default_factory=lambda: (
            os.getenv("VERBOSE_AGENTS", "").strip().lower() in _TRUTHY_ENV_VALUES
        )
    )

    model_config = {"arbitrary_types_allowed": True}
def get_config() -> AppConfig:
    """Build a new AppConfig using the model's field defaults."""
    config = AppConfig()
    return config
def get_llm(config: Optional[AppConfig] = None):
    """Return a CrewAI-compatible LLM instance based on the active backend.

    Args:
        config: Settings to use. When None, a new configuration is built with
            ``get_config()``.

    Returns:
        A ``crewai.LLM`` configured for ``config.llm_backend``. All backends
        share ``temperature=0.1`` and ``max_tokens=4096``.

    Raises:
        ValueError: if the HF backend is selected without a token, if the
            OpenAI backend is selected without ``OPENAI_API_KEY``, or if the
            backend value is not recognised.
    """
    if config is None:
        config = get_config()

    # Imported inside the function so that importing this module does not
    # itself import crewai.
    from crewai import LLM

    # Generation settings shared by every backend.
    generation = {"temperature": 0.1, "max_tokens": 4096}
    backend = config.llm_backend

    if backend == Backend.HF:
        if not config.hf_token:
            raise ValueError("HF_TOKEN (or HUGGINGFACE_TOKEN) must be set for HF backend")
        # An explicit HF_BASE_URL override wins; otherwise use the built-in
        # router endpoint returned by hf_base_url_for().
        base_url = config.hf_base_url or hf_base_url_for(config.hf_model)
        logger.info(
            f"Using HF Serverless Inference: model={config.hf_model} base_url={base_url}"
        )
        return LLM(
            model=f"openai/{config.hf_model}",
            base_url=base_url,
            api_key=config.hf_token,
            **generation,
        )

    if backend == Backend.VLLM:
        logger.info(
            f"Using vLLM backend: {config.vllm_base_url} model={config.vllm_model}"
        )
        return LLM(
            model=f"openai/{config.vllm_model}",
            base_url=config.vllm_base_url,
            # An empty key may be rejected by OpenAI-compatible clients or
            # replaced with OPENAI_API_KEY from the environment; send the
            # conventional vLLM placeholder when no key is configured.
            api_key=config.vllm_api_key or "EMPTY",
            **generation,
        )

    if backend == Backend.OPENAI:
        # NOTE: the OpenAI key is read straight from the environment here,
        # not from AppConfig.
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY not set for openai backend")
        return LLM(
            model="gpt-4o",
            api_key=api_key,
            **generation,
        )

    # Defensive: only reachable if a new Backend member is added without a
    # matching branch above.
    raise ValueError(f"Unknown LLM_BACKEND: {config.llm_backend}")