File size: 4,818 Bytes
8dcf472
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""
DealFlow AI — Configuration
Loads env vars and constructs the LLM clients for CrewAI.

AMD Fallback Status: LOCKED at T+63h (2026-05-06).
Primary backend: HuggingFace Serverless Inference API (Qwen/Qwen2.5-72B-Instruct).
AMD MI300X bonus track abandoned per PRE-FLIGHT SPEC #3.
"""
from __future__ import annotations

import os
from enum import Enum
from pathlib import Path
from typing import Optional

from dotenv import load_dotenv
from loguru import logger
from pydantic import BaseModel, Field

load_dotenv()


class Backend(str, Enum):
    """LLM inference backends that ``get_llm`` can build a client for.

    Mixing in ``str`` makes each member compare equal to its raw string
    value, and ``Backend("hf")`` looks a member up from that string (this is
    how ``AppConfig.llm_backend`` parses the ``LLM_BACKEND`` variable).
    """

    # vLLM server reached through ``AppConfig.vllm_base_url``.
    VLLM = "vllm"
    # HuggingFace inference reached through the HF router base URL.
    HF = "hf"
    # OpenAI API, authenticated with the ``OPENAI_API_KEY`` variable.
    OPENAI = "openai"


# HF Serverless Inference — featherless-ai provider (supports Qwen2.5-72B).
# Verified working endpoint for Qwen/Qwen2.5-72B-Instruct.
# Used as the HF backend's base URL whenever HF_BASE_URL is not set.
HF_ROUTER_BASE_URL = "https://router.huggingface.co/featherless-ai/v1"


def hf_base_url_for(model: str) -> str:  # noqa: ARG001 — kept for future provider routing
    """Return the base URL to use for *model* on the HF backend.

    Every model currently resolves to the single router endpoint stored in
    ``HF_ROUTER_BASE_URL``; ``model`` is accepted but not consulted.
    """
    router_url: str = HF_ROUTER_BASE_URL
    return router_url

# Primary model locked at T+63h (AMD credits never arrived).
# Default for the HF backend; the HF_MODEL environment variable overrides it.
HF_PRIMARY_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# NOTE(review): not referenced by any code visible in this file — presumably
# meant to be selected by hand via HF_MODEL; confirm before relying on it.
HF_FALLBACK_MODEL = "Qwen/Qwen2.5-32B-Instruct"   # if 72B latency is an issue


class AppConfig(BaseModel):
    """Application settings, read from environment variables.

    Every field is filled by a ``default_factory`` that reads the process
    environment when the model is instantiated, so each ``AppConfig()`` call
    reflects the environment at that moment. A value passed explicitly as a
    keyword argument takes precedence over the environment.

    A variable that is set but empty (for example ``HF_MODEL=`` in a ``.env``
    file) is treated like an unset one and falls back to the field default,
    matching how ``hf_token`` has always been resolved.

    Raises (on construction):
        ValueError: if ``LLM_BACKEND`` does not name a ``Backend`` member or
            ``MAX_CREW_ITERATIONS`` is not an integer.
    """

    # Inference backend — default is now "hf" (AMD locked out at T+63h).
    # Normalised so "HF" or " hf " in the environment selects the same member
    # instead of failing the enum lookup.
    llm_backend: Backend = Field(
        default_factory=lambda: Backend(
            os.getenv("LLM_BACKEND", "").strip().lower() or "hf"
        )
    )

    # HuggingFace Serverless (primary)
    # HF_TOKEN wins over HUGGINGFACE_TOKEN; empty string means "no token".
    hf_token: str = Field(
        default_factory=lambda: (
            os.environ.get("HF_TOKEN")
            or os.environ.get("HUGGINGFACE_TOKEN")
            or ""
        )
    )
    hf_model: str = Field(
        default_factory=lambda: os.getenv("HF_MODEL") or HF_PRIMARY_MODEL
    )
    # Explicit override only; when None, get_llm derives the URL from hf_model.
    hf_base_url: Optional[str] = Field(
        default_factory=lambda: os.getenv("HF_BASE_URL") or None
    )

    # vLLM (AMD MI300X — kept for future use, currently inactive)
    vllm_base_url: str = Field(
        default_factory=lambda: os.getenv("VLLM_BASE_URL") or "http://localhost:8000/v1"
    )
    vllm_api_key: str = Field(
        default_factory=lambda: os.environ.get("VLLM_API_KEY", "")
    )
    vllm_model: str = Field(
        default_factory=lambda: os.getenv("VLLM_MODEL") or "Qwen/Qwen3-VL-32B-Instruct-FP8"
    )

    # Search
    serper_api_key: Optional[str] = Field(
        default_factory=lambda: os.environ.get("SERPER_API_KEY") or None
    )

    # Memory
    mem0_api_key: Optional[str] = Field(
        default_factory=lambda: os.getenv("MEM0_API_KEY") or None
    )
    mem0_base_url: Optional[str] = Field(
        default_factory=lambda: os.getenv("MEM0_BASE_URL") or None
    )

    # App
    output_dir: Path = Field(
        default_factory=lambda: Path(os.getenv("OUTPUT_DIR") or "./outputs")
    )
    max_crew_iterations: int = Field(
        default_factory=lambda: int(os.getenv("MAX_CREW_ITERATIONS") or "3")
    )
    # Only the literal "true" (any case, surrounding whitespace ignored)
    # enables verbose agents; every other value is False.
    verbose_agents: bool = Field(
        default_factory=lambda: os.getenv("VERBOSE_AGENTS", "false").strip().lower() == "true"
    )

    model_config = {"arbitrary_types_allowed": True}


def get_config() -> AppConfig:
    """Build and return a new ``AppConfig``.

    No arguments are passed, so every field takes the value produced by its
    own default factory.
    """
    settings = AppConfig()
    return settings


def get_llm(config: Optional[AppConfig] = None):
    """Return a CrewAI-compatible LLM instance based on active backend.

    Args:
        config: Settings to build the client from. When ``None``, a fresh
            ``AppConfig`` is obtained from ``get_config()``.

    Returns:
        A ``crewai.LLM`` for ``config.llm_backend``. All backends share the
        same sampling settings (``temperature=0.1``, ``max_tokens=4096``).

    Raises:
        ValueError: If the selected backend's credentials are missing
            (``HF_TOKEN``/``HUGGINGFACE_TOKEN`` for ``hf``,
            ``OPENAI_API_KEY`` for ``openai``) or the backend is unknown.
    """
    if config is None:
        config = get_config()

    # Function-scope import: crewai is only needed once an LLM is built.
    from crewai import LLM

    # Sampling settings shared by every backend, kept in one place so the
    # branches cannot drift apart.
    generation_kwargs = {"temperature": 0.1, "max_tokens": 4096}

    if config.llm_backend == Backend.HF:
        if not config.hf_token:
            raise ValueError("HF_TOKEN (or HUGGINGFACE_TOKEN) must be set for HF backend")
        # An explicit HF_BASE_URL override wins; otherwise fall back to the
        # router endpoint returned by hf_base_url_for().
        base_url = config.hf_base_url or hf_base_url_for(config.hf_model)
        logger.info(
            f"Using HF Serverless Inference: model={config.hf_model} base_url={base_url}"
        )
        # NOTE(review): the "openai/" prefix presumably selects CrewAI's
        # OpenAI-compatible provider for a custom base_url; confirm against
        # the CrewAI LLM docs.
        return LLM(
            model=f"openai/{config.hf_model}",
            base_url=base_url,
            api_key=config.hf_token,
            **generation_kwargs,
        )

    elif config.llm_backend == Backend.VLLM:
        logger.info(
            f"Using vLLM backend: {config.vllm_base_url} model={config.vllm_model}"
        )
        return LLM(
            model=f"openai/{config.vllm_model}",
            base_url=config.vllm_base_url,
            # VLLM_API_KEY is optional. Never forward an empty key: send the
            # "EMPTY" placeholder that vLLM's OpenAI-client examples use for
            # servers started without --api-key.
            api_key=config.vllm_api_key or "EMPTY",
            **generation_kwargs,
        )

    elif config.llm_backend == Backend.OPENAI:
        # Read directly from the environment; AppConfig has no OpenAI field.
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise ValueError("OPENAI_API_KEY not set for openai backend")
        return LLM(
            model="gpt-4o",
            api_key=api_key,
            **generation_kwargs,
        )

    else:
        # Defensive: only reachable if a Backend member is added without a
        # matching branch above.
        raise ValueError(f"Unknown LLM_BACKEND: {config.llm_backend}")