File size: 1,128 Bytes
34b6cef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import os
from dotenv import load_dotenv

from langchain_groq import ChatGroq
from langchain_openai import ChatOpenAI

# Load variables from a .env file (when one is found) at import time, so the
# os.getenv() lookups inside get_llm() can see them.
load_dotenv()


def get_llm():
    """
    Dynamic LLM Provider

    Build the chat model selected by the ``LLM_PROVIDER`` environment
    variable (default: ``groq``).

    DEV MODE (``LLM_PROVIDER=groq``):
        Groq API, model ``llama-3.1-8b-instant``, keyed by ``GROQ_API_KEY``.

    PROD MODE (``LLM_PROVIDER=amd``):
        AMD MI300X + vLLM + Qwen2.5-7B-Instruct, reached through
        ``ChatOpenAI``. ``AMD_BASE_URL`` and ``AMD_MODEL`` override the
        built-in endpoint and model name.

    The provider name is compared case-insensitively and with surrounding
    whitespace removed, so values such as ``"AMD"`` or ``"amd "`` still
    select the AMD backend. An empty value is treated as the default. Any
    other unrecognized value falls back to Groq and logs a warning.

    Returns:
        A ``ChatOpenAI`` instance when the provider is ``amd``, otherwise a
        ``ChatGroq`` instance. Both are configured with ``temperature=0.1``
        and ``max_tokens=1200``.
    """
    import logging

    # Normalize before comparing: stray whitespace from env files or shell
    # exports must not silently route production traffic to the dev backend.
    provider = os.getenv("LLM_PROVIDER", "groq").strip().lower() or "groq"

    # =========================
    # AMD GPU Inference (PROD)
    # =========================
    if provider == "amd":
        return ChatOpenAI(
            # Placeholder key; presumably the vLLM endpoint does not
            # validate it -- confirm against the deployment.
            api_key="dummy",
            base_url=os.getenv(
                "AMD_BASE_URL",
                "http://129.212.182.205:8000/v1"
            ),
            model=os.getenv(
                "AMD_MODEL",
                "Qwen/Qwen2.5-7B-Instruct"
            ),
            temperature=0.1,
            max_tokens=1200,
        )

    if provider != "groq":
        # Keep the historical fallback, but make a misconfiguration visible.
        logging.getLogger(__name__).warning(
            "Unknown LLM_PROVIDER %r; falling back to Groq.", provider
        )

    # =========================
    # GROQ (DEV)
    # =========================
    return ChatGroq(
        groq_api_key=os.getenv(
            "GROQ_API_KEY"
        ),
        model_name="llama-3.1-8b-instant",
        temperature=0.1,
        max_tokens=1200,
    )