File size: 997 Bytes
59a7be2
 
 
 
 
8a919a4
 
59a7be2
 
 
 
8a919a4
59a7be2
 
8a919a4
 
59a7be2
 
 
8a919a4
 
 
 
 
 
 
 
 
59a7be2
8a919a4
59a7be2
8a919a4
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# llm_engine.py

import google.generativeai as genai
from app.core.config import GEMINI_API_KEY
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_nvidia_ai_endpoints import ChatNVIDIA
import os

# Configure the google.generativeai client with the project's Gemini API key.
# NOTE(review): the Gemini-backed chat models in this module are currently
# commented out, so this call may be vestigial — confirm before removing.
genai.configure(api_key=GEMINI_API_KEY)

# llm = ChatGoogleGenerativeAI(
#     model="gemini-2.5-flash",
#     google_api_key=GEMINI_API_KEY,
#     temperature=0.2,
#     max_output_tokens=800,
# )


# Primary chat model, created through the langchain NVIDIA AI Endpoints client.
# Alternative model id: nvidia/nemotron-4-340b-instruct
llm = ChatNVIDIA(
    api_key=os.environ.get("NVIDIA_API_KEY"),
    model="meta/llama-3.1-70b-instruct",
    max_tokens=1024,
    temperature=0.7,
)

# Evaluator model: a smaller model chosen for faster evaluation, run at
# temperature 0 so its output is as deterministic as possible.
eval_llm = ChatNVIDIA(
    model="meta/llama-3.1-8b-instruct",
    # Pass the key explicitly, matching how `llm` is configured, instead of
    # relying on the client's implicit environment lookup.
    api_key=os.getenv("NVIDIA_API_KEY"),
    temperature=0.0,
    max_tokens=200,
)


# Previous Gemini-based evaluator LLM (disabled, kept for reference).
# The evaluator uses a separate LLM because it needs near-deterministic JSON output.

# eval_llm = ChatGoogleGenerativeAI(
#     model="gemini-2.0-flash",
#     google_api_key=GEMINI_API_KEY,
#     temperature=0.0,
#     max_output_tokens=200,
# )