Spaces:
Running
Running
File size: 997 Bytes
59a7be2 8a919a4 59a7be2 8a919a4 59a7be2 8a919a4 59a7be2 8a919a4 59a7be2 8a919a4 59a7be2 8a919a4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 | # # llm_engine.py
import google.generativeai as genai
from app.core.config import GEMINI_API_KEY
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_nvidia_ai_endpoints import ChatNVIDIA
import os
# Configure the google.generativeai SDK with the project's Gemini API key.
# This is a module-level statement, so it runs as a side effect of importing
# this module.
# NOTE(review): the Gemini-backed models in this file are commented out and the
# active ones are ChatNVIDIA instances — confirm whether this call is still needed.
genai.configure(api_key=GEMINI_API_KEY)
# Disabled alternative: Gemini-backed main LLM, kept for reference.
# llm = ChatGoogleGenerativeAI(
#     model="gemini-2.5-flash",
#     google_api_key=GEMINI_API_KEY,
#     temperature=0.2,
#     max_output_tokens=800,
# )
# Main chat model served through NVIDIA AI endpoints. The API key is read
# from the NVIDIA_API_KEY environment variable when this module is imported.
llm = ChatNVIDIA(
    model="meta/llama-3.1-70b-instruct",  # or nvidia/nemotron-4-340b-instruct
    api_key=os.environ.get("NVIDIA_API_KEY"),
    temperature=0.7,
    max_tokens=1024,
)
# Evaluator model: a smaller model chosen because it is faster for evaluation,
# with temperature 0.0 and a short output cap (200 tokens).
# The API key is passed explicitly, matching how `llm` is configured, so both
# models obtain their credentials the same way instead of one relying on the
# library's implicit environment lookup.
eval_llm = ChatNVIDIA(
    model="meta/llama-3.1-8b-instruct",  # Faster for evaluation
    api_key=os.getenv("NVIDIA_API_KEY"),
    temperature=0.0,
    max_tokens=200,
)
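# Disabled alternative: Gemini-backed evaluator LLM, kept for reference.
# The evaluator uses a separate LLM because it needs near-deterministic JSON
# output, hence temperature=0.0.
# eval_llm = ChatGoogleGenerativeAI(
#     model="gemini-2.0-flash",
#     google_api_key=GEMINI_API_KEY,
#     temperature=0.0,
#     max_output_tokens=200,
# )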
|