# llm_engine.py
"""Shared chat-model clients for the application.

Exposes two module-level objects:

* ``llm``      -- general-purpose generation model.
* ``eval_llm`` -- smaller model used for evaluation, run at temperature 0.0
  because the evaluator needs near-deterministic JSON output.

The Gemini SDK is still configured at import time so the Gemini-backed
alternatives (kept below as commented-out reference) can be re-enabled
without further setup.
"""
import os

import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI  # noqa: F401  (used by the Gemini alternatives below)
from langchain_nvidia_ai_endpoints import ChatNVIDIA

from app.core.config import GEMINI_API_KEY

# Configure the Gemini client (import-time side effect relied on by the
# Gemini alternatives).
genai.configure(api_key=GEMINI_API_KEY)

# Read the NVIDIA key once so both clients are guaranteed to use the same
# credential. NOTE(review): ChatNVIDIA presumably falls back to the
# NVIDIA_API_KEY environment variable when no key is passed -- confirm;
# passing it explicitly to both keeps the two clients consistent either way.
_NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")

# Gemini alternative for the main model:
# llm = ChatGoogleGenerativeAI(
#     model="gemini-2.5-flash",
#     google_api_key=GEMINI_API_KEY,
#     temperature=0.2,
#     max_output_tokens=800,
# )

# Main generation model.
llm = ChatNVIDIA(
    model="meta/llama-3.1-70b-instruct",  # or nvidia/nemotron-4-340b-instruct
    api_key=_NVIDIA_API_KEY,
    temperature=0.7,
    max_tokens=1024,
)

# Separate LLM for the evaluator -- needs near-deterministic JSON output,
# hence temperature 0.0 and a small token budget.
eval_llm = ChatNVIDIA(
    model="meta/llama-3.1-8b-instruct",  # Faster for evaluation
    api_key=_NVIDIA_API_KEY,
    temperature=0.0,
    max_tokens=200,
)

# Gemini alternative for the evaluator:
# eval_llm = ChatGoogleGenerativeAI(
#     model="gemini-2.0-flash",
#     google_api_key=GEMINI_API_KEY,
#     temperature=0.0,
#     max_output_tokens=200,
# )