# llm_engine.py

import google.generativeai as genai
from app.core.config import GEMINI_API_KEY
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_nvidia_ai_endpoints import ChatNVIDIA
import os

# Configure the google.generativeai SDK with the project's Gemini API key.
# This runs as a side effect when the module is imported.
# NOTE(review): the Gemini-backed LangChain models in this file are commented
# out; presumably this call is kept for code that uses `genai` directly —
# TODO confirm it is still needed.
genai.configure(api_key=GEMINI_API_KEY)

# llm = ChatGoogleGenerativeAI(
#     model="gemini-2.5-flash",
#     google_api_key=GEMINI_API_KEY,
#     temperature=0.2,
#     max_output_tokens=800,
# )


# Primary chat model served through NVIDIA AI endpoints.
# The API key is read from the NVIDIA_API_KEY environment variable.
# Alternative model id: nvidia/nemotron-4-340b-instruct
llm = ChatNVIDIA(
    api_key=os.environ.get("NVIDIA_API_KEY"),
    model="meta/llama-3.1-70b-instruct",
    max_tokens=1024,
    temperature=0.7,
)

# Evaluator chat model: a smaller model with temperature 0.0 and a short
# output budget (max_tokens=200).
# The API key is passed explicitly from the NVIDIA_API_KEY environment
# variable, matching how `llm` is configured, instead of relying on the
# client's implicit environment lookup.
eval_llm = ChatNVIDIA(
    model="meta/llama-3.1-8b-instruct",  # Faster for evaluation
    api_key=os.getenv("NVIDIA_API_KEY"),
    temperature=0.0,
    max_tokens=200,
)


# Disabled alternative: a separate Gemini-based LLM for the evaluator — it needs near-deterministic JSON output

# eval_llm = ChatGoogleGenerativeAI(
#     model="gemini-2.0-flash",
#     google_api_key=GEMINI_API_KEY,
#     temperature=0.0,
#     max_output_tokens=200,
# )