Spaces:
Running
Running
# llm_engine.py
#
# Module-level LLM client setup. Importing this module:
#   * configures the google.generativeai client with GEMINI_API_KEY, and
#   * builds two NVIDIA-hosted chat models:
#       - ``llm``      : main generation model
#       - ``eval_llm`` : smaller model used for evaluation
#
# The earlier Gemini-based definitions are kept as commented-out
# alternatives next to the client that replaced them.
import os

import google.generativeai as genai
from app.core.config import GEMINI_API_KEY
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_nvidia_ai_endpoints import ChatNVIDIA

# Configure Gemini client
genai.configure(api_key=GEMINI_API_KEY)

# Read the NVIDIA key once so both clients below are built with the same
# credential (may be None if the variable is not set).
_NVIDIA_API_KEY = os.getenv("NVIDIA_API_KEY")

# Main generation LLM.
# Gemini alternative (disabled):
# llm = ChatGoogleGenerativeAI(
#     model="gemini-2.5-flash",
#     google_api_key=GEMINI_API_KEY,
#     temperature=0.2,
#     max_output_tokens=800,
# )
llm = ChatNVIDIA(
    model="meta/llama-3.1-70b-instruct",  # or nvidia/nemotron-4-340b-instruct
    api_key=_NVIDIA_API_KEY,
    temperature=0.7,
    max_tokens=1024,
)

# Separate LLM for evaluator — needs near-deterministic JSON output,
# hence temperature=0.0 and a small token budget.
# Gemini alternative (disabled):
# eval_llm = ChatGoogleGenerativeAI(
#     model="gemini-2.0-flash",
#     google_api_key=GEMINI_API_KEY,
#     temperature=0.0,
#     max_output_tokens=200,
# )
eval_llm = ChatNVIDIA(
    model="meta/llama-3.1-8b-instruct",  # Faster for evaluation
    api_key=_NVIDIA_API_KEY,  # passed explicitly, consistent with `llm`
    temperature=0.0,
    max_tokens=200,
)