# llm_engine.py
"""Shared Gemini chat-model clients.

Importing this module configures the ``google.generativeai`` client with the
project's ``GEMINI_API_KEY`` and exposes two module-level LangChain clients:

* ``llm``      -- ``gemini-2.5-flash``, temperature 0.2, at most 800 output
  tokens.
* ``eval_llm`` -- ``gemini-2.0-flash``, temperature 0.0, at most 200 output
  tokens; a separate client for the evaluator, which must return a small,
  valid JSON blob.
"""

import google.generativeai as genai
from langchain_google_genai import ChatGoogleGenerativeAI

from app.core.config import GEMINI_API_KEY

# Configure the Gemini client (module-level side effect, runs on import).
genai.configure(api_key=GEMINI_API_KEY)

llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    google_api_key=GEMINI_API_KEY,
    temperature=0.2,
    max_output_tokens=800,
)

# Separate LLM for the evaluator -- it needs near-deterministic JSON output.
#
# NOTE: earlier revisions kept gemini-2.5-flash here and tried to switch off
# its chain-of-thought, first with thinking_level="none" and then with
# model_kwargs={"generation_config": {"thinking_config": {"thinking_budget": 0}}}.
# Both variants were disabled in favour of a model without thinking.
eval_llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",  # no thinking, faster
    google_api_key=GEMINI_API_KEY,
    temperature=0.0,  # deterministic: the evaluator must return valid JSON
    max_output_tokens=200,  # the evaluator only returns a small JSON blob
)