Spaces:
Running
Running
# llm_engine.py
| import google.generativeai as genai | |
| from app.core.config import GEMINI_API_KEY | |
| from langchain_google_genai import ChatGoogleGenerativeAI | |
# Register the API key with the google.generativeai SDK at import time.
genai.configure(api_key=GEMINI_API_KEY)

# Main chat model: low temperature, replies capped at 800 output tokens.
llm = ChatGoogleGenerativeAI(
    google_api_key=GEMINI_API_KEY,
    model="gemini-2.5-flash",
    max_output_tokens=800,
    temperature=0.2,
)
# Dedicated model for the evaluator, which only returns a small JSON blob and
# must return valid JSON: temperature 0.0 for near-deterministic output and a
# 200-token cap on the reply.
#
# History (per the original author's notes): an earlier variant used
# "gemini-2.5-flash" and tried to switch off chain-of-thought, first with
#     thinking_level="none"
# and then with
#     model_kwargs={"generation_config": {"thinking_config": {"thinking_budget": 0}}}
# (0 = disabled; noted as bypassing langchain validation). Both attempts are
# disabled; "gemini-2.0-flash" is used instead, noted as "no thinking, faster".
eval_llm = ChatGoogleGenerativeAI(
    google_api_key=GEMINI_API_KEY,
    model="gemini-2.0-flash",
    max_output_tokens=200,
    temperature=0.0,
)