pluto90 committed on
Commit
59a7be2
·
verified ·
1 Parent(s): 1f0931f

Update app/core/llm_engine.py

Browse files
Files changed (1) hide show
  1. app/core/llm_engine.py +42 -41
app/core/llm_engine.py CHANGED
@@ -1,41 +1,42 @@
1
- # # llm_engine.py
2
-
3
- import google.generativeai as genai
4
- from app.core.config import GEMINI_API_KEY
5
- from langchain_google_genai import ChatGoogleGenerativeAI
6
-
7
- # ✅ Configure Gemini client
8
- genai.configure(api_key=GEMINI_API_KEY)
9
-
10
-
11
- llm = ChatGoogleGenerativeAI(
12
- model="gemini-2.5-flash",
13
- google_api_key=GEMINI_API_KEY,
14
- temperature=0.2,
15
- max_output_tokens=800,
16
- )
17
-
18
-
19
- # # ✅ Separate LLM for evaluator — needs near-deterministic JSON output
20
- # eval_llm = ChatGoogleGenerativeAI(
21
- # model="gemini-2.5-flash",
22
- # google_api_key=GEMINI_API_KEY,
23
- # temperature=0.0, # ✅ deterministic — evaluator must return valid JSON
24
- # max_output_tokens=200, # ✅ evaluator only returns a small JSON blob
25
- # thinking_level="none" # to disable chain-of-thought
26
- # )
27
-
28
-
29
- eval_llm = ChatGoogleGenerativeAI(
30
- model="gemini-2.0-flash", # no thinking, faster
31
- google_api_key=GEMINI_API_KEY,
32
- temperature=0.0,
33
- max_output_tokens=200,
34
- # model_kwargs={
35
- # "generation_config": {
36
- # "thinking_config": {
37
- # "thinking_budget": 0 # ✅ 0 = disabled, bypasses langchain validation entirely
38
- # }
39
- # }
40
- # }
41
- )
 
 
1
+ # # llm_engine.py
2
+
3
+ import google.generativeai as genai
4
+ from app.core.config import GEMINI_API_KEY
5
+ from langchain_google_genai import ChatGoogleGenerativeAI
6
+
7
+ # ✅ Configure Gemini client
8
+ genai.configure(api_key=GEMINI_API_KEY)
9
+
10
+
11
+ llm = ChatGoogleGenerativeAI(
12
+ model="gemini-2.5-flash",
13
+ google_api_key=GEMINI_API_KEY,
14
+ temperature=0.2,
15
+ max_output_tokens=800,
16
+ )
17
+
18
+
19
+ # # ✅ Separate LLM for evaluator — needs near-deterministic JSON output
20
+ # eval_llm = ChatGoogleGenerativeAI(
21
+ # model="gemini-2.5-flash",
22
+ # google_api_key=GEMINI_API_KEY,
23
+ # temperature=0.0, # ✅ deterministic — evaluator must return valid JSON
24
+ # max_output_tokens=200, # ✅ evaluator only returns a small JSON blob
25
+ # thinking_level="none" # to disable chain-of-thought
26
+ # )
27
+
28
+
29
+ eval_llm = ChatGoogleGenerativeAI(
30
+ model="gemini-2.5-flash",
31
+ google_api_key=GEMINI_API_KEY,
32
+ temperature=0.0,
33
+ max_output_tokens=200,
34
+ thinking_level="minimal", # least thinking bleed
35
+ # model_kwargs={
36
+ # "generation_config": {
37
+ # "thinking_config": {
38
+ # "thinking_budget": 0 # ✅ 0 = disabled, bypasses langchain validation entirely
39
+ # }
40
+ # }
41
+ # }
42
+ )