pluto90 committed on
Commit
8a919a4
·
verified ·
1 Parent(s): c8f4c1b

Update app/core/llm_engine.py

Browse files
Files changed (1) hide show
  1. app/core/llm_engine.py +25 -26
app/core/llm_engine.py CHANGED
@@ -3,40 +3,39 @@
3
  import google.generativeai as genai
4
  from app.core.config import GEMINI_API_KEY
5
  from langchain_google_genai import ChatGoogleGenerativeAI
 
 
6
 
7
  # ✅ Configure Gemini client
8
  genai.configure(api_key=GEMINI_API_KEY)
9
 
10
-
11
- llm = ChatGoogleGenerativeAI(
12
- model="gemini-2.5-flash",
13
- google_api_key=GEMINI_API_KEY,
14
- temperature=0.2,
15
- max_output_tokens=800,
16
- )
17
-
18
-
19
- # # ✅ Separate LLM for evaluator — needs near-deterministic JSON output
20
- # eval_llm = ChatGoogleGenerativeAI(
21
  # model="gemini-2.5-flash",
22
  # google_api_key=GEMINI_API_KEY,
23
- # temperature=0.0, # ✅ deterministic — evaluator must return valid JSON
24
- # max_output_tokens=200, # ✅ evaluator only returns a small JSON blob
25
- # thinking_level="none" # to disable chain-of-thought
26
  # )
27
 
28
 
29
- eval_llm = ChatGoogleGenerativeAI(
30
- model="gemini-2.5-flash",
31
- google_api_key=GEMINI_API_KEY,
 
 
 
 
 
 
32
  temperature=0.0,
33
- max_output_tokens=200,
34
- thinking_level="minimal", # least thinking bleed
35
- # model_kwargs={
36
- # "generation_config": {
37
- # "thinking_config": {
38
- # "thinking_budget": 0 # ✅ 0 = disabled, bypasses langchain validation entirely
39
- # }
40
- # }
41
- # }
42
  )
 
 
 
 
 
 
 
 
 
 
 
3
  import google.generativeai as genai
4
  from app.core.config import GEMINI_API_KEY
5
  from langchain_google_genai import ChatGoogleGenerativeAI
6
+ from langchain_nvidia_ai_endpoints import ChatNVIDIA
7
+ import os
8
 
9
  # ✅ Configure Gemini client
10
  genai.configure(api_key=GEMINI_API_KEY)
11
 
12
+ # llm = ChatGoogleGenerativeAI(
 
 
 
 
 
 
 
 
 
 
13
  # model="gemini-2.5-flash",
14
  # google_api_key=GEMINI_API_KEY,
15
+ # temperature=0.2,
16
+ # max_output_tokens=800,
 
17
  # )
18
 
19
 
20
+ llm = ChatNVIDIA(
21
+ model="meta/llama-3.1-70b-instruct", # or nvidia/nemotron-4-340b-instruct
22
+ api_key=os.getenv("NVIDIA_API_KEY"),
23
+ temperature=0.7,
24
+ max_tokens=1024
25
+ )
26
+
27
+ eval_llm = ChatNVIDIA(
28
+ model="meta/llama-3.1-8b-instruct", # Faster for evaluation
29
  temperature=0.0,
30
+ max_tokens=200
 
 
 
 
 
 
 
 
31
  )
32
+
33
+
34
+ # Separate LLM for evaluator — needs near-deterministic JSON output
35
+
36
+ # eval_llm = ChatGoogleGenerativeAI(
37
+ # model="gemini-2.0-flash",
38
+ # google_api_key=GEMINI_API_KEY,
39
+ # temperature=0.0,
40
+ # max_output_tokens=200,
41
+ # )