Spaces:

pluto90
/

Smart-Notes-backend

Running

pluto90 committed 23 days ago

Commit

8a919a4

verified ·

1 Parent(s): c8f4c1b

Update app/core/llm_engine.py

Files changed (1) hide show

app/core/llm_engine.py CHANGED Viewed

@@ -3,40 +3,39 @@
 import google.generativeai as genai
 from app.core.config import GEMINI_API_KEY
 from langchain_google_genai import ChatGoogleGenerativeAI
 # ✅ Configure Gemini client
 genai.configure(api_key=GEMINI_API_KEY)
-llm = ChatGoogleGenerativeAI(
-    model="gemini-2.5-flash",
-    google_api_key=GEMINI_API_KEY,
-    temperature=0.2,
-    max_output_tokens=800,
-)
-# # ✅ Separate LLM for evaluator — needs near-deterministic JSON output
-# eval_llm = ChatGoogleGenerativeAI(
 #     model="gemini-2.5-flash",
 #     google_api_key=GEMINI_API_KEY,
-#     temperature=0.0,              # ✅ deterministic — evaluator must return valid JSON
-#     max_output_tokens=200,        # ✅ evaluator only returns a small JSON blob
-#     thinking_level="none" # to disable chain-of-thought
 # )
-eval_llm = ChatGoogleGenerativeAI(
-    model="gemini-2.5-flash",
-    google_api_key=GEMINI_API_KEY,
     temperature=0.0,
-    max_output_tokens=200,
-    thinking_level="minimal", # least thinking bleed
-    # model_kwargs={
-    #     "generation_config": {
-    #         "thinking_config": {
-    #             "thinking_budget": 0  # ✅ 0 = disabled, bypasses langchain validation entirely
-    #         }
-    #     }
-    # }
 )

 import google.generativeai as genai
 from app.core.config import GEMINI_API_KEY
 from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_nvidia_ai_endpoints import ChatNVIDIA
+import os
 # ✅ Configure Gemini client
 genai.configure(api_key=GEMINI_API_KEY)
+# llm = ChatGoogleGenerativeAI(
 #     model="gemini-2.5-flash",
 #     google_api_key=GEMINI_API_KEY,
+#     temperature=0.2,
+#     max_output_tokens=800,
 # )
+llm = ChatNVIDIA(
+    model="meta/llama-3.1-70b-instruct",  # or nvidia/nemotron-4-340b-instruct
+    api_key=os.getenv("NVIDIA_API_KEY"),
+    temperature=0.7,
+    max_tokens=1024
+)
+eval_llm = ChatNVIDIA(
+    model="meta/llama-3.1-8b-instruct",  # Faster for evaluation
     temperature=0.0,
+    max_tokens=200
 )
+# Separate LLM for evaluator — needs near-deterministic JSON output
+# eval_llm = ChatGoogleGenerativeAI(
+#     model="gemini-2.0-flash",
+#     google_api_key=GEMINI_API_KEY,
+#     temperature=0.0,
+#     max_output_tokens=200,
+# )