Spaces:

pluto90
/

Smart-Notes-backend

Running

pluto90 committed on Apr 13

Commit

59a7be2

verified ·

1 Parent(s): 1f0931f

Update app/core/llm_engine.py

Files changed (1) hide show

app/core/llm_engine.py CHANGED Viewed

@@ -1,41 +1,42 @@
-# # llm_engine.py
-import google.generativeai as genai
-from app.core.config import GEMINI_API_KEY
-from langchain_google_genai import ChatGoogleGenerativeAI
-# ✅ Configure Gemini client
-genai.configure(api_key=GEMINI_API_KEY)
-llm = ChatGoogleGenerativeAI(
-    model="gemini-2.5-flash",
-    google_api_key=GEMINI_API_KEY,
-    temperature=0.2,
-    max_output_tokens=800,
-)
-# # ✅ Separate LLM for evaluator — needs near-deterministic JSON output
-# eval_llm = ChatGoogleGenerativeAI(
-#     model="gemini-2.5-flash",
-#     google_api_key=GEMINI_API_KEY,
-#     temperature=0.0,              # ✅ deterministic — evaluator must return valid JSON
-#     max_output_tokens=200,        # ✅ evaluator only returns a small JSON blob
-#     thinking_level="none" # to disable chain-of-thought
-# )
-eval_llm = ChatGoogleGenerativeAI(
-    model="gemini-2.0-flash",  # no thinking, faster
-    google_api_key=GEMINI_API_KEY,
-    temperature=0.0,
-    max_output_tokens=200,
-    # model_kwargs={
-    #     "generation_config": {
-    #         "thinking_config": {
-    #             "thinking_budget": 0  # ✅ 0 = disabled, bypasses langchain validation entirely
-    #         }
-    #     }
-    # }
-)

+# llm_engine.py
+# Builds the two shared Gemini chat models used by the app:
+#   llm      - general-purpose generation
+#   eval_llm - evaluator that must return a small, valid JSON blob
+import google.generativeai as genai
+from app.core.config import GEMINI_API_KEY
+from langchain_google_genai import ChatGoogleGenerativeAI
+# Configure the google.generativeai client with the project API key.
+genai.configure(api_key=GEMINI_API_KEY)
+# General-purpose model: low temperature, replies capped at 800 tokens.
+llm = ChatGoogleGenerativeAI(
+    model="gemini-2.5-flash",
+    google_api_key=GEMINI_API_KEY,
+    temperature=0.2,
+    max_output_tokens=800,
+)
+# Separate model for the evaluator: temperature 0.0 for near-deterministic
+# output and a 200-token cap because it only returns a small JSON blob.
+eval_llm = ChatGoogleGenerativeAI(
+    model="gemini-2.5-flash",
+    google_api_key=GEMINI_API_KEY,
+    temperature=0.0,
+    max_output_tokens=200,
+    # Gemini 2.5 models are controlled with `thinking_budget`; `thinking_level`
+    # is the Gemini 3 setting. A budget of 0 switches thinking off on
+    # gemini-2.5-flash so reasoning cannot bleed into the short JSON reply.
+    thinking_budget=0,
+)