Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -908,34 +908,34 @@ REMEMBER: One tool per turn. No reasoning without tools. Exact answer format.
|
|
| 908 |
# max_new_tokens=2048, # Increased token limit for potentially longer reasoning/tool use
|
| 909 |
# temperature=0.01, # Keep temperature low for factual tasks
|
| 910 |
# Use tool_choice="any" to FORCE tool usage
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
#
|
| 922 |
-
# except Exception as e:
|
| 923 |
-
# print(f"❌ Error initializing HuggingFace: {e}")
|
| 924 |
-
# raise
|
| 925 |
-
# print("Initializing LLM Endpoint...")
|
| 926 |
-
llm = HuggingFaceEndpoint(
|
| 927 |
-
repo_id="Qwen/Qwen2.5-Coder-32B-Instruct", # Changed model
|
| 928 |
-
huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
|
| 929 |
-
max_new_tokens=2048, # Increased token limit for potentially longer reasoning/tool use
|
| 930 |
-
temperature=0.01, # Keep temperature low for factual tasks
|
| 931 |
-
)
|
| 932 |
-
chat_llm = ChatHuggingFace(llm=llm)
|
| 933 |
-
print("✅ LLM Endpoint initialized.")
|
| 934 |
|
| 935 |
-
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
print("✅ Tools bound to LLM.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 939 |
# print("Initializing Google Gemini LLM...")
|
| 940 |
# try:
|
| 941 |
# self.llm_with_tools = ChatGoogleGenerativeAI(
|
|
|
|
| 908 |
# max_new_tokens=2048, # Increased token limit for potentially longer reasoning/tool use
|
| 909 |
# temperature=0.01, # Keep temperature low for factual tasks
|
| 910 |
# Use tool_choice="any" to FORCE tool usage
|
| 911 |
+
print("Initializing Groq LLM...")
|
| 912 |
+
try:
|
| 913 |
+
self.llm_with_tools = ChatGroq(
|
| 914 |
+
temperature=0,
|
| 915 |
+
groq_api_key=GROQ_API_KEY,
|
| 916 |
+
model_name="qwen/qwen3-32b",
|
| 917 |
+
max_tokens=4096,
|
| 918 |
+
timeout=60
|
| 919 |
+
).bind_tools(self.tools, tool_choice="auto")
|
| 920 |
+
print("✅ LLM initialized without FORCED tool usage.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 921 |
|
| 922 |
+
except Exception as e:
|
| 923 |
+
print(f"❌ Error initializing HuggingFace: {e}")
|
| 924 |
+
raise
|
| 925 |
+
# print("Initializing LLM Endpoint...")
|
| 926 |
+
# llm = HuggingFaceEndpoint(
|
| 927 |
+
# repo_id="Qwen/Qwen2.5-Coder-32B-Instruct", # Changed model
|
| 928 |
+
# huggingfacehub_api_token=HUGGINGFACEHUB_API_TOKEN,
|
| 929 |
+
# max_new_tokens=2048, # Increased token limit for potentially longer reasoning/tool use
|
| 930 |
+
# temperature=0.01, # Keep temperature low for factual tasks
|
| 931 |
+
# )
|
| 932 |
+
# chat_llm = ChatHuggingFace(llm=llm)
|
| 933 |
+
# print("✅ LLM Endpoint initialized.")
|
| 934 |
+
#
|
| 935 |
+
# # 6. Bind tools to the LLM
|
| 936 |
+
# # Ensure the LLM knows how to format calls for the tools
|
| 937 |
+
# self.llm_with_tools = chat_llm.bind_tools(self.tools)
|
| 938 |
+
# print("✅ Tools bound to LLM.")
|
| 939 |
# print("Initializing Google Gemini LLM...")
|
| 940 |
# try:
|
| 941 |
# self.llm_with_tools = ChatGoogleGenerativeAI(
|