Spaces:
Running
on
Zero
Running
on
Zero
jedick
committed on
Commit
·
f52b66d
1
Parent(s):
158fae7
Support multiple tool calls for local models
Browse files- app.py +5 -4
- mods/tool_calling_llm.py +51 -34
- prompts.py +11 -2
app.py
CHANGED
|
@@ -382,7 +382,8 @@ with gr.Blocks(
|
|
| 382 |
status_text = f"""
|
| 383 |
π Now in **local** mode, using ZeroGPU hardware<br>
|
| 384 |
β Response time is about one minute<br>
|
| 385 |
-
π§
|
|
|
|
| 386 |
β¨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
|
| 387 |
π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
|
| 388 |
"""
|
|
@@ -400,8 +401,8 @@ with gr.Blocks(
|
|
| 400 |
end = None
|
| 401 |
info_text = f"""
|
| 402 |
**Database:** {len(sources)} emails from {start} to {end}.
|
| 403 |
-
**Features:** RAG, today's date, hybrid search (dense+sparse),
|
| 404 |
-
|
| 405 |
**Tech:** LangChain + Hugging Face + Gradio; ChromaDB and BM25S-based retrievers.<br>
|
| 406 |
"""
|
| 407 |
return info_text
|
|
@@ -448,7 +449,7 @@ with gr.Blocks(
|
|
| 448 |
gr.Examples(
|
| 449 |
examples=[[q] for q in multi_tool_questions],
|
| 450 |
inputs=[input],
|
| 451 |
-
label="Multiple retrievals
|
| 452 |
elem_id="example-questions",
|
| 453 |
)
|
| 454 |
multi_turn_questions = [
|
|
|
|
| 382 |
status_text = f"""
|
| 383 |
π Now in **local** mode, using ZeroGPU hardware<br>
|
| 384 |
β Response time is about one minute<br>
|
| 385 |
+
π§ Add **/think** to enable thinking for answer</br>
|
| 386 |
+
  π Thinking is already enabled for query<br>
|
| 387 |
β¨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
|
| 388 |
π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
|
| 389 |
"""
|
|
|
|
| 401 |
end = None
|
| 402 |
info_text = f"""
|
| 403 |
**Database:** {len(sources)} emails from {start} to {end}.
|
| 404 |
+
**Features:** RAG, today's date, hybrid search (dense+sparse), multiple retrievals,
|
| 405 |
+
thinking output (local), citations output (remote), chat memory.
|
| 406 |
**Tech:** LangChain + Hugging Face + Gradio; ChromaDB and BM25S-based retrievers.<br>
|
| 407 |
"""
|
| 408 |
return info_text
|
|
|
|
| 449 |
gr.Examples(
|
| 450 |
examples=[[q] for q in multi_tool_questions],
|
| 451 |
inputs=[input],
|
| 452 |
+
label="Multiple retrievals",
|
| 453 |
elem_id="example-questions",
|
| 454 |
)
|
| 455 |
multi_turn_questions = [
|
mods/tool_calling_llm.py
CHANGED
|
@@ -43,8 +43,12 @@ DEFAULT_SYSTEM_TEMPLATE = """You have access to the following tools:
|
|
| 43 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
| 44 |
|
| 45 |
{{
|
| 46 |
-
"tool": <name of
|
| 47 |
-
"tool_input": <parameters for
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
}}
|
| 49 |
""" # noqa: E501
|
| 50 |
|
|
@@ -173,52 +177,65 @@ class ToolCallingLLM(BaseChatModel, ABC):
|
|
| 173 |
# Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
|
| 174 |
think_text, post_think = extract_think(response_message.content)
|
| 175 |
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
| 177 |
try:
|
| 178 |
-
|
| 179 |
except json.JSONDecodeError:
|
| 180 |
# Return entire response if JSON wasn't parsed (or is missing)
|
| 181 |
return AIMessage(content=response_message.content)
|
| 182 |
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
)
|
| 194 |
-
if called_tool is None:
|
| 195 |
-
# Issue a warning and return the generated content 20250727 jmd
|
| 196 |
-
warnings.warn(f"Called tool ({called_tool_name}) not in functions list")
|
| 197 |
-
return AIMessage(content=response_message.content)
|
| 198 |
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
)
|
| 208 |
-
)
|
| 209 |
|
| 210 |
-
|
| 211 |
-
response_message = AIMessage(
|
| 212 |
-
content=f"<think>\n{think_text}\n</think>",
|
| 213 |
-
tool_calls=[
|
| 214 |
ToolCall(
|
| 215 |
name=called_tool_name,
|
| 216 |
args=called_tool_arguments,
|
| 217 |
id=f"call_{str(uuid.uuid4()).replace('-', '')}",
|
| 218 |
)
|
| 219 |
-
|
| 220 |
-
)
|
| 221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
return response_message
|
| 223 |
|
| 224 |
def _generate(
|
|
|
|
| 43 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
| 44 |
|
| 45 |
{{
|
| 46 |
+
"tool": <name of selected tool 1>,
|
| 47 |
+
"tool_input": <parameters for selected tool 1, matching the tool's JSON schema>
|
| 48 |
+
}},
|
| 49 |
+
{{
|
| 50 |
+
"tool": <name of selected tool 2>,
|
| 51 |
+
"tool_input": <parameters for selected tool 2, matching the tool's JSON schema>
|
| 52 |
}}
|
| 53 |
""" # noqa: E501
|
| 54 |
|
|
|
|
| 177 |
# Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
|
| 178 |
think_text, post_think = extract_think(response_message.content)
|
| 179 |
|
| 180 |
+
print("post_think")
|
| 181 |
+
print(post_think)
|
| 182 |
+
|
| 183 |
+
# Parse output for JSON (support multiple objects separated by commas)
|
| 184 |
try:
|
| 185 |
+
parsed_json_results = json.loads(f"[{post_think}]")
|
| 186 |
except json.JSONDecodeError:
|
| 187 |
# Return entire response if JSON wasn't parsed (or is missing)
|
| 188 |
return AIMessage(content=response_message.content)
|
| 189 |
|
| 190 |
+
tool_calls = []
|
| 191 |
+
for parsed_json_result in parsed_json_results:
|
| 192 |
+
# Get tool name from output
|
| 193 |
+
called_tool_name = (
|
| 194 |
+
parsed_json_result["tool"]
|
| 195 |
+
if "tool" in parsed_json_result
|
| 196 |
+
else (
|
| 197 |
+
parsed_json_result["name"] if "name" in parsed_json_result else None
|
| 198 |
+
)
|
| 199 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
+
# Check if tool name is in functions list
|
| 202 |
+
called_tool = next(
|
| 203 |
+
(fn for fn in functions if fn["function"]["name"] == called_tool_name),
|
| 204 |
+
None,
|
| 205 |
+
)
|
| 206 |
+
if called_tool is None:
|
| 207 |
+
# Issue a warning and skip this tool call
|
| 208 |
+
warnings.warn(f"Called tool ({called_tool_name}) not in functions list")
|
| 209 |
+
continue
|
| 210 |
+
|
| 211 |
+
# Get tool arguments from output
|
| 212 |
+
called_tool_arguments = (
|
| 213 |
+
parsed_json_result["tool_input"]
|
| 214 |
+
if "tool_input" in parsed_json_result
|
| 215 |
+
else (
|
| 216 |
+
parsed_json_result["parameters"]
|
| 217 |
+
if "parameters" in parsed_json_result
|
| 218 |
+
else {}
|
| 219 |
+
)
|
| 220 |
)
|
|
|
|
| 221 |
|
| 222 |
+
tool_calls.append(
|
|
|
|
|
|
|
|
|
|
| 223 |
ToolCall(
|
| 224 |
name=called_tool_name,
|
| 225 |
args=called_tool_arguments,
|
| 226 |
id=f"call_{str(uuid.uuid4()).replace('-', '')}",
|
| 227 |
)
|
| 228 |
+
)
|
|
|
|
| 229 |
|
| 230 |
+
if not tool_calls:
|
| 231 |
+
# If nothing valid, return original content
|
| 232 |
+
return AIMessage(content=response_message.content)
|
| 233 |
+
|
| 234 |
+
# Put together response message
|
| 235 |
+
response_message = AIMessage(
|
| 236 |
+
content=f"<think>\n{think_text}\n</think>",
|
| 237 |
+
tool_calls=tool_calls,
|
| 238 |
+
)
|
| 239 |
return response_message
|
| 240 |
|
| 241 |
def _generate(
|
prompts.py
CHANGED
|
@@ -31,7 +31,7 @@ def query_prompt(chat_model, think=False):
|
|
| 31 |
"Do not answer the user's question and do not ask the user for more information. "
|
| 32 |
# gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
|
| 33 |
f"The emails available for retrieval are from {start} to {end}. "
|
| 34 |
-
"For questions about differences or comparison between X and Y, retrieve emails about X and Y. "
|
| 35 |
"For general summaries, use retrieve_emails(search_query='R'). "
|
| 36 |
"For specific questions, use retrieve_emails(search_query=<specific topic>). "
|
| 37 |
"For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year). "
|
|
@@ -60,7 +60,8 @@ def generate_prompt(chat_model, think=False, with_tools=False):
|
|
| 60 |
"Summarize the content of the emails rather than copying the headers. " # Qwen
|
| 61 |
"You must include inline citations (email senders and dates) in each part of your response. "
|
| 62 |
"Only answer general questions about R if the answer is in the retrieved emails. "
|
| 63 |
-
"
|
|
|
|
| 64 |
)
|
| 65 |
if with_tools:
|
| 66 |
prompt = (
|
|
@@ -87,6 +88,10 @@ You have access to the following tools:
|
|
| 87 |
|
| 88 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
{{
|
| 91 |
"tool": <function-name>,
|
| 92 |
"tool_input": <args-json-object>
|
|
@@ -102,6 +107,10 @@ generic_tools_template = """
|
|
| 102 |
|
| 103 |
You have access to functions. If you decide to invoke any of the function(s), you MUST put it in the format of
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
{{
|
| 106 |
"tool": <function-name>,
|
| 107 |
"tool_input": <args-json-object>
|
|
|
|
| 31 |
"Do not answer the user's question and do not ask the user for more information. "
|
| 32 |
# gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
|
| 33 |
f"The emails available for retrieval are from {start} to {end}. "
|
| 34 |
+
"For questions about differences or comparison between X and Y, retrieve emails about X and Y using separate tool calls. "
|
| 35 |
"For general summaries, use retrieve_emails(search_query='R'). "
|
| 36 |
"For specific questions, use retrieve_emails(search_query=<specific topic>). "
|
| 37 |
"For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year). "
|
|
|
|
| 60 |
"Summarize the content of the emails rather than copying the headers. " # Qwen
|
| 61 |
"You must include inline citations (email senders and dates) in each part of your response. "
|
| 62 |
"Only answer general questions about R if the answer is in the retrieved emails. "
|
| 63 |
+
"Your response can include URLs, but make sure they are quoted verbatim from the retrieved emails. " # Qwen
|
| 64 |
+
"Respond with 300 words maximum and 30 lines of code maximum. "
|
| 65 |
)
|
| 66 |
if with_tools:
|
| 67 |
prompt = (
|
|
|
|
| 88 |
|
| 89 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
| 90 |
|
| 91 |
+
{{
|
| 92 |
+
"tool": <function-name>,
|
| 93 |
+
"tool_input": <args-json-object>
|
| 94 |
+
}},
|
| 95 |
{{
|
| 96 |
"tool": <function-name>,
|
| 97 |
"tool_input": <args-json-object>
|
|
|
|
| 107 |
|
| 108 |
You have access to functions. If you decide to invoke any of the function(s), you MUST put it in the format of
|
| 109 |
|
| 110 |
+
{{
|
| 111 |
+
"tool": <function-name>,
|
| 112 |
+
"tool_input": <args-json-object>
|
| 113 |
+
}},
|
| 114 |
{{
|
| 115 |
"tool": <function-name>,
|
| 116 |
"tool_input": <args-json-object>
|