Spaces:
Running
on
Zero
Running
on
Zero
jedick
committed on
Commit
·
f52b66d
1
Parent(s):
158fae7
Support multiple tool calls for local models
Browse files- app.py +5 -4
- mods/tool_calling_llm.py +51 -34
- prompts.py +11 -2
app.py
CHANGED
|
@@ -382,7 +382,8 @@ with gr.Blocks(
|
|
| 382 |
status_text = f"""
|
| 383 |
π Now in **local** mode, using ZeroGPU hardware<br>
|
| 384 |
β Response time is about one minute<br>
|
| 385 |
-
π§
|
|
|
|
| 386 |
β¨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
|
| 387 |
π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
|
| 388 |
"""
|
|
@@ -400,8 +401,8 @@ with gr.Blocks(
|
|
| 400 |
end = None
|
| 401 |
info_text = f"""
|
| 402 |
**Database:** {len(sources)} emails from {start} to {end}.
|
| 403 |
-
**Features:** RAG, today's date, hybrid search (dense+sparse),
|
| 404 |
-
|
| 405 |
**Tech:** LangChain + Hugging Face + Gradio; ChromaDB and BM25S-based retrievers.<br>
|
| 406 |
"""
|
| 407 |
return info_text
|
|
@@ -448,7 +449,7 @@ with gr.Blocks(
|
|
| 448 |
gr.Examples(
|
| 449 |
examples=[[q] for q in multi_tool_questions],
|
| 450 |
inputs=[input],
|
| 451 |
-
label="Multiple retrievals
|
| 452 |
elem_id="example-questions",
|
| 453 |
)
|
| 454 |
multi_turn_questions = [
|
|
|
|
| 382 |
status_text = f"""
|
| 383 |
π Now in **local** mode, using ZeroGPU hardware<br>
|
| 384 |
β Response time is about one minute<br>
|
| 385 |
+
π§ Add **/think** to enable thinking for answer</br>
|
| 386 |
+
  π Thinking is already enabled for query<br>
|
| 387 |
β¨ [nomic-embed-text-v1.5](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
|
| 388 |
π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
|
| 389 |
"""
|
|
|
|
| 401 |
end = None
|
| 402 |
info_text = f"""
|
| 403 |
**Database:** {len(sources)} emails from {start} to {end}.
|
| 404 |
+
**Features:** RAG, today's date, hybrid search (dense+sparse), multiple retrievals,
|
| 405 |
+
thinking output (local), citations output (remote), chat memory.
|
| 406 |
**Tech:** LangChain + Hugging Face + Gradio; ChromaDB and BM25S-based retrievers.<br>
|
| 407 |
"""
|
| 408 |
return info_text
|
|
|
|
| 449 |
gr.Examples(
|
| 450 |
examples=[[q] for q in multi_tool_questions],
|
| 451 |
inputs=[input],
|
| 452 |
+
label="Multiple retrievals",
|
| 453 |
elem_id="example-questions",
|
| 454 |
)
|
| 455 |
multi_turn_questions = [
|
mods/tool_calling_llm.py
CHANGED
|
@@ -43,8 +43,12 @@ DEFAULT_SYSTEM_TEMPLATE = """You have access to the following tools:
|
|
| 43 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
| 44 |
|
| 45 |
{{
|
| 46 |
-
"tool": <name of
|
| 47 |
-
"tool_input": <parameters for
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
}}
|
| 49 |
""" # noqa: E501
|
| 50 |
|
|
@@ -173,52 +177,65 @@ class ToolCallingLLM(BaseChatModel, ABC):
|
|
| 173 |
# Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
|
| 174 |
think_text, post_think = extract_think(response_message.content)
|
| 175 |
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
| 177 |
try:
|
| 178 |
-
|
| 179 |
except json.JSONDecodeError:
|
| 180 |
# Return entire response if JSON wasn't parsed (or is missing)
|
| 181 |
return AIMessage(content=response_message.content)
|
| 182 |
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
)
|
| 194 |
-
if called_tool is None:
|
| 195 |
-
# Issue a warning and return the generated content 20250727 jmd
|
| 196 |
-
warnings.warn(f"Called tool ({called_tool_name}) not in functions list")
|
| 197 |
-
return AIMessage(content=response_message.content)
|
| 198 |
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
)
|
| 208 |
-
)
|
| 209 |
|
| 210 |
-
|
| 211 |
-
response_message = AIMessage(
|
| 212 |
-
content=f"<think>\n{think_text}\n</think>",
|
| 213 |
-
tool_calls=[
|
| 214 |
ToolCall(
|
| 215 |
name=called_tool_name,
|
| 216 |
args=called_tool_arguments,
|
| 217 |
id=f"call_{str(uuid.uuid4()).replace('-', '')}",
|
| 218 |
)
|
| 219 |
-
|
| 220 |
-
)
|
| 221 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
return response_message
|
| 223 |
|
| 224 |
def _generate(
|
|
|
|
| 43 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
| 44 |
|
| 45 |
{{
|
| 46 |
+
"tool": <name of selected tool 1>,
|
| 47 |
+
"tool_input": <parameters for selected tool 1, matching the tool's JSON schema>
|
| 48 |
+
}},
|
| 49 |
+
{{
|
| 50 |
+
"tool": <name of selected tool 2>,
|
| 51 |
+
"tool_input": <parameters for selected tool 2, matching the tool's JSON schema>
|
| 52 |
}}
|
| 53 |
""" # noqa: E501
|
| 54 |
|
|
|
|
| 177 |
# Extract <think>...</think> content and text after </think> for further processing 20250726 jmd
|
| 178 |
think_text, post_think = extract_think(response_message.content)
|
| 179 |
|
| 180 |
+
print("post_think")
|
| 181 |
+
print(post_think)
|
| 182 |
+
|
| 183 |
+
# Parse output for JSON (support multiple objects separated by commas)
|
| 184 |
try:
|
| 185 |
+
parsed_json_results = json.loads(f"[{post_think}]")
|
| 186 |
except json.JSONDecodeError:
|
| 187 |
# Return entire response if JSON wasn't parsed (or is missing)
|
| 188 |
return AIMessage(content=response_message.content)
|
| 189 |
|
| 190 |
+
tool_calls = []
|
| 191 |
+
for parsed_json_result in parsed_json_results:
|
| 192 |
+
# Get tool name from output
|
| 193 |
+
called_tool_name = (
|
| 194 |
+
parsed_json_result["tool"]
|
| 195 |
+
if "tool" in parsed_json_result
|
| 196 |
+
else (
|
| 197 |
+
parsed_json_result["name"] if "name" in parsed_json_result else None
|
| 198 |
+
)
|
| 199 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
+
# Check if tool name is in functions list
|
| 202 |
+
called_tool = next(
|
| 203 |
+
(fn for fn in functions if fn["function"]["name"] == called_tool_name),
|
| 204 |
+
None,
|
| 205 |
+
)
|
| 206 |
+
if called_tool is None:
|
| 207 |
+
# Issue a warning and skip this tool call
|
| 208 |
+
warnings.warn(f"Called tool ({called_tool_name}) not in functions list")
|
| 209 |
+
continue
|
| 210 |
+
|
| 211 |
+
# Get tool arguments from output
|
| 212 |
+
called_tool_arguments = (
|
| 213 |
+
parsed_json_result["tool_input"]
|
| 214 |
+
if "tool_input" in parsed_json_result
|
| 215 |
+
else (
|
| 216 |
+
parsed_json_result["parameters"]
|
| 217 |
+
if "parameters" in parsed_json_result
|
| 218 |
+
else {}
|
| 219 |
+
)
|
| 220 |
)
|
|
|
|
| 221 |
|
| 222 |
+
tool_calls.append(
|
|
|
|
|
|
|
|
|
|
| 223 |
ToolCall(
|
| 224 |
name=called_tool_name,
|
| 225 |
args=called_tool_arguments,
|
| 226 |
id=f"call_{str(uuid.uuid4()).replace('-', '')}",
|
| 227 |
)
|
| 228 |
+
)
|
|
|
|
| 229 |
|
| 230 |
+
if not tool_calls:
|
| 231 |
+
# If nothing valid, return original content
|
| 232 |
+
return AIMessage(content=response_message.content)
|
| 233 |
+
|
| 234 |
+
# Put together response message
|
| 235 |
+
response_message = AIMessage(
|
| 236 |
+
content=f"<think>\n{think_text}\n</think>",
|
| 237 |
+
tool_calls=tool_calls,
|
| 238 |
+
)
|
| 239 |
return response_message
|
| 240 |
|
| 241 |
def _generate(
|
prompts.py
CHANGED
|
@@ -31,7 +31,7 @@ def query_prompt(chat_model, think=False):
|
|
| 31 |
"Do not answer the user's question and do not ask the user for more information. "
|
| 32 |
# gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
|
| 33 |
f"The emails available for retrieval are from {start} to {end}. "
|
| 34 |
-
"For questions about differences or comparison between X and Y, retrieve emails about X and Y. "
|
| 35 |
"For general summaries, use retrieve_emails(search_query='R'). "
|
| 36 |
"For specific questions, use retrieve_emails(search_query=<specific topic>). "
|
| 37 |
"For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year). "
|
|
@@ -60,7 +60,8 @@ def generate_prompt(chat_model, think=False, with_tools=False):
|
|
| 60 |
"Summarize the content of the emails rather than copying the headers. " # Qwen
|
| 61 |
"You must include inline citations (email senders and dates) in each part of your response. "
|
| 62 |
"Only answer general questions about R if the answer is in the retrieved emails. "
|
| 63 |
-
"
|
|
|
|
| 64 |
)
|
| 65 |
if with_tools:
|
| 66 |
prompt = (
|
|
@@ -87,6 +88,10 @@ You have access to the following tools:
|
|
| 87 |
|
| 88 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
| 89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
{{
|
| 91 |
"tool": <function-name>,
|
| 92 |
"tool_input": <args-json-object>
|
|
@@ -102,6 +107,10 @@ generic_tools_template = """
|
|
| 102 |
|
| 103 |
You have access to functions. If you decide to invoke any of the function(s), you MUST put it in the format of
|
| 104 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
{{
|
| 106 |
"tool": <function-name>,
|
| 107 |
"tool_input": <args-json-object>
|
|
|
|
| 31 |
"Do not answer the user's question and do not ask the user for more information. "
|
| 32 |
# gpt-4o-mini thinks last two months aren't available with this: "Emails from from {start} to {end} are available for retrieval. "
|
| 33 |
f"The emails available for retrieval are from {start} to {end}. "
|
| 34 |
+
"For questions about differences or comparison between X and Y, retrieve emails about X and Y using separate tool calls. "
|
| 35 |
"For general summaries, use retrieve_emails(search_query='R'). "
|
| 36 |
"For specific questions, use retrieve_emails(search_query=<specific topic>). "
|
| 37 |
"For questions about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year). "
|
|
|
|
| 60 |
"Summarize the content of the emails rather than copying the headers. " # Qwen
|
| 61 |
"You must include inline citations (email senders and dates) in each part of your response. "
|
| 62 |
"Only answer general questions about R if the answer is in the retrieved emails. "
|
| 63 |
+
"Your response can include URLs, but make sure they are quoted verbatim from the retrieved emails. " # Qwen
|
| 64 |
+
"Respond with 300 words maximum and 30 lines of code maximum. "
|
| 65 |
)
|
| 66 |
if with_tools:
|
| 67 |
prompt = (
|
|
|
|
| 88 |
|
| 89 |
You must always select one of the above tools and respond with only a JSON object matching the following schema:
|
| 90 |
|
| 91 |
+
{{
|
| 92 |
+
"tool": <function-name>,
|
| 93 |
+
"tool_input": <args-json-object>
|
| 94 |
+
}},
|
| 95 |
{{
|
| 96 |
"tool": <function-name>,
|
| 97 |
"tool_input": <args-json-object>
|
|
|
|
| 107 |
|
| 108 |
You have access to functions. If you decide to invoke any of the function(s), you MUST put it in the format of
|
| 109 |
|
| 110 |
+
{{
|
| 111 |
+
"tool": <function-name>,
|
| 112 |
+
"tool_input": <args-json-object>
|
| 113 |
+
}},
|
| 114 |
{{
|
| 115 |
"tool": <function-name>,
|
| 116 |
"tool_input": <args-json-object>
|