jedick committed
Commit 03db0de · 1 Parent(s): 0efb496

Normalize message types for Gemma

Files changed (5)
  1. app.py +157 -145
  2. graph.py +70 -40
  3. main.py +12 -7
  4. prompts.py +32 -14
  5. util.py +20 -5
app.py CHANGED
@@ -3,9 +3,9 @@ from main import GetChatModel
  from graph import BuildGraph
  from retriever import db_dir
  from langgraph.checkpoint.memory import MemorySaver
- from dotenv import load_dotenv

- # from util import get_collection, get_sources, get_start_end_months
+ from main import openai_model, model_id
+ from util import get_sources, get_start_end_months
  from git import Repo
  import zipfile
  import spaces
@@ -18,11 +18,6 @@ import os
  COMPUTE = "cloud"
  search_type = "hybrid"

- # Load LANGCHAIN_API_KEY (for local deployment)
- load_dotenv(dotenv_path=".env", override=True)
- os.environ["LANGSMITH_TRACING"] = "true"
- os.environ["LANGSMITH_PROJECT"] = "R-help-chat"
-
  # Check for GPU
  if COMPUTE == "edge":
  if not torch.cuda.is_available():
@@ -33,7 +28,7 @@ graph_edge = None
  graph_cloud = None


- def run_workflow(chatbot, input, thread_id):
+ def run_workflow(input, history, thread_id):
  """The main function to run the chat workflow"""

  # Get global graph for compute location
@@ -69,10 +64,10 @@ def run_workflow(chatbot, input, thread_id):

  print(f"Using thread_id: {thread_id}")

- # Display the user input in the chatbot interface
- chatbot.append(gr.ChatMessage(role="user", content=input))
- # Return the chatbot messages and empty lists for emails and citations texboxes
- yield chatbot, [], []
+ # # Display the user input in the history
+ # history.append(gr.ChatMessage(role="user", content=input))
+ # # Return the history and empty lists for emails and citations texboxes
+ # yield history, [], []

  # Asynchronously stream graph steps for a single input
  # https://langchain-ai.lang.chat/langgraph/reference/graphs/#langgraph.graph.state.CompiledStateGraph
@@ -101,7 +96,7 @@ def run_workflow(chatbot, input, thread_id):
  content = f"{content} ({start_year or ''} - {end_year or ''})"
  if "months" in args:
  content = f"{content} {args['months']}"
- chatbot.append(
+ history.append(
  gr.ChatMessage(
  role="assistant",
  content=content,
@@ -109,10 +104,10 @@ def run_workflow(chatbot, input, thread_id):
  )
  )
  if chunk_messages.content:
- chatbot.append(
+ history.append(
  gr.ChatMessage(role="assistant", content=chunk_messages.content)
  )
- yield chatbot, [], []
+ yield history, [], []

  if node == "retrieve_emails":
  chunk_messages = chunk["messages"]
@@ -136,7 +131,7 @@ def run_workflow(chatbot, input, thread_id):
  title = f"🛒 Retrieved {n_emails} emails"
  if email_list[0] == "### No emails were retrieved":
  title = "❌ Retrieved 0 emails"
- chatbot.append(
+ history.append(
  gr.ChatMessage(
  role="assistant",
  content=month_text,
@@ -152,17 +147,17 @@ def run_workflow(chatbot, input, thread_id):
  )
  # Combine all the Tool Call results
  retrieved_emails = "\n\n".join(retrieved_emails)
- yield chatbot, retrieved_emails, []
+ yield history, retrieved_emails, []

  if node == "generate":
  chunk_messages = chunk["messages"]
  # Chat response without citations
  if chunk_messages.content:
- chatbot.append(
+ history.append(
  gr.ChatMessage(role="assistant", content=chunk_messages.content)
  )
  # None is used for no change to the retrieved emails textbox
- yield chatbot, None, []
+ yield history, None, []

  if node == "answer_with_citations":
  chunk_messages = chunk["messages"][0]
@@ -174,8 +169,8 @@ def run_workflow(chatbot, input, thread_id):
  answer = chunk_messages.content
  citations = None

- chatbot.append(gr.ChatMessage(role="assistant", content=answer))
- yield chatbot, None, citations
+ history.append(gr.ChatMessage(role="assistant", content=answer))
+ yield history, None, citations


  def to_workflow(*args):
@@ -230,12 +225,6 @@ with gr.Blocks(
  render=False,
  )

- input = gr.Textbox(
- lines=1,
- label="Your Question",
- info="Press Enter to submit",
- render=False,
- )
  downloading = gr.Textbox(
  lines=1,
  label="Downloading Data, Please Wait",
@@ -248,6 +237,13 @@ with gr.Blocks(
  visible=False,
  render=False,
  )
+ data_error = gr.Textbox(
+ value="App is unavailable. Please contact the maintainer.",
+ lines=1,
+ label="Error downloading or extracting data",
+ visible=False,
+ render=False,
+ )
  show_examples = gr.Checkbox(
  value=False,
  label="💡 Example Questions",
@@ -268,142 +264,142 @@ with gr.Blocks(
  render=False,
  )

+ # ------------
+ # Set up state
+ # ------------
+
+ def generate_thread_id():
+ """Generate a new thread ID"""
+ thread_id = uuid.uuid4()
+ print(f"Generated thread_id: {thread_id}")
+ return thread_id
+
+ # Define thread_id variable
+ thread_id = gr.State(generate_thread_id())
+
+ # Define states for the output textboxes
+ retrieved_emails = gr.State([])
+ citations_text = gr.State([])
+
  # ------------------
  # Make the interface
  # ------------------

  def get_intro_text():
- ## Get start and end months from database
- # start, end = get_start_end_months(get_sources(compute_location.value))
  intro = f"""<!-- # 🤖 R-help-chat -->
+ <!-- Get AI-powered answers about R programming backed by email retrieval. -->
  ## 🇷🤝💬 R-help-chat

- **Chat with the [R-help mailing list archives]((https://stat.ethz.ch/pipermail/r-help/)).** Get AI-powered answers about R programming backed by email retrieval.
- An LLM turns your question into a search query, including year ranges.
+ **Chat with the [R-help mailing list archives]((https://stat.ethz.ch/pipermail/r-help/)).**
+ An LLM turns your question into a search query, including year ranges, and generates an answer from the retrieved emails.
  You can ask follow-up questions with the chat history as context.
- ➡️ To clear the chat history and start a new chat, press the 🗑️ trash button.<br>
+ ➡️ To clear the history and start a new chat, press the 🗑️ trash button.<br>
  **_Answers may be incorrect._**<br>
  """
  return intro

- def get_info_text(compute_location):
- info_prefix = """
- **Features:** conversational RAG, today's date, email database (*start* to *end*), hybrid search (dense+sparse),
- query analysis, multiple tool calls (cloud model), answer with citations.
- **Tech:** LangChain + Hugging Face + Gradio; ChromaDB and BM25S-based retrievers.<br>
- """
+ def get_status_text(compute_location):
  if compute_location.startswith("cloud"):
- info_text = f"""{info_prefix}
+ status_text = f"""
  📍 This is the **cloud** version, using the OpenAI API<br>
- gpt-4o-mini<br>
- ⚠️ **_Privacy Notice_**: Data sharing with OpenAI is enabled, and all interactions are logged<br>
+ text-embedding-3-small and {openai_model}<br>
+ ⚠️ **_Privacy Notice_**: Data sharing with OpenAI is enabled<br>
  🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
  """
  if compute_location.startswith("edge"):
- info_text = f"""{info_prefix}
- 📍 This is the **edge** version, using [ZeroGPU](https://huggingface.co/docs/hub/spaces-zerogpu) hardware<br>
- ✨ Nomic embeddings and Gemma-3 LLM<br>
- ⚠️ **_Privacy Notice_**: All interactions are logged<br>
+ status_text = f"""
+ 📍 This is the **edge** version, using ZeroGPU hardware<br>
+ Embeddings: [Nomic](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5); LLM: [{model_id}](https://huggingface.co/{model_id})<br>
  🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
  """
+ return status_text
+
+ def get_info_text():
+ try:
+ # Get source files for each email and start and end months from database
+ sources = get_sources()
+ start, end = get_start_end_months(sources)
+ except:
+ # If database isn't ready, put in empty values
+ sources = []
+ start = None
+ end = None
+ info_text = f"""
+ **Database:** {len(sources)} emails from {start} to {end}.
+ **Features:** RAG, today's date, hybrid search (dense+sparse), query analysis,
+ multiple tool calls (cloud model), answer with citations, chat memory.
+ **Tech:** LangChain + Hugging Face + Gradio; ChromaDB and [BM25S](https://github.com/xhluca/bm25s)-based retrievers.<br>
+ """
  return info_text

- with gr.Row(elem_classes=["row-container"]):
+ with gr.Row():
+ # Left column: Intro, Compute, Chat, Emails
  with gr.Column(scale=2):
  with gr.Row(elem_classes=["row-container"]):
  with gr.Column(scale=2):
  intro = gr.Markdown(get_intro_text())
  with gr.Column(scale=1):
  compute_location.render()
- input.render()
+ chat_interface = gr.ChatInterface(
+ to_workflow,
+ chatbot=chatbot,
+ type="messages",
+ additional_inputs=[thread_id],
+ additional_outputs=[retrieved_emails, citations_text],
+ api_name=False,
+ )
  downloading.render()
  extracting.render()
- with gr.Column(scale=1):
- # Add information about the system
- with gr.Accordion("ℹ️ About This App", open=True):
- ## Get number of emails (unique doc ids) in vector database
- # collection = get_collection(compute_location.value)
- # n_emails = len(set([m["doc_id"] for m in collection["metadatas"]]))
- # gr.Markdown(
- # f"""
- # - **Database**: *n_emails* emails from the [R-help mailing list archives](https://stat.ethz.ch/pipermail/r-help/)
- # - **System**: retrieval and citation tools; system prompt has today's date
- # - **Retrieval**: hybrid of dense (vector embeddings) and sparse ([BM25S](https://github.com/xhluca/bm25s))
- # """
- # )
- info = gr.Markdown(get_info_text(compute_location.value))
- show_examples.render()
-
- with gr.Row():
-
- with gr.Column(scale=2):
- chatbot.render()
-
- with gr.Column(scale=1, visible=False) as examples:
- # Add some helpful examples
- example_questions = [
- # "What is today's date?",
- "Summarize emails from the last two months",
- "What plotmath examples have been discussed?",
- "When was has.HLC mentioned?",
- "Who discussed profiling in 2023?",
- "Any messages about installation problems in 2023-2024?",
- ]
- gr.Examples(
- examples=[[q] for q in example_questions],
- inputs=[input],
- label="Click an example to fill the question box",
- elem_id="example-questions",
- )
- multi_tool_questions = [
- "Speed differences between lapply and for loops",
- "Compare usage of pipe operator between 2022 and 2024",
- ]
- gr.Examples(
- examples=[[q] for q in multi_tool_questions],
- inputs=[input],
- label="Example prompts for multiple retrievals",
- elem_id="example-questions",
- )
- multi_turn_questions = [
- "Lookup emails that reference bugs.r-project.org in 2025",
- "Did those authors report bugs before 2025?",
- ]
- gr.Examples(
- examples=[[q] for q in multi_turn_questions],
- inputs=[input],
- label="Multi-turn example for asking follow-up questions",
- elem_id="example-questions",
- )
-
- with gr.Row():
- with gr.Column(scale=2):
+ data_error.render()
  emails_textbox = gr.Textbox(
  label="Retrieved Emails",
  lines=10,
  visible=False,
  info="Tip: Look for 'Tool Call' and 'Next Email' separators. Quoted lines (starting with '>') are removed before indexing.",
  )
- with gr.Column():
+ # Right column: Info, Examples, Citations
+ with gr.Column(scale=1):
+ status = gr.Markdown(get_status_text(compute_location.value))
+ with gr.Accordion("ℹ️ More Info", open=False):
+ info = gr.Markdown(get_info_text())
+ with gr.Accordion("💡 Examples", open=True):
+ # Add some helpful examples
+ example_questions = [
+ # "What is today's date?",
+ "Summarize emails from the last two months",
+ "What plotmath examples have been discussed?",
+ "When was has.HLC mentioned?",
+ "Who discussed profiling in 2023?",
+ "Any messages about installation problems in 2023-2024?",
+ ]
+ gr.Examples(
+ examples=[[q] for q in example_questions],
+ inputs=[chat_interface.textbox],
+ label="Click an example to fill the message box",
+ elem_id="example-questions",
+ )
+ multi_tool_questions = [
+ "Differences between lapply and for loops",
+ "Compare usage of pipe operator between 2022 and 2024",
+ ]
+ gr.Examples(
+ examples=[[q] for q in multi_tool_questions],
+ inputs=[chat_interface.textbox],
+ label="Prompts for multiple retrievals",
+ elem_id="example-questions",
+ )
+ multi_turn_questions = [
+ "Lookup emails that reference bugs.r-project.org in 2025",
+ "Did those authors report bugs before 2025?",
+ ]
+ gr.Examples(
+ examples=[[q] for q in multi_turn_questions],
+ inputs=[chat_interface.textbox],
+ label="Asking follow-up questions",
+ elem_id="example-questions",
+ )
  citations_textbox = gr.Textbox(label="Citations", lines=2, visible=False)

- # ------------
- # Set up state
- # ------------
-
- def generate_thread_id():
- """Generate a new thread ID"""
- thread_id = uuid.uuid4()
- print(f"Generated thread_id: {thread_id}")
- return thread_id
-
- # Define thread_id variable
- thread_id = gr.State(generate_thread_id())
-
- # Define states for the output textboxes
- retrieved_emails = gr.State([])
- citations_text = gr.State([])
-
  # -------------
  # App functions
  # -------------
@@ -458,16 +454,26 @@ with gr.Blocks(
  # https://github.com/gradio-app/gradio/issues/9722
  chatbot.clear(generate_thread_id, outputs=[thread_id], api_name=False)

+ def clear_component(component):
+ """Return cleared component"""
+ return component.clear()
+
  compute_location.change(
  # Update global COMPUTE variable
  set_compute,
  [compute_location],
  api_name=False,
  ).then(
- # Change the info text
- get_info_text,
+ # Change the app status text
+ get_status_text,
  [compute_location],
- [info],
+ [status],
+ api_name=False,
+ ).then(
+ # Clear the chatbot history
+ clear_component,
+ [chatbot],
+ [chatbot],
  api_name=False,
  ).then(
  # Change the chatbot avatar
@@ -475,21 +481,10 @@ with gr.Blocks(
  [compute_location],
  [chatbot],
  api_name=False,
- )
-
- show_examples.change(
- # Show examples
- change_visibility,
- [show_examples],
- [examples],
- api_name=False,
- )
-
- input.submit(
- # Submit input to the chatbot
- to_workflow,
- [chatbot, input, thread_id],
- [chatbot, retrieved_emails, citations_text],
+ ).then(
+ # Start a new thread
+ generate_thread_id,
+ outputs=[thread_id],
  api_name=False,
  )
@@ -558,13 +553,20 @@ with gr.Blocks(
  need_data = gr.State()
  have_data = gr.State()

+ # When app is launched, check if data is present, download it if necessary,
+ # hide chat interface during downloading, show downloading and extracting
+ # steps as textboxes, show error textbox if needed, restore chat interface,
+ # and show database info
+
  # fmt: off
  demo.load(
  is_data_missing, None, [need_data], api_name=False
  ).then(
  is_data_present, None, [have_data], api_name=False
  ).then(
- change_visibility, [have_data], [input], api_name=False
+ change_visibility, [have_data], [chatbot], api_name=False
+ ).then(
+ change_visibility, [have_data], [chat_interface.textbox], api_name=False
  ).then(
  change_visibility, [need_data], [downloading], api_name=False
  ).then(
@@ -578,7 +580,17 @@ with gr.Blocks(
  ).then(
  change_visibility, [false], [extracting], api_name=False
  ).then(
- change_visibility, [true], [input], api_name=False
+ is_data_missing, None, [need_data], api_name=False
+ ).then(
+ is_data_present, None, [have_data], api_name=False
+ ).then(
+ change_visibility, [have_data], [chatbot], api_name=False
+ ).then(
+ change_visibility, [have_data], [chat_interface.textbox], api_name=False
+ ).then(
+ change_visibility, [need_data], [data_error], api_name=False
+ ).then(
+ get_info_text, None, [info], api_name=False
  )
  # fmt: on
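The structural change in app.py is the move from a hand-wired input.submit() to gr.ChatInterface: the handler now receives (input, history, thread_id) and returns extra values that are routed to the emails and citations components. A minimal sketch of that wiring, assuming Gradio 5+ (where ChatInterface supports additional_inputs and additional_outputs); the handler and component names here are illustrative, not the app's:

import gradio as gr

def respond(message, history, thread_id):
    # A real handler streams graph steps; this stub just returns the reply
    # plus one value per extra output (retrieved emails, citations).
    reply = f"[thread {thread_id}] You asked: {message}"
    return reply, "retrieved emails go here", "citations go here"

with gr.Blocks() as demo:
    thread_id = gr.State("demo-thread")
    emails = gr.Textbox(label="Retrieved Emails")
    citations = gr.Textbox(label="Citations")
    gr.ChatInterface(
        respond,
        type="messages",
        additional_inputs=[thread_id],
        additional_outputs=[emails, citations],
    )

demo.launch()

With additional_outputs, the chat function returns the reply first, followed by one value per extra component, which is why run_workflow now yields (history, retrieved_emails, citations) tuples.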
graph.py CHANGED
@@ -4,37 +4,85 @@ from langchain_core.tools import tool
  from langgraph.prebuilt import ToolNode, tools_condition
  from langchain_huggingface import ChatHuggingFace
  from typing import Optional
+ from dotenv import load_dotenv
  import datetime
  import os

  # Local modules
  from retriever import BuildRetriever
- from prompts import retrieve_prompt, answer_prompt, smollm3_tools_template
+ from prompts import retrieve_prompt, answer_prompt, gemma_tools_template
  from mods.tool_calling_llm import ToolCallingLLM

  # Local modules
  from retriever import BuildRetriever

  ## For LANGCHAIN_API_KEY
- # from dotenv import load_dotenv
- #
  # load_dotenv(dotenv_path=".env", override=True)
  # os.environ["LANGSMITH_TRACING"] = "true"
  # os.environ["LANGSMITH_PROJECT"] = "R-help-chat"


- def ToolifySmolLM3(chat_model, system_message, system_message_suffix="", think=False):
+ def print_messages_summary(messages, header):
+ """Print message types and summaries for debugging"""
+ if header:
+ print(header)
+ for message in messages:
+ summary_text = ""
+ if type(message) == SystemMessage:
+ type_txt = "SystemMessage"
+ summary_txt = f"length = {len(message.content)}"
+ if type(message) == HumanMessage:
+ type_txt = "HumanMessage"
+ summary_txt = message.content
+ if type(message) == AIMessage:
+ type_txt = "AIMessage"
+ summary_txt = f"length = {len(message.content)}"
+ if type(message) == ToolMessage:
+ type_txt = "ToolMessage"
+ summary_txt = f"length = {len(message.content)}"
+ if hasattr(message, "tool_calls"):
+ if len(message.tool_calls) != 1:
+ summary_txt = f"{summary_txt} with {len(message.tool_calls)} tool calls"
+ else:
+ summary_txt = f"{summary_txt} with 1 tool call"
+ print(f"{type_txt}: {summary_txt}")
+
+
+ def normalize_messages(messages):
+ """Normalize messages to sequence of types expected by chat templates"""
+ # Copy the most recent HumanMessage to the end
+ # (avoids SmolLM3 ValueError: Last message must be a HumanMessage!)
+ if not type(messages[-1]) is HumanMessage:
+ for msg in reversed(messages):
+ if type(msg) is HumanMessage:
+ messages.append(msg)
+ # Convert tool output (ToolMessage) to AIMessage
+ # (avoids SmolLM3 ValueError: Unknown message type: <class 'langchain_core.messages.tool.ToolMessage'>)
+ messages = [
+ AIMessage(msg.content) if type(msg) is ToolMessage else msg for msg in messages
+ ]
+ # Delete tool call (AIMessage)
+ # (avoids Gemma TemplateError: Conversation roles must alternate user/assistant/user/assistant/...)
+ messages = [
+ msg
+ for msg in messages
+ if not hasattr(msg, "tool_calls")
+ or (hasattr(msg, "tool_calls") and not msg.tool_calls)
+ ]
+ return messages
+
+
+ def ToolifyHF(chat_model, system_message, system_message_suffix="", think=False):
  """
- Get a SmolLM3 model ready for bind_tools().
+ Get a Hugging Face model ready for bind_tools().
  """

- # Add /no_think flag to turn off thinking mode
- if not think:
- system_message = "/no_think\n" + system_message
+ ## Add /no_think flag to turn off thinking mode (SmolLM3)
+ # if not think:
+ # system_message = "/no_think\n" + system_message

- # NOTE: The first two nonblank lines are taken from the chat template for HuggingFaceTB/SmolLM3-3B
- # The rest are taken from the default system template for ToolCallingLLM
- tool_system_prompt_template = system_message + smollm3_tools_template
+ # Combine system prompt and tools template
+ tool_system_prompt_template = system_message + gemma_tools_template

  class HuggingFaceWithTools(ToolCallingLLM, ChatHuggingFace):

@@ -45,6 +93,7 @@ def ToolifySmolLM3(chat_model, system_message, system_message_suffix="", think=F
  chat_model = HuggingFaceWithTools(
  llm=chat_model.llm,
  tool_system_prompt_template=tool_system_prompt_template,
+ # Suffix is for any additional context (not templated)
  system_message_suffix=system_message_suffix,
  )

@@ -154,12 +203,12 @@ def BuildGraph(
  is_edge = hasattr(chat_model, "model_id")
  if is_edge:
  # For edge model (ChatHuggingFace)
- query_model = ToolifySmolLM3(
+ query_model = ToolifyHF(
  chat_model, retrieve_prompt(compute_location), "", think_retrieve
  ).bind_tools([retrieve_emails])
- generate_model = ToolifySmolLM3(chat_model, answer_prompt(), "", think_generate)
- # For testing with Gemma, don't bind tool for now
- # ).bind_tools([answer_with_citations])
+ generate_model = ToolifyHF(
+ chat_model, answer_prompt(), "", think_generate
+ ).bind_tools([answer_with_citations])
  else:
  # For cloud model (OpenAI API)
  query_model = chat_model.bind_tools([retrieve_emails])
@@ -173,12 +222,9 @@ def BuildGraph(
  if is_edge:
  # Don't include the system message here because it's defined in ToolCallingLLM
  messages = state["messages"]
- # Convert ToolMessage (from previous turns) to AIMessage
- # (avoids SmolLM3 ValueError: Unknown message type: <class 'langchain_core.messages.tool.ToolMessage'>)
- messages = [
- AIMessage(msg.content) if type(msg) is ToolMessage else msg
- for msg in messages
- ]
+ print_messages_summary(messages, "--- query: before normalization ---")
+ messages = normalize_messages(messages)
+ print_messages_summary(messages, "--- query: after normalization ---")
  else:
  messages = [SystemMessage(retrieve_prompt(compute_location))] + state[
  "messages"
@@ -191,25 +237,9 @@ def BuildGraph(
  """Generates an answer with the chat model"""
  if is_edge:
  messages = state["messages"]
- # Copy the most recent HumanMessage to the end
- # (avoids SmolLM3 ValueError: Last message must be a HumanMessage!)
- for msg in reversed(messages):
- if type(msg) is HumanMessage:
- messages.append(msg)
- # Convert tool output (ToolMessage) to AIMessage
- # (avoids SmolLM3 ValueError: Unknown message type: <class 'langchain_core.messages.tool.ToolMessage'>)
- messages = [
- AIMessage(msg.content) if type(msg) is ToolMessage else msg
- for msg in messages
- ]
- # Delete tool call (AIMessage)
- # (avoids Gemma TemplateError: Conversation roles must alternate user/assistant/user/assistant/...)
- messages = [
- msg
- for msg in messages
- if not hasattr(msg, "tool_calls")
- or (hasattr(msg, "tool_calls") and not msg.tool_calls)
- ]
+ print_messages_summary(messages, "--- generate: before normalization ---")
+ messages = normalize_messages(messages)
+ print_messages_summary(messages, "--- generate: after normalization ---")
  else:
  messages = [SystemMessage(answer_prompt())] + state["messages"]
  response = generate_model.invoke(messages)
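The commit's core idea, normalize_messages, enforces the turn structure Gemma's chat template demands: no ToolMessage objects, no assistant tool-call stubs, and a user message last. A quick check of those rules (a sketch assuming langchain-core is installed and graph.py is importable; the history below is made up):

from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
from graph import normalize_messages

history = [
    HumanMessage("Who discussed profiling in 2023?"),
    AIMessage("", tool_calls=[{"name": "retrieve_emails", "args": {}, "id": "1"}]),
    ToolMessage("...retrieved emails...", tool_call_id="1"),
]
messages = normalize_messages(history)
# The ToolMessage becomes an AIMessage, the tool-calling AIMessage is dropped,
# and the last HumanMessage is repeated so the sequence ends with a user turn:
print([type(m).__name__ for m in messages])
# ['HumanMessage', 'AIMessage', 'HumanMessage']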
main.py CHANGED
@@ -24,9 +24,20 @@ from retriever import BuildRetriever, db_dir
  from graph import BuildGraph
  from prompts import answer_prompt

+ # -----------
  # R-help-chat
+ # -----------
  # First version by Jeffrey Dick on 2025-06-29

+ # Define the cloud (OpenAI) model
+ openai_model = "gpt-4o-mini"
+
+ # Get the edge model ID (we can define the variable in HF Spaces settings)
+ model_id = os.getenv("MODEL_ID")
+ if model_id is None:
+ # model_id = "HuggingFaceTB/SmolLM3-3B"
+ model_id = "google/gemma-3-1b-it"
+
  # Suppress these messages:
  # INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
  # INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
@@ -122,7 +133,7 @@ def GetChatModel(compute_location):

  if compute_location == "cloud":

- chat_model = ChatOpenAI(model="gpt-4o-mini", temperature=0)
+ chat_model = ChatOpenAI(model=openai_model, temperature=0)

  if compute_location == "edge":

@@ -130,12 +141,6 @@
  if compute_location == "edge" and not torch.cuda.is_available():
  raise Exception("Edge chat model selected without GPU")

- # Get the model ID (we can define the variable in HF Spaces settings)
- model_id = os.getenv("MODEL_ID")
- if model_id is None:
- # model_id = "HuggingFaceTB/SmolLM3-3B"
- model_id = "google/gemma-3-1b-it"
-
  # Define the pipeline to pass to the HuggingFacePipeline class
  # https://huggingface.co/blog/langchain
  tokenizer = AutoTokenizer.from_pretrained(model_id)
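Hoisting openai_model and model_id to module level lets app.py import them for the status text and keeps the MODEL_ID override from the HF Spaces settings in one place. The fallback could equally be written with os.getenv's default argument (an equivalent sketch, not the committed code):

import os

# One-liner equivalent of the MODEL_ID fallback in main.py
model_id = os.getenv("MODEL_ID", "google/gemma-3-1b-it")
openai_model = "gpt-4o-mini"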
prompts.py CHANGED
@@ -11,7 +11,7 @@ def retrieve_prompt(compute_location):
  """

  # Get start and end months from database
- start, end = get_start_end_months(get_sources(compute_location))
+ start, end = get_start_end_months(get_sources())

  retrieve_prompt = (
  f"The current date is {date.today()}. "
@@ -58,23 +58,41 @@ def answer_prompt():


  # Prompt template for SmolLM3 with tools
- # The first two lines are from the apply_chat_template for HuggingFaceTB/SmolLM3-3B
- # The remainding lines (starting with You have access...) from tool_calling_llm.py
-
+ # The first two lines, <function-name>, and <args-json-object> are from the apply_chat_template for HuggingFaceTB/SmolLM3-3B
+ # The other lines (You have, {tools}, You must), "tool", and "tool_input" are from tool_calling_llm.py
  smollm3_tools_template = """

- ### Tools
+ ### Tools

- You may call one or more functions to assist with the user query.
+ You may call one or more functions to assist with the user query.

- You have access to the following tools:
+ You have access to the following tools:

- {tools}
+ {tools}

- You must always select one of the above tools and respond with only a JSON object matching the following schema:
+ You must always select one of the above tools and respond with only a JSON object matching the following schema:

- {{
- "tool": <name of the selected tool>,
- "tool_input": <parameters for the selected tool, matching the tool's JSON schema>
- }}
- """
+ {{
+ "tool": <function-name>,
+ "tool_input": <args-json-object>
+ }}
+
+ """
+
+ # Prompt template for Gemma-3 with tools
+ # Based on https://ai.google.dev/gemma/docs/capabilities/function-calling
+ gemma_tools_template = """
+
+ ### Functions
+
+ You have access to functions. If you decide to invoke any of the function(s), you MUST put it in the format of
+
+ {{
+ "tool": <function-name>,
+ "tool_input": <args-json-object>
+ }}
+
+ You SHOULD NOT include any other text in the response if you call a function
+
+ {tools}
+ """
util.py CHANGED
@@ -1,22 +1,37 @@
- import re
  from calendar import month_name
- from retriever import BuildRetriever
+ from retriever import BuildRetriever, db_dir
+ import json
+ import os
+ import re


  def get_collection(compute_location):
  """
  Returns the vectorstore collection.
+
+ Usage Examples:
+ # Number of child documents
+ collection = get_collection("cloud")
+ len(collection["ids"])
+ # Number of parent documents (unique doc_ids)
+ len(set([m["doc_id"] for m in collection["metadatas"]]))
  """
  retriever = BuildRetriever(compute_location, "dense")
  return retriever.vectorstore.get()


- def get_sources(compute_location):
+ def get_sources():
  """
  Return the source files indexed in the database, e.g. 'R-help/2024-April.txt'.
  """
- collection = get_collection(compute_location)
- sources = [m["source"] for m in collection["metadatas"]]
+ # Path to your JSON Lines file
+ file_path = os.path.join(db_dir, "bm25", "corpus.jsonl")
+
+ # Reading the JSON Lines file
+ with open(file_path, "r", encoding="utf-8") as file:
+ # Parse each line as a JSON object
+ sources = [json.loads(line.strip())["metadata"]["source"] for line in file]
+
  return sources

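get_sources() now reads the BM25 corpus directly instead of querying the vectorstore, so it works before any embedding model is loaded. The JSON Lines layout it expects looks like this (a sketch with invented records; only the metadata.source field matters here):

import json

lines = [
    '{"text": "...", "metadata": {"source": "R-help/2024-April.txt"}}',
    '{"text": "...", "metadata": {"source": "R-help/2024-May.txt"}}',
]
sources = [json.loads(line.strip())["metadata"]["source"] for line in lines]
print(sources)  # ['R-help/2024-April.txt', 'R-help/2024-May.txt']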