Spaces:

jedick
/

R-help-chat

Running on Zero

App Files Files Community

jedick committed on Aug 1

Commit

555a40e

1 Parent(s): 1b28bd4

Change chat model to google/gemma-3-12b-it

Browse files

Files changed (4) hide show

app.py +2 -16
graph.py +5 -4
main.py +2 -2
prompts.py +2 -2

app.py CHANGED Viewed

@@ -416,8 +416,6 @@ with gr.Blocks(
             status_text = f"""
             📍 Now in **local** mode, using ZeroGPU hardware<br>
             ⌛ Response time is about one minute<br>
-            🧠 Add **/think** to enable thinking</br>
-            &emsp;&nbsp; 🐢 Increases ZeroGPU allotment to 100 seconds</br>
             ✨ [{embedding_model_id.split("/")[-1]}](https://huggingface.co/{embedding_model_id}) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
             🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
             """
@@ -461,7 +459,7 @@ with gr.Blocks(
     def get_multi_tool_questions(compute_mode, as_dataset=True):
         """Get multi-tool example questions based on compute mode"""
         questions = [
-            "Differences between lapply and for loops /think",
             "Discuss pipe operator usage in 2022, 2023, and 2024",
         ]
@@ -474,7 +472,7 @@ with gr.Blocks(
         """Get multi-turn example questions based on compute mode"""
         questions = [
             "Lookup emails that reference bugs.r-project.org in 2025",
-            "Did those authors report bugs before 2025? /think",
         ]
         if compute_mode == "remote":
@@ -610,18 +608,6 @@ with gr.Blocks(
         [compute_mode],
         [status],
         api_name=False,
-    ).then(
-        # Update multi-tool examples based on compute mode
-        get_multi_tool_questions,
-        [compute_mode],
-        [multi_tool_questions.dataset],
-        api_name=False,
-    ).then(
-        # Update multi-turn examples based on compute mode
-        get_multi_turn_questions,
-        [compute_mode],
-        [multi_turn_questions.dataset],
-        api_name=False,
     )
     input.submit(

             status_text = f"""
             📍 Now in **local** mode, using ZeroGPU hardware<br>
             ⌛ Response time is about one minute<br>
             ✨ [{embedding_model_id.split("/")[-1]}](https://huggingface.co/{embedding_model_id}) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
             🏠 See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
             """
     def get_multi_tool_questions(compute_mode, as_dataset=True):
         """Get multi-tool example questions based on compute mode"""
         questions = [
+            "Differences between lapply and for loops",
             "Discuss pipe operator usage in 2022, 2023, and 2024",
         ]
         """Get multi-turn example questions based on compute mode"""
         questions = [
             "Lookup emails that reference bugs.r-project.org in 2025",
+            "Did those authors report bugs before 2025?",
         ]
         if compute_mode == "remote":
         [compute_mode],
         [status],
         api_name=False,
     )
     input.submit(

graph.py CHANGED Viewed

@@ -65,10 +65,11 @@ def normalize_messages(messages):
             tool_messages = []
             count = 1
             while i < len(messages) and type(messages[i]) is ToolMessage:
-                #tool_msg = messages[i]
-                #formatted_content = f"## Tool Call {count}\n\n{tool_msg.content}"
-                #tool_messages.append(formatted_content)
-                tool_messages.append(messages[i].content)
                 count += 1
                 i += 1

             tool_messages = []
             count = 1
             while i < len(messages) and type(messages[i]) is ToolMessage:
+                tool_msg = messages[i].content.replace(
+                    "### Retrieved Emails:",
+                    f"### Retrieved Emails from Tool Call {count}:",
+                )
+                tool_messages.append(tool_msg)
                 count += 1
                 i += 1

main.py CHANGED Viewed

@@ -40,8 +40,8 @@ openai_model = "gpt-4o-mini"
 model_id = os.getenv("MODEL_ID")
 if model_id is None:
     # model_id = "HuggingFaceTB/SmolLM3-3B"
-    # model_id = "google/gemma-3-12b-it"
-    model_id = "Qwen/Qwen3-14B"
 # Suppress these messages:
 # INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"

 model_id = os.getenv("MODEL_ID")
 if model_id is None:
     # model_id = "HuggingFaceTB/SmolLM3-3B"
+    model_id = "google/gemma-3-12b-it"
+    # model_id = "Qwen/Qwen3-14B"
 # Suppress these messages:
 # INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"

prompts.py CHANGED Viewed

@@ -36,8 +36,8 @@ def query_prompt(chat_model, think=False):
         "For general summaries, use retrieve_emails(search_query='R'). "
         "For questions about years, use retrieve_emails(search_query=<query>, start_year=, end_year=) (this month is this year). "
         "For questions about months, use 3-letter abbreviations (Jan...Dec) for the 'month' argument. "
-        "If retrieved emails are available, use them as context to formulate your search query. "  # Gemma
-        "You should always retrieve more emails based on previous context and the most recent question. "  # Qwen
         # "Even if retrieved emails are available, you should retrieve more emails to answer the most recent question. "  # Qwen
         # "You must perform the search yourself. Do not tell the user how to retrieve emails. "  # Qwen
         # "Do not use your memory or knowledge to answer the user's question. Only retrieve emails based on the user's question. "  # Qwen

         "For general summaries, use retrieve_emails(search_query='R'). "
         "For questions about years, use retrieve_emails(search_query=<query>, start_year=, end_year=) (this month is this year). "
         "For questions about months, use 3-letter abbreviations (Jan...Dec) for the 'month' argument. "
+        "Use all previous messages as context to formulate your search query. "  # Gemma
+        "You should always retrieve more emails based on context and the most recent question. "  # Qwen
         # "Even if retrieved emails are available, you should retrieve more emails to answer the most recent question. "  # Qwen
         # "You must perform the search yourself. Do not tell the user how to retrieve emails. "  # Qwen
         # "Do not use your memory or knowledge to answer the user's question. Only retrieve emails based on the user's question. "  # Qwen