Spaces:
Running
on
Zero
Running
on
Zero
jedick
committed on
Commit
·
555a40e
1
Parent(s):
1b28bd4
Change chat model to google/gemma-3-12b-it
Browse files
app.py
CHANGED
|
@@ -416,8 +416,6 @@ with gr.Blocks(
|
|
| 416 |
status_text = f"""
|
| 417 |
π Now in **local** mode, using ZeroGPU hardware<br>
|
| 418 |
β Response time is about one minute<br>
|
| 419 |
-
🧠 Add **/think** to enable thinking</br>
|
| 420 |
-
  🔢 Increases ZeroGPU allotment to 100 seconds</br>
|
| 421 |
✨ [{embedding_model_id.split("/")[-1]}](https://huggingface.co/{embedding_model_id}) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
|
| 422 |
π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
|
| 423 |
"""
|
|
@@ -461,7 +459,7 @@ with gr.Blocks(
|
|
| 461 |
def get_multi_tool_questions(compute_mode, as_dataset=True):
|
| 462 |
"""Get multi-tool example questions based on compute mode"""
|
| 463 |
questions = [
|
| 464 |
-
"Differences between lapply and for loops
|
| 465 |
"Discuss pipe operator usage in 2022, 2023, and 2024",
|
| 466 |
]
|
| 467 |
|
|
@@ -474,7 +472,7 @@ with gr.Blocks(
|
|
| 474 |
"""Get multi-turn example questions based on compute mode"""
|
| 475 |
questions = [
|
| 476 |
"Lookup emails that reference bugs.r-project.org in 2025",
|
| 477 |
-
"Did those authors report bugs before 2025?
|
| 478 |
]
|
| 479 |
|
| 480 |
if compute_mode == "remote":
|
|
@@ -610,18 +608,6 @@ with gr.Blocks(
|
|
| 610 |
[compute_mode],
|
| 611 |
[status],
|
| 612 |
api_name=False,
|
| 613 |
-
).then(
|
| 614 |
-
# Update multi-tool examples based on compute mode
|
| 615 |
-
get_multi_tool_questions,
|
| 616 |
-
[compute_mode],
|
| 617 |
-
[multi_tool_questions.dataset],
|
| 618 |
-
api_name=False,
|
| 619 |
-
).then(
|
| 620 |
-
# Update multi-turn examples based on compute mode
|
| 621 |
-
get_multi_turn_questions,
|
| 622 |
-
[compute_mode],
|
| 623 |
-
[multi_turn_questions.dataset],
|
| 624 |
-
api_name=False,
|
| 625 |
)
|
| 626 |
|
| 627 |
input.submit(
|
|
|
|
| 416 |
status_text = f"""
|
| 417 |
π Now in **local** mode, using ZeroGPU hardware<br>
|
| 418 |
β Response time is about one minute<br>
|
|
|
|
|
|
|
| 419 |
✨ [{embedding_model_id.split("/")[-1]}](https://huggingface.co/{embedding_model_id}) and [{model_id.split("/")[-1]}](https://huggingface.co/{model_id})<br>
|
| 420 |
π See the project's [GitHub repository](https://github.com/jedick/R-help-chat)
|
| 421 |
"""
|
|
|
|
| 459 |
def get_multi_tool_questions(compute_mode, as_dataset=True):
|
| 460 |
"""Get multi-tool example questions based on compute mode"""
|
| 461 |
questions = [
|
| 462 |
+
"Differences between lapply and for loops",
|
| 463 |
"Discuss pipe operator usage in 2022, 2023, and 2024",
|
| 464 |
]
|
| 465 |
|
|
|
|
| 472 |
"""Get multi-turn example questions based on compute mode"""
|
| 473 |
questions = [
|
| 474 |
"Lookup emails that reference bugs.r-project.org in 2025",
|
| 475 |
+
"Did those authors report bugs before 2025?",
|
| 476 |
]
|
| 477 |
|
| 478 |
if compute_mode == "remote":
|
|
|
|
| 608 |
[compute_mode],
|
| 609 |
[status],
|
| 610 |
api_name=False,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 611 |
)
|
| 612 |
|
| 613 |
input.submit(
|
graph.py
CHANGED
|
@@ -65,10 +65,11 @@ def normalize_messages(messages):
|
|
| 65 |
tool_messages = []
|
| 66 |
count = 1
|
| 67 |
while i < len(messages) and type(messages[i]) is ToolMessage:
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
|
|
|
| 72 |
count += 1
|
| 73 |
i += 1
|
| 74 |
|
|
|
|
| 65 |
tool_messages = []
|
| 66 |
count = 1
|
| 67 |
while i < len(messages) and type(messages[i]) is ToolMessage:
|
| 68 |
+
tool_msg = messages[i].content.replace(
|
| 69 |
+
"### Retrieved Emails:",
|
| 70 |
+
f"### Retrieved Emails from Tool Call {count}:",
|
| 71 |
+
)
|
| 72 |
+
tool_messages.append(tool_msg)
|
| 73 |
count += 1
|
| 74 |
i += 1
|
| 75 |
|
main.py
CHANGED
|
@@ -40,8 +40,8 @@ openai_model = "gpt-4o-mini"
|
|
| 40 |
model_id = os.getenv("MODEL_ID")
|
| 41 |
if model_id is None:
|
| 42 |
# model_id = "HuggingFaceTB/SmolLM3-3B"
|
| 43 |
-
|
| 44 |
-
model_id = "Qwen/Qwen3-14B"
|
| 45 |
|
| 46 |
# Suppress these messages:
|
| 47 |
# INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
|
|
|
|
| 40 |
model_id = os.getenv("MODEL_ID")
|
| 41 |
if model_id is None:
|
| 42 |
# model_id = "HuggingFaceTB/SmolLM3-3B"
|
| 43 |
+
model_id = "google/gemma-3-12b-it"
|
| 44 |
+
# model_id = "Qwen/Qwen3-14B"
|
| 45 |
|
| 46 |
# Suppress these messages:
|
| 47 |
# INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
|
prompts.py
CHANGED
|
@@ -36,8 +36,8 @@ def query_prompt(chat_model, think=False):
|
|
| 36 |
"For general summaries, use retrieve_emails(search_query='R'). "
|
| 37 |
"For questions about years, use retrieve_emails(search_query=<query>, start_year=, end_year=) (this month is this year). "
|
| 38 |
"For questions about months, use 3-letter abbreviations (Jan...Dec) for the 'month' argument. "
|
| 39 |
-
"
|
| 40 |
-
"You should always retrieve more emails based on
|
| 41 |
# "Even if retrieved emails are available, you should retrieve more emails to answer the most recent question. " # Qwen
|
| 42 |
# "You must perform the search yourself. Do not tell the user how to retrieve emails. " # Qwen
|
| 43 |
# "Do not use your memory or knowledge to answer the user's question. Only retrieve emails based on the user's question. " # Qwen
|
|
|
|
| 36 |
"For general summaries, use retrieve_emails(search_query='R'). "
|
| 37 |
"For questions about years, use retrieve_emails(search_query=<query>, start_year=, end_year=) (this month is this year). "
|
| 38 |
"For questions about months, use 3-letter abbreviations (Jan...Dec) for the 'month' argument. "
|
| 39 |
+
"Use all previous messages as context to formulate your search query. " # Gemma
|
| 40 |
+
"You should always retrieve more emails based on context and the most recent question. " # Qwen
|
| 41 |
# "Even if retrieved emails are available, you should retrieve more emails to answer the most recent question. " # Qwen
|
| 42 |
# "You must perform the search yourself. Do not tell the user how to retrieve emails. " # Qwen
|
| 43 |
# "Do not use your memory or knowledge to answer the user's question. Only retrieve emails based on the user's question. " # Qwen
|