Spaces:
Sleeping
Sleeping
jedick
committed on
Commit
·
e4391fe
1
Parent(s):
3472410
Cleanup data directories
Browse files
- app.py +23 -3
- prompts.py +23 -23
app.py
CHANGED
|
@@ -8,6 +8,7 @@ from main import openai_model, model_id
|
|
| 8 |
from util import get_sources, get_start_end_months
|
| 9 |
from git import Repo
|
| 10 |
import zipfile
|
|
|
|
| 11 |
import spaces
|
| 12 |
import torch
|
| 13 |
import uuid
|
|
@@ -263,7 +264,7 @@ with gr.Blocks(
|
|
| 263 |
render=False,
|
| 264 |
)
|
| 265 |
data_error = gr.Textbox(
|
| 266 |
-
value="App is unavailable.
|
| 267 |
lines=1,
|
| 268 |
label="Error downloading or extracting data",
|
| 269 |
visible=False,
|
|
@@ -561,6 +562,19 @@ with gr.Blocks(
|
|
| 561 |
# Data loading
|
| 562 |
# ------------
|
| 563 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
def download():
|
| 565 |
"""Download the db.zip file"""
|
| 566 |
|
|
@@ -586,8 +600,14 @@ with gr.Blocks(
|
|
| 586 |
|
| 587 |
zip_file_path = "./R-help-db/db.zip"
|
| 588 |
extract_to_path = "./"
|
| 589 |
-
|
| 590 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 591 |
|
| 592 |
return None
|
| 593 |
|
|
|
|
| 8 |
from util import get_sources, get_start_end_months
|
| 9 |
from git import Repo
|
| 10 |
import zipfile
|
| 11 |
+
import shutil
|
| 12 |
import spaces
|
| 13 |
import torch
|
| 14 |
import uuid
|
|
|
|
| 264 |
render=False,
|
| 265 |
)
|
| 266 |
data_error = gr.Textbox(
|
| 267 |
+
value="App is unavailable because data could not be loaded. Try reloading the page, then contact the maintainer if the problem persists.",
|
| 268 |
lines=1,
|
| 269 |
label="Error downloading or extracting data",
|
| 270 |
visible=False,
|
|
|
|
| 562 |
# Data loading
|
| 563 |
# ------------
|
| 564 |
|
| 565 |
+
def rm_directory(directory_path):
|
| 566 |
+
"""Forcefully and recursively delete a directory, like rm -rf"""
|
| 567 |
+
|
| 568 |
+
try:
|
| 569 |
+
shutil.rmtree(directory_path)
|
| 570 |
+
print(f"Successfully deleted: {directory_path}")
|
| 571 |
+
except FileNotFoundError:
|
| 572 |
+
print(f"Directory not found: {directory_path}")
|
| 573 |
+
except PermissionError:
|
| 574 |
+
print(f"Permission denied: {directory_path}")
|
| 575 |
+
except Exception as e:
|
| 576 |
+
print(f"An error occurred: {e}")
|
| 577 |
+
|
| 578 |
def download():
|
| 579 |
"""Download the db.zip file"""
|
| 580 |
|
|
|
|
| 600 |
|
| 601 |
zip_file_path = "./R-help-db/db.zip"
|
| 602 |
extract_to_path = "./"
|
| 603 |
+
try:
|
| 604 |
+
with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
|
| 605 |
+
zip_ref.extractall(extract_to_path)
|
| 606 |
+
except:
|
| 607 |
+
# If there were any errors, clean up directories to
|
| 608 |
+
# initiate a new download when app is reloaded
|
| 609 |
+
rm_directory("./db")
|
| 610 |
+
rm_directory("./R-help-db")
|
| 611 |
|
| 612 |
return None
|
| 613 |
|
prompts.py
CHANGED
|
@@ -14,22 +14,22 @@ def retrieve_prompt(compute_mode):
|
|
| 14 |
start, end = get_start_end_months(get_sources())
|
| 15 |
|
| 16 |
retrieve_prompt = (
|
| 17 |
-
f"Today Date: {date.today()}.
|
| 18 |
-
"You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list.
|
| 19 |
-
"Do not ask the user for more information, but retrieve emails from the R-help mailing list archives.
|
| 20 |
# gpt-4o-mini says last two months aren't available with this: Emails from from {start} to {end} are available for retrieval.
|
| 21 |
-
f"The emails available for retrieval are from {start} to {end}.
|
| 22 |
-
"Write a search query based on the user's question, but do not answer the question just yet.
|
| 23 |
-
"For questions about differences or comparison between X and Y, retrieve emails about X and Y.
|
| 24 |
-
"If the user's question is about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year).
|
| 25 |
-
"Example: to retrieve emails about R from a month in any year use retrieve_emails(search_query='R', months=<month>).
|
| 26 |
# This confuses gpt-4o-mini (empty search_query - token problem?)
|
| 27 |
-
"Use 3-letter month abbreviations: Jan for January, Jul for July.
|
| 28 |
-
"If you decide not to retrieve emails, tell the user why and suggest how to improve their question to chat with the R-help mailing list.
|
| 29 |
)
|
| 30 |
# A sanity check that we don't have unassigned variables
|
| 31 |
# (this causes KeyError in parsing by ToolCallingLLM)
|
| 32 |
-
matches = re.findall(r"\{.*?\}", "".join(retrieve_prompt))
|
| 33 |
if matches:
|
| 34 |
raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
|
| 35 |
return retrieve_prompt
|
|
@@ -38,21 +38,21 @@ def retrieve_prompt(compute_mode):
|
|
| 38 |
def answer_prompt(with_tools=True):
|
| 39 |
"""Return system prompt for generate step"""
|
| 40 |
answer_prompt = (
|
| 41 |
-
f"Today Date: {date.today()}.
|
| 42 |
-
"You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list.
|
| 43 |
-
"Summarize the retrieved emails from the R-help mailing list archives to answer the user's question or query.
|
| 44 |
-
"If any of the retrieved emails are irrelevant (e.g. wrong dates), then do not use them.
|
| 45 |
-
"Tell the user if there are no retrieved emails or if you are unable to answer the question based on the information in the emails.
|
| 46 |
-
"Do not give an answer based on your own knowledge or memory, and do not include examples that aren't based on the retrieved emails.
|
| 47 |
-
"Example: For a question about writing formulas for lm(), make your answer about formulas for lm() from the retrieved emails.
|
| 48 |
-
"Do not respond with packages that are only listed under sessionInfo, session info, or other attached packages.
|
| 49 |
-
"Include inline citations (email senders and dates) in your response.
|
| 50 |
-
"Only answer general questions about R if the answer is given in the retrieved emails.
|
| 51 |
-
"Respond with 300 words maximum and 30 lines of code maximum and include any relevant URLs from the retrieved emails.
|
| 52 |
)
|
| 53 |
if with_tools:
|
| 54 |
answer_prompt += "Use answer_with_citations to provide the complete answer and all citations used. "
|
| 55 |
-
matches = re.findall(r"\{.*?\}", "".join(answer_prompt))
|
| 56 |
if matches:
|
| 57 |
raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
|
| 58 |
return answer_prompt
|
|
|
|
| 14 |
start, end = get_start_end_months(get_sources())
|
| 15 |
|
| 16 |
retrieve_prompt = (
|
| 17 |
+
f"Today Date: {date.today()}."
|
| 18 |
+
"You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list."
|
| 19 |
+
"Do not ask the user for more information, but retrieve emails from the R-help mailing list archives."
|
| 20 |
# gpt-4o-mini says last two months aren't available with this: Emails from from {start} to {end} are available for retrieval.
|
| 21 |
+
f"The emails available for retrieval are from {start} to {end}."
|
| 22 |
+
"Write a search query based on the user's question, but do not answer the question just yet."
|
| 23 |
+
"For questions about differences or comparison between X and Y, retrieve emails about X and Y."
|
| 24 |
+
"If the user's question is about years, use retrieve_emails(search_query=, start_year=, end_year=) (this month is this year)."
|
| 25 |
+
"Example: to retrieve emails about R from a month in any year use retrieve_emails(search_query='R', months=<month>)."
|
| 26 |
# This confuses gpt-4o-mini (empty search_query - token problem?)
|
| 27 |
+
"Use 3-letter month abbreviations: Jan for January, Jul for July."
|
| 28 |
+
"If you decide not to retrieve emails, tell the user why and suggest how to improve their question to chat with the R-help mailing list."
|
| 29 |
)
|
| 30 |
# A sanity check that we don't have unassigned variables
|
| 31 |
# (this causes KeyError in parsing by ToolCallingLLM)
|
| 32 |
+
matches = re.findall(r"\{.*?\}", " ".join(retrieve_prompt))
|
| 33 |
if matches:
|
| 34 |
raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
|
| 35 |
return retrieve_prompt
|
|
|
|
| 38 |
def answer_prompt(with_tools=True):
|
| 39 |
"""Return system prompt for generate step"""
|
| 40 |
answer_prompt = (
|
| 41 |
+
f"Today Date: {date.today()}."
|
| 42 |
+
"You are a helpful RAG chatbot designed to answer questions about R programming based on the R-help mailing list."
|
| 43 |
+
"Summarize the retrieved emails from the R-help mailing list archives to answer the user's question or query."
|
| 44 |
+
"If any of the retrieved emails are irrelevant (e.g. wrong dates), then do not use them."
|
| 45 |
+
"Tell the user if there are no retrieved emails or if you are unable to answer the question based on the information in the emails."
|
| 46 |
+
"Do not give an answer based on your own knowledge or memory, and do not include examples that aren't based on the retrieved emails."
|
| 47 |
+
"Example: For a question about writing formulas for lm(), make your answer about formulas for lm() from the retrieved emails."
|
| 48 |
+
"Do not respond with packages that are only listed under sessionInfo, session info, or other attached packages."
|
| 49 |
+
"Include inline citations (email senders and dates) in your response."
|
| 50 |
+
"Only answer general questions about R if the answer is given in the retrieved emails."
|
| 51 |
+
"Respond with 300 words maximum and 30 lines of code maximum and include any relevant URLs from the retrieved emails."
|
| 52 |
)
|
| 53 |
if with_tools:
|
| 54 |
answer_prompt += "Use answer_with_citations to provide the complete answer and all citations used. "
|
| 55 |
+
matches = re.findall(r"\{.*?\}", " ".join(answer_prompt))
|
| 56 |
if matches:
|
| 57 |
raise ValueError(f"Unassigned variables in prompt: {' '.join(matches)}")
|
| 58 |
return answer_prompt
|