Spaces:

jedick
/

R-help-chat

Running

App Files Community

jedick committed on Jan 8

Commit

6020ae0

1 Parent(s): e92d658

Downgrade Gradio to avoid ValueError: Invalid file descriptor: -1 on HF Spaces

Browse files

Files changed (5) hide show

README.md +1 -1
app.py +40 -37
requirements.txt +1 -1
test_main.py +37 -0
test_retriever.py +75 -0

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 🇷🤝💬
 colorFrom: indigo
 colorTo: red
 sdk: gradio
-sdk_version: 6.2.0
 app_file: app.py
 pinned: false
 license: mit

 colorFrom: indigo
 colorTo: red
 sdk: gradio
+sdk_version: 5.49.1
 app_file: app.py
 pinned: false
 license: mit

app.py CHANGED Viewed

@@ -239,8 +239,39 @@ def run_workflow_in_session(request: gr.Request, *args):
         yield value
 with gr.Blocks(
     title="R-help-chat",
 ) as demo:
     # -----------------
@@ -275,8 +306,9 @@ with gr.Blocks(
     )
     chatbot = gr.Chatbot(
         show_label=False,
         avatar_images=(None, "images/cloud.png"),
-        buttons=["copy_all"],
         render=False,
     )
     # Modified from gradio/chat_interface.py
@@ -335,7 +367,7 @@ with gr.Blocks(
             Retrieved emails are shown below the chatbot and are used by the LLM to generate an answer.
             You can ask follow-up questions with the chat history as context; changing the mailing list maintains history.
             Press the clear button (🗑) to clear the history and start a new chat.
-            *Privacy notice*: Data sharing with OpenAI is enabled.
             """
         return intro
@@ -481,7 +513,7 @@ with gr.Blocks(
     # Start a new thread when the user presses the clear (trash) button
     # https://github.com/gradio-app/gradio/issues/9722
-    chatbot.clear(generate_thread_id, outputs=[thread_id], api_visibility="private")
     collection.change(
         # We need to build a new graph if the collection changes
@@ -499,7 +531,7 @@ with gr.Blocks(
         run_workflow_in_session,
         [input, collection, chatbot, thread_id],
         [chatbot, retrieved_emails, citations_text],
-        api_visibility="private",
     )
     retrieved_emails.change(
@@ -507,7 +539,7 @@ with gr.Blocks(
         update_textbox,
         [retrieved_emails, emails_textbox],
         [emails_textbox, emails_textbox],
-        api_visibility="private",
     )
     citations_text.change(
@@ -515,7 +547,7 @@ with gr.Blocks(
         update_textbox,
         [citations_text, citations_textbox],
         [citations_textbox, citations_textbox],
-        api_visibility="private",
     )
     chatbot.clear(
@@ -523,7 +555,7 @@ with gr.Blocks(
         lambda x: gr.update(value=x),
         [input],
         [input],
-        api_visibility="private",
     )
     # Clean up graph instances when page is closed/refreshed
@@ -532,37 +564,8 @@ with gr.Blocks(
 if __name__ == "__main__":
-    # Set allowed_paths to serve chatbot avatar images
-    current_directory = os.getcwd()
-    allowed_paths = [current_directory + "/images"]
-    # Noto Color Emoji gets a nice-looking Unicode Character “🇷” (U+1F1F7) on Chrome
-    theme = gr.themes.Soft(
-        font=[
-            "ui-sans-serif",
-            "system-ui",
-            "sans-serif",
-            "Apple Color Emoji",
-            "Segoe UI Emoji",
-            "Segoe UI Symbol",
-            "Noto Color Emoji",
-        ]
-    )
-    # Custom CSS for bottom alignment
-    css = """
-    .row-container {
-        display: flex;
-        align-items: flex-end; /* Align components at the bottom */
-        gap: 10px; /* Add spacing between components */
-    }
-    """
-    # HTML for Font Awesome
-    # https://cdnjs.com/libraries/font-awesome
-    head = '<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/7.0.1/css/all.min.css" rel="stylesheet">'
     # Launch the Gradio app
     demo.launch(
         allowed_paths=allowed_paths,
-        theme=theme,
-        css=css,
-        head=head,
-        footer_links=["gradio", "settings"],
     )

         yield value
+# Set allowed_paths to serve chatbot avatar images
+current_directory = os.getcwd()
+allowed_paths = [current_directory + "/images"]
+# Noto Color Emoji gets a nice-looking Unicode Character “🇷” (U+1F1F7) on Chrome
+theme = gr.themes.Soft(
+    font=[
+        "ui-sans-serif",
+        "system-ui",
+        "sans-serif",
+        "Apple Color Emoji",
+        "Segoe UI Emoji",
+        "Segoe UI Symbol",
+        "Noto Color Emoji",
+    ]
+)
+# Custom CSS for bottom alignment
+css = """
+.row-container {
+    display: flex;
+    align-items: flex-end; /* Align components at the bottom */
+    gap: 10px; /* Add spacing between components */
+}
+"""
+# HTML for Font Awesome
+# https://cdnjs.com/libraries/font-awesome
+head = '<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/7.0.1/css/all.min.css" rel="stylesheet">'
 with gr.Blocks(
     title="R-help-chat",
+    theme=theme,
+    css=css,
+    head=head,
 ) as demo:
     # -----------------
     )
     chatbot = gr.Chatbot(
         show_label=False,
+        type="messages",  # Gradio 5
+        # buttons=["copy_all"], # Gradio 6
         avatar_images=(None, "images/cloud.png"),
         render=False,
     )
     # Modified from gradio/chat_interface.py
             Retrieved emails are shown below the chatbot and are used by the LLM to generate an answer.
             You can ask follow-up questions with the chat history as context; changing the mailing list maintains history.
             Press the clear button (🗑) to clear the history and start a new chat.
+            *Privacy notice*: Inputs and outputs are shared with OpenAI.
             """
         return intro
     # Start a new thread when the user presses the clear (trash) button
     # https://github.com/gradio-app/gradio/issues/9722
+    chatbot.clear(generate_thread_id, outputs=[thread_id], api_name=False)
     collection.change(
         # We need to build a new graph if the collection changes
         run_workflow_in_session,
         [input, collection, chatbot, thread_id],
         [chatbot, retrieved_emails, citations_text],
+        api_name=False,
     )
     retrieved_emails.change(
         update_textbox,
         [retrieved_emails, emails_textbox],
         [emails_textbox, emails_textbox],
+        api_name=False,
     )
     citations_text.change(
         update_textbox,
         [citations_text, citations_textbox],
         [citations_textbox, citations_textbox],
+        api_name=False,
     )
     chatbot.clear(
         lambda x: gr.update(value=x),
         [input],
         [input],
+        api_name=False,
     )
     # Clean up graph instances when page is closed/refreshed
 if __name__ == "__main__":
     # Launch the Gradio app
     demo.launch(
         allowed_paths=allowed_paths,
+        show_api=False,
     )

requirements.txt CHANGED Viewed

@@ -21,4 +21,4 @@ ragas==0.2.15
 #ragas==0.4.2
 # Frontend
-gradio==6.2.0

 #ragas==0.4.2
 # Frontend
+gradio==5.49.1

test_main.py ADDED Viewed

	@@ -0,0 +1,37 @@

+from main import ProcessCollection, RunChain, RunGraph
+from dotenv import load_dotenv
+# Setup environment variables
+load_dotenv(dotenv_path=".env", override=True)
+# Define email and database directories
+email_dir = "test_emails/R-help/"
+db_dir = "test_db"
+def test_main():
+    # Create the test database
+    ProcessCollection(email_dir, db_dir)
+    # Define the collection (last part of the email directory path)
+    collection = "R-help"
+    # Run a query with the chain workflow
+    result = RunChain("What R functions are discussed?", db_dir, collection)
+    # We should get at least one of these
+    assert (
+        "aggregate" in result
+        or "t.test" in result
+        or "lme" in result
+        or "ifelse" in result
+        or "xyplot" in result
+    )
+    # Run a query with the graph workflow
+    result = RunGraph(
+        "What dataset was used in a question about plotting with nlme?",
+        db_dir,
+        collection,
+    )
+    assert "BodyWeight" in result["answer"]

test_retriever.py ADDED Viewed

	@@ -0,0 +1,75 @@

+from retriever import BuildRetriever
+from main import ProcessCollection
+from dotenv import load_dotenv
+# Setup environment variables
+load_dotenv(dotenv_path=".env", override=True)
+# Define email and database directories
+# NOTE: Here we add the R-devel collection to the database
+# (R-help was already added by the CI running test_main.py before this file)
+email_dir = "test_emails/R-devel/"
+db_dir = "test_db"
+def test_retriever():
+    # Create the test database
+    ProcessCollection(email_dir, db_dir)
+    # Get a dense retriever instance
+    retriever = BuildRetriever(
+        db_dir, "R-help", "dense", top_k=1, start_year=2025, end_year=2025
+    )
+    # The result is a semantically similar match to the query
+    results = retriever.invoke("inscrutable")
+    assert (
+        "anyone who might know enough to actually do it" in results[0].page_content
+        or "makes no sense" in results[0].page_content
+    )
+    # But we don't get an exact match
+    assert not "inscrutable" in results[0].page_content
+    # Try keyword retrieval
+    retriever = BuildRetriever(
+        db_dir, "R-help", "sparse", top_k=1, start_year=2025, end_year=2025
+    )
+    results = retriever.invoke("inscrutable")
+    # This time we get an exact match
+    assert "inscrutable" in results[0].page_content
+    # R-devel with hybrid search
+    retriever = BuildRetriever(
+        db_dir, "R-devel", "hybrid", top_k=1, start_year=2025, end_year=2025
+    )
+    results = retriever.invoke("MCMC")
+    assert "MCMC" in results[0].page_content
+    # Search by month - sparse
+    retriever = BuildRetriever(
+        db_dir,
+        "R-help",
+        "sparse",
+        top_k=6,
+        start_year=2025,
+        end_year=2025,
+        months=["Dec"],
+    )
+    results = retriever.invoke("the")
+    # Check that the source file name for each result contains "December"
+    assert all(["December" in result.metadata["source"] for result in results])
+    # Search by month - dense
+    retriever = BuildRetriever(
+        db_dir,
+        "R-help",
+        "dense",
+        top_k=6,
+        start_year=2025,
+        end_year=2025,
+        months=["Oct"],
+    )
+    results = retriever.invoke("plotting")
+    assert all(["October" in result.metadata["source"] for result in results])
+    # In the test database, only one email in October 2025 has the word "plot"
+    assert "plot" in results[0].page_content