jedick committed on
Commit
6020ae0
·
1 Parent(s): e92d658

Downgrade Gradio to avoid ValueError: Invalid file descriptor: -1 on HF Spaces

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. app.py +40 -37
  3. requirements.txt +1 -1
  4. test_main.py +37 -0
  5. test_retriever.py +75 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🇷🤝💬
4
  colorFrom: indigo
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 6.2.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
 
4
  colorFrom: indigo
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.49.1
8
  app_file: app.py
9
  pinned: false
10
  license: mit
app.py CHANGED
@@ -239,8 +239,39 @@ def run_workflow_in_session(request: gr.Request, *args):
239
  yield value
240
 
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
  with gr.Blocks(
243
  title="R-help-chat",
 
 
 
244
  ) as demo:
245
 
246
  # -----------------
@@ -275,8 +306,9 @@ with gr.Blocks(
275
  )
276
  chatbot = gr.Chatbot(
277
  show_label=False,
 
 
278
  avatar_images=(None, "images/cloud.png"),
279
- buttons=["copy_all"],
280
  render=False,
281
  )
282
  # Modified from gradio/chat_interface.py
@@ -335,7 +367,7 @@ with gr.Blocks(
335
  Retrieved emails are shown below the chatbot and are used by the LLM to generate an answer.
336
  You can ask follow-up questions with the chat history as context; changing the mailing list maintains history.
337
  Press the clear button (🗑) to clear the history and start a new chat.
338
- *Privacy notice*: Data sharing with OpenAI is enabled.
339
  """
340
  return intro
341
 
@@ -481,7 +513,7 @@ with gr.Blocks(
481
 
482
  # Start a new thread when the user presses the clear (trash) button
483
  # https://github.com/gradio-app/gradio/issues/9722
484
- chatbot.clear(generate_thread_id, outputs=[thread_id], api_visibility="private")
485
 
486
  collection.change(
487
  # We need to build a new graph if the collection changes
@@ -499,7 +531,7 @@ with gr.Blocks(
499
  run_workflow_in_session,
500
  [input, collection, chatbot, thread_id],
501
  [chatbot, retrieved_emails, citations_text],
502
- api_visibility="private",
503
  )
504
 
505
  retrieved_emails.change(
@@ -507,7 +539,7 @@ with gr.Blocks(
507
  update_textbox,
508
  [retrieved_emails, emails_textbox],
509
  [emails_textbox, emails_textbox],
510
- api_visibility="private",
511
  )
512
 
513
  citations_text.change(
@@ -515,7 +547,7 @@ with gr.Blocks(
515
  update_textbox,
516
  [citations_text, citations_textbox],
517
  [citations_textbox, citations_textbox],
518
- api_visibility="private",
519
  )
520
 
521
  chatbot.clear(
@@ -523,7 +555,7 @@ with gr.Blocks(
523
  lambda x: gr.update(value=x),
524
  [input],
525
  [input],
526
- api_visibility="private",
527
  )
528
 
529
  # Clean up graph instances when page is closed/refreshed
@@ -532,37 +564,8 @@ with gr.Blocks(
532
 
533
  if __name__ == "__main__":
534
 
535
- # Set allowed_paths to serve chatbot avatar images
536
- current_directory = os.getcwd()
537
- allowed_paths = [current_directory + "/images"]
538
- # Noto Color Emoji gets a nice-looking Unicode Character “🇷” (U+1F1F7) on Chrome
539
- theme = gr.themes.Soft(
540
- font=[
541
- "ui-sans-serif",
542
- "system-ui",
543
- "sans-serif",
544
- "Apple Color Emoji",
545
- "Segoe UI Emoji",
546
- "Segoe UI Symbol",
547
- "Noto Color Emoji",
548
- ]
549
- )
550
- # Custom CSS for bottom alignment
551
- css = """
552
- .row-container {
553
- display: flex;
554
- align-items: flex-end; /* Align components at the bottom */
555
- gap: 10px; /* Add spacing between components */
556
- }
557
- """
558
- # HTML for Font Awesome
559
- # https://cdnjs.com/libraries/font-awesome
560
- head = '<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/7.0.1/css/all.min.css" rel="stylesheet">'
561
  # Launch the Gradio app
562
  demo.launch(
563
  allowed_paths=allowed_paths,
564
- theme=theme,
565
- css=css,
566
- head=head,
567
- footer_links=["gradio", "settings"],
568
  )
 
239
  yield value
240
 
241
 
242
+ # Set allowed_paths to serve chatbot avatar images
243
+ current_directory = os.getcwd()
244
+ allowed_paths = [current_directory + "/images"]
245
+ # Noto Color Emoji gets a nice-looking Unicode Character “🇷” (U+1F1F7) on Chrome
246
+ theme = gr.themes.Soft(
247
+ font=[
248
+ "ui-sans-serif",
249
+ "system-ui",
250
+ "sans-serif",
251
+ "Apple Color Emoji",
252
+ "Segoe UI Emoji",
253
+ "Segoe UI Symbol",
254
+ "Noto Color Emoji",
255
+ ]
256
+ )
257
+ # Custom CSS for bottom alignment
258
+ css = """
259
+ .row-container {
260
+ display: flex;
261
+ align-items: flex-end; /* Align components at the bottom */
262
+ gap: 10px; /* Add spacing between components */
263
+ }
264
+ """
265
+ # HTML for Font Awesome
266
+ # https://cdnjs.com/libraries/font-awesome
267
+ head = '<link href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/7.0.1/css/all.min.css" rel="stylesheet">'
268
+
269
+
270
  with gr.Blocks(
271
  title="R-help-chat",
272
+ theme=theme,
273
+ css=css,
274
+ head=head,
275
  ) as demo:
276
 
277
  # -----------------
 
306
  )
307
  chatbot = gr.Chatbot(
308
  show_label=False,
309
+ type="messages", # Gradio 5
310
+ # buttons=["copy_all"], # Gradio 6
311
  avatar_images=(None, "images/cloud.png"),
 
312
  render=False,
313
  )
314
  # Modified from gradio/chat_interface.py
 
367
  Retrieved emails are shown below the chatbot and are used by the LLM to generate an answer.
368
  You can ask follow-up questions with the chat history as context; changing the mailing list maintains history.
369
  Press the clear button (🗑) to clear the history and start a new chat.
370
+ *Privacy notice*: Inputs and outputs are shared with OpenAI.
371
  """
372
  return intro
373
 
 
513
 
514
  # Start a new thread when the user presses the clear (trash) button
515
  # https://github.com/gradio-app/gradio/issues/9722
516
+ chatbot.clear(generate_thread_id, outputs=[thread_id], api_name=False)
517
 
518
  collection.change(
519
  # We need to build a new graph if the collection changes
 
531
  run_workflow_in_session,
532
  [input, collection, chatbot, thread_id],
533
  [chatbot, retrieved_emails, citations_text],
534
+ api_name=False,
535
  )
536
 
537
  retrieved_emails.change(
 
539
  update_textbox,
540
  [retrieved_emails, emails_textbox],
541
  [emails_textbox, emails_textbox],
542
+ api_name=False,
543
  )
544
 
545
  citations_text.change(
 
547
  update_textbox,
548
  [citations_text, citations_textbox],
549
  [citations_textbox, citations_textbox],
550
+ api_name=False,
551
  )
552
 
553
  chatbot.clear(
 
555
  lambda x: gr.update(value=x),
556
  [input],
557
  [input],
558
+ api_name=False,
559
  )
560
 
561
  # Clean up graph instances when page is closed/refreshed
 
564
 
565
  if __name__ == "__main__":
566
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  # Launch the Gradio app
568
  demo.launch(
569
  allowed_paths=allowed_paths,
570
+ show_api=False,
 
 
 
571
  )
requirements.txt CHANGED
@@ -21,4 +21,4 @@ ragas==0.2.15
21
  #ragas==0.4.2
22
 
23
  # Frontend
24
- gradio==6.2.0
 
21
  #ragas==0.4.2
22
 
23
  # Frontend
24
+ gradio==5.49.1
test_main.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from main import ProcessCollection, RunChain, RunGraph
2
+ from dotenv import load_dotenv
3
+
4
+ # Setup environment variables
5
+ load_dotenv(dotenv_path=".env", override=True)
6
+
7
+ # Define email and database directories
8
+ email_dir = "test_emails/R-help/"
9
+ db_dir = "test_db"
10
+
11
+
12
+ def test_main():
13
+
14
+ # Create the test database
15
+ ProcessCollection(email_dir, db_dir)
16
+
17
+ # Define the collection (last part of the email directory path)
18
+ collection = "R-help"
19
+
20
+ # Run a query with the chain workflow
21
+ result = RunChain("What R functions are discussed?", db_dir, collection)
22
+ # We should get at least one of these
23
+ assert (
24
+ "aggregate" in result
25
+ or "t.test" in result
26
+ or "lme" in result
27
+ or "ifelse" in result
28
+ or "xyplot" in result
29
+ )
30
+
31
+ # Run a query with the graph workflow
32
+ result = RunGraph(
33
+ "What dataset was used in a question about plotting with nlme?",
34
+ db_dir,
35
+ collection,
36
+ )
37
+ assert "BodyWeight" in result["answer"]
test_retriever.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from retriever import BuildRetriever
2
+ from main import ProcessCollection
3
+ from dotenv import load_dotenv
4
+
5
+ # Setup environment variables
6
+ load_dotenv(dotenv_path=".env", override=True)
7
+
8
+ # Define email and database directories
9
+ # NOTE: Here we add the R-devel collection to the database
10
+ # (R-help was already added by the CI running test_main.py before this file)
11
+ email_dir = "test_emails/R-devel/"
12
+ db_dir = "test_db"
13
+
14
+
15
+ def test_retriever():
16
+
17
+ # Create the test database
18
+ ProcessCollection(email_dir, db_dir)
19
+
20
+ # Get a dense retriever instance
21
+ retriever = BuildRetriever(
22
+ db_dir, "R-help", "dense", top_k=1, start_year=2025, end_year=2025
23
+ )
24
+ # The result is a semantically similar match to the query
25
+ results = retriever.invoke("inscrutable")
26
+ assert (
27
+ "anyone who might know enough to actually do it" in results[0].page_content
28
+ or "makes no sense" in results[0].page_content
29
+ )
30
+ # But we don't get an exact match
31
+ assert not "inscrutable" in results[0].page_content
32
+
33
+ # Try keyword retrieval
34
+ retriever = BuildRetriever(
35
+ db_dir, "R-help", "sparse", top_k=1, start_year=2025, end_year=2025
36
+ )
37
+ results = retriever.invoke("inscrutable")
38
+ # This time we get an exact match
39
+ assert "inscrutable" in results[0].page_content
40
+
41
+ # R-devel with hybrid search
42
+ retriever = BuildRetriever(
43
+ db_dir, "R-devel", "hybrid", top_k=1, start_year=2025, end_year=2025
44
+ )
45
+ results = retriever.invoke("MCMC")
46
+ assert "MCMC" in results[0].page_content
47
+
48
+ # Search by month - sparse
49
+ retriever = BuildRetriever(
50
+ db_dir,
51
+ "R-help",
52
+ "sparse",
53
+ top_k=6,
54
+ start_year=2025,
55
+ end_year=2025,
56
+ months=["Dec"],
57
+ )
58
+ results = retriever.invoke("the")
59
+ # Check that the source file name for each result contains "December"
60
+ assert all(["December" in result.metadata["source"] for result in results])
61
+
62
+ # Search by month - dense
63
+ retriever = BuildRetriever(
64
+ db_dir,
65
+ "R-help",
66
+ "dense",
67
+ top_k=6,
68
+ start_year=2025,
69
+ end_year=2025,
70
+ months=["Oct"],
71
+ )
72
+ results = retriever.invoke("plotting")
73
+ assert all(["October" in result.metadata["source"] for result in results])
74
+ # In the test database, only one email in October 2025 has the word "plot"
75
+ assert "plot" in results[0].page_content