Shreyas Meher committed on
Commit
a29b666
·
1 Parent(s): 3b9241c

Add batch CSV processing to Question Answering tab

Browse files

Accepts CSV with 'context' and 'question' columns, runs extractive QA
on each row, and outputs results with an 'answer' column. Follows the
same pattern as NER, Binary, and Multilabel batch processing.

Files changed (1) hide show
  1. app.py +37 -0
app.py CHANGED
@@ -521,6 +521,28 @@ def process_csv_multilabel(file):
521
  return out.name
522
 
523
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  # ============================================================================
525
  # FINETUNING
526
  # ============================================================================
@@ -1867,6 +1889,18 @@ with gr.Blocks(theme=theme, css=custom_css, title="ConfliBERT") as demo:
1867
  with gr.Column():
1868
  qa_output = gr.HTML(label="Answer")
1869
 
 
 
 
 
 
 
 
 
 
 
 
 
1870
  # ================================================================
1871
  # FINE-TUNE TAB
1872
  # ================================================================
@@ -2257,6 +2291,9 @@ with gr.Blocks(theme=theme, css=custom_css, title="ConfliBERT") as demo:
2257
  inputs=[qa_context, qa_question],
2258
  outputs=[qa_output],
2259
  )
 
 
 
2260
 
2261
  # Fine-tuning: example dataset loaders
2262
  ft_ex_binary_btn.click(
 
521
  return out.name
522
 
523
 
524
def process_csv_qa(file):
    """Run extractive question answering over every row of an uploaded CSV.

    Args:
        file: Uploaded file reference; resolved to a filesystem path with
            ``get_path``.

    Returns:
        Path of a temporary CSV containing the input rows plus an
        ``answer`` column, or ``None`` when ``get_path`` yields no path.

    Raises:
        ValueError: If the CSV lacks a ``context`` or ``question`` column.
    """
    path = get_path(file)
    if path is None:
        return None
    df = pd.read_csv(path)
    if 'context' not in df.columns or 'question' not in df.columns:
        raise ValueError("CSV must contain 'context' and 'question' columns")

    # question_answering() returns HTML; tags are stripped so the CSV holds
    # plain text. Compile once instead of re-resolving the pattern per row.
    tag_pattern = re.compile(r'<[^>]+>')
    answers = []
    for _, row in df.iterrows():
        if pd.isna(row['context']) or pd.isna(row['question']):
            # Rows with a missing context or question get an empty answer.
            answers.append("")
        else:
            html = question_answering(str(row['context']), str(row['question']))
            answers.append(tag_pattern.sub('', html).strip())
    df['answer'] = answers

    # Reserve a unique temp path, then close the handle before pandas writes
    # to it: leaving it open leaks a file descriptor on every call and can
    # block reopening the file by name on Windows.
    with tempfile.NamedTemporaryFile(suffix='_qa_results.csv', delete=False) as out:
        out_path = out.name
    df.to_csv(out_path, index=False)
    return out_path
544
+
545
+
546
  # ============================================================================
547
  # FINETUNING
548
  # ============================================================================
 
1889
  with gr.Column():
1890
  qa_output = gr.HTML(label="Answer")
1891
 
1892
+ with gr.Accordion("Batch Processing (CSV)", open=False):
1893
+ gr.Markdown(
1894
+ "Upload a CSV file with `context` and `question` columns "
1895
+ "to process multiple questions at once."
1896
+ )
1897
+ with gr.Row():
1898
+ qa_csv_in = gr.File(
1899
+ label="Upload CSV", file_types=[".csv"],
1900
+ )
1901
+ qa_csv_out = gr.File(label="Download Results")
1902
+ qa_csv_btn = gr.Button("Process CSV", variant="secondary")
1903
+
1904
  # ================================================================
1905
  # FINE-TUNE TAB
1906
  # ================================================================
 
2291
  inputs=[qa_context, qa_question],
2292
  outputs=[qa_output],
2293
  )
2294
+ qa_csv_btn.click(
2295
+ fn=process_csv_qa, inputs=[qa_csv_in], outputs=[qa_csv_out],
2296
+ )
2297
 
2298
  # Fine-tuning: example dataset loaders
2299
  ft_ex_binary_btn.click(