Spaces:
Running
Running
Shreyas Meher committed on
Commit ·
a29b666
1
Parent(s): 3b9241c
Add batch CSV processing to Question Answering tab
Browse files
Accepts CSV with 'context' and 'question' columns, runs extractive QA
on each row, and outputs results with an 'answer' column. Follows the
same pattern as NER, Binary, and Multilabel batch processing.
app.py
CHANGED
|
@@ -521,6 +521,28 @@ def process_csv_multilabel(file):
|
|
| 521 |
return out.name
|
| 522 |
|
| 523 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 524 |
# ============================================================================
|
| 525 |
# FINETUNING
|
| 526 |
# ============================================================================
|
|
@@ -1867,6 +1889,18 @@ with gr.Blocks(theme=theme, css=custom_css, title="ConfliBERT") as demo:
|
|
| 1867 |
with gr.Column():
|
| 1868 |
qa_output = gr.HTML(label="Answer")
|
| 1869 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1870 |
# ================================================================
|
| 1871 |
# FINE-TUNE TAB
|
| 1872 |
# ================================================================
|
|
@@ -2257,6 +2291,9 @@ with gr.Blocks(theme=theme, css=custom_css, title="ConfliBERT") as demo:
|
|
| 2257 |
inputs=[qa_context, qa_question],
|
| 2258 |
outputs=[qa_output],
|
| 2259 |
)
|
|
|
|
|
|
|
|
|
|
| 2260 |
|
| 2261 |
# Fine-tuning: example dataset loaders
|
| 2262 |
ft_ex_binary_btn.click(
|
|
|
|
| 521 |
return out.name
|
| 522 |
|
| 523 |
|
| 524 |
+
def process_csv_qa(file):
    """Run question answering on every row of an uploaded CSV file.

    The CSV must have ``context`` and ``question`` columns. Each row is
    passed to ``question_answering``; the markup tags in its return value
    are stripped and the remaining text is stored in a new ``answer``
    column. Rows where either field is missing get an empty answer.

    Args:
        file: The uploaded file object, resolved to a path via ``get_path``.

    Returns:
        The path of a temporary CSV file holding the input rows plus the
        ``answer`` column, or ``None`` when ``get_path`` yields no path.

    Raises:
        ValueError: If the ``context`` or ``question`` column is absent.
    """
    path = get_path(file)
    if path is None:
        return None

    df = pd.read_csv(path)
    if 'context' not in df.columns or 'question' not in df.columns:
        raise ValueError("CSV must contain 'context' and 'question' columns")

    answers = []
    # Iterate the two columns directly: iterrows() builds one Series per row
    # and may upcast values (e.g. int -> float) before they reach str().
    for context, question in zip(df['context'], df['question']):
        if pd.isna(context) or pd.isna(question):
            answers.append("")
            continue
        markup = question_answering(str(context), str(question))
        answers.append(re.sub(r'<[^>]+>', '', markup).strip())
    df['answer'] = answers

    # Only the unique name is needed. Close the handle right away so the
    # descriptor is not leaked and pandas can reopen the path on platforms
    # that forbid opening a file that is already open. delete=False keeps
    # the file on disk for the caller to serve as a download.
    with tempfile.NamedTemporaryFile(suffix='_qa_results.csv', delete=False) as out:
        out_path = out.name
    df.to_csv(out_path, index=False)
    return out_path
|
| 544 |
+
|
| 545 |
+
|
| 546 |
# ============================================================================
|
| 547 |
# FINETUNING
|
| 548 |
# ============================================================================
|
|
|
|
| 1889 |
with gr.Column():
|
| 1890 |
qa_output = gr.HTML(label="Answer")
|
| 1891 |
|
| 1892 |
+
with gr.Accordion("Batch Processing (CSV)", open=False):
|
| 1893 |
+
gr.Markdown(
|
| 1894 |
+
"Upload a CSV file with `context` and `question` columns "
|
| 1895 |
+
"to process multiple questions at once."
|
| 1896 |
+
)
|
| 1897 |
+
with gr.Row():
|
| 1898 |
+
qa_csv_in = gr.File(
|
| 1899 |
+
label="Upload CSV", file_types=[".csv"],
|
| 1900 |
+
)
|
| 1901 |
+
qa_csv_out = gr.File(label="Download Results")
|
| 1902 |
+
qa_csv_btn = gr.Button("Process CSV", variant="secondary")
|
| 1903 |
+
|
| 1904 |
# ================================================================
|
| 1905 |
# FINE-TUNE TAB
|
| 1906 |
# ================================================================
|
|
|
|
| 2291 |
inputs=[qa_context, qa_question],
|
| 2292 |
outputs=[qa_output],
|
| 2293 |
)
|
| 2294 |
+
qa_csv_btn.click(
|
| 2295 |
+
fn=process_csv_qa, inputs=[qa_csv_in], outputs=[qa_csv_out],
|
| 2296 |
+
)
|
| 2297 |
|
| 2298 |
# Fine-tuning: example dataset loaders
|
| 2299 |
ft_ex_binary_btn.click(
|