Spaces:
Running
Running
Commit
·
ae1b24b
1
Parent(s):
7a755b8
Replace path textbox with folder upload option
Browse files
- Add toggle between "Upload File(s)" and "Upload Folder" for PDFs and images
- Use Gradio's file_count="directory" for folder uploads
- Remove text-based path input (doesn't work for hosted apps)
- Updated all processing functions to handle folder uploads
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
app.py
CHANGED
|
@@ -486,8 +486,8 @@ def update_task_visibility(task):
|
|
| 486 |
|
| 487 |
|
| 488 |
def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
| 489 |
-
pdf_file,
|
| 490 |
-
image_file,
|
| 491 |
model_tier, model, model_source_input, api_key_input,
|
| 492 |
progress=gr.Progress(track_tqdm=True)):
|
| 493 |
"""Extract categories from data and display them in a table."""
|
|
@@ -536,16 +536,19 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 536 |
)
|
| 537 |
|
| 538 |
elif input_type == "PDF Documents":
|
| 539 |
-
# Use
|
| 540 |
-
if
|
| 541 |
-
|
|
|
|
|
|
|
|
|
|
| 542 |
elif pdf_file:
|
| 543 |
if isinstance(pdf_file, list):
|
| 544 |
pdf_input = [f if isinstance(f, str) else f.name for f in pdf_file]
|
| 545 |
else:
|
| 546 |
pdf_input = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 547 |
else:
|
| 548 |
-
yield None, None, "**Error:** Please upload
|
| 549 |
return
|
| 550 |
|
| 551 |
mode_mapping = {
|
|
@@ -566,16 +569,19 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 566 |
)
|
| 567 |
|
| 568 |
elif input_type == "Images":
|
| 569 |
-
# Use
|
| 570 |
-
if
|
| 571 |
-
|
|
|
|
|
|
|
|
|
|
| 572 |
elif image_file:
|
| 573 |
if isinstance(image_file, list):
|
| 574 |
image_input = [f if isinstance(f, str) else f.name for f in image_file]
|
| 575 |
else:
|
| 576 |
image_input = image_file if isinstance(image_file, str) else image_file.name
|
| 577 |
else:
|
| 578 |
-
yield None, None, "**Error:** Please upload image
|
| 579 |
return
|
| 580 |
|
| 581 |
result = catllm.extract(
|
|
@@ -620,8 +626,8 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 620 |
|
| 621 |
|
| 622 |
def run_classify_data(input_type, spreadsheet_file, spreadsheet_column,
|
| 623 |
-
pdf_file,
|
| 624 |
-
image_file,
|
| 625 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 626 |
model_tier, model, model_source_input, api_key_input,
|
| 627 |
progress=gr.Progress(track_tqdm=True)):
|
|
@@ -681,10 +687,14 @@ def run_classify_data(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 681 |
)
|
| 682 |
|
| 683 |
elif input_type == "PDF Documents":
|
| 684 |
-
# Use
|
| 685 |
-
if
|
| 686 |
-
|
| 687 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 688 |
elif pdf_file:
|
| 689 |
if isinstance(pdf_file, list):
|
| 690 |
pdf_input = [f if isinstance(f, str) else f.name for f in pdf_file]
|
|
@@ -693,7 +703,7 @@ def run_classify_data(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 693 |
pdf_input = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 694 |
original_filename = pdf_input.split("/")[-1]
|
| 695 |
else:
|
| 696 |
-
yield None, None, None, None, "**Error:** Please upload
|
| 697 |
return
|
| 698 |
|
| 699 |
column_name = "PDF Pages"
|
|
@@ -717,10 +727,14 @@ def run_classify_data(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 717 |
)
|
| 718 |
|
| 719 |
elif input_type == "Images":
|
| 720 |
-
# Use
|
| 721 |
-
if
|
| 722 |
-
|
| 723 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 724 |
elif image_file:
|
| 725 |
if isinstance(image_file, list):
|
| 726 |
image_input = [f if isinstance(f, str) else f.name for f in image_file]
|
|
@@ -729,7 +743,7 @@ def run_classify_data(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 729 |
image_input = image_file if isinstance(image_file, str) else image_file.name
|
| 730 |
original_filename = image_input.split("/")[-1]
|
| 731 |
else:
|
| 732 |
-
yield None, None, None, None, "**Error:** Please upload image
|
| 733 |
return
|
| 734 |
|
| 735 |
column_name = "Image Files"
|
|
@@ -835,8 +849,8 @@ Provide your work in JSON format where the number belonging to each category is
|
|
| 835 |
|
| 836 |
|
| 837 |
def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
| 838 |
-
pdf_file,
|
| 839 |
-
image_file,
|
| 840 |
model_tier, model, model_source_input, api_key_input,
|
| 841 |
progress=gr.Progress(track_tqdm=True)):
|
| 842 |
"""Extract categories then classify data with them."""
|
|
@@ -882,10 +896,14 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 882 |
mode_param = None
|
| 883 |
|
| 884 |
elif input_type == "PDF Documents":
|
| 885 |
-
# Use
|
| 886 |
-
if
|
| 887 |
-
|
| 888 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 889 |
elif pdf_file:
|
| 890 |
if isinstance(pdf_file, list):
|
| 891 |
input_data = [f if isinstance(f, str) else f.name for f in pdf_file]
|
|
@@ -894,7 +912,7 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 894 |
input_data = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 895 |
original_filename = input_data.split("/")[-1]
|
| 896 |
else:
|
| 897 |
-
yield None, None, None, None, None, None, "**Error:** Please upload
|
| 898 |
return
|
| 899 |
|
| 900 |
column_name = "PDF Pages"
|
|
@@ -909,10 +927,14 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 909 |
mode_param = mode_mapping.get(pdf_mode, "image")
|
| 910 |
|
| 911 |
elif input_type == "Images":
|
| 912 |
-
# Use
|
| 913 |
-
if
|
| 914 |
-
|
| 915 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 916 |
elif image_file:
|
| 917 |
if isinstance(image_file, list):
|
| 918 |
input_data = [f if isinstance(f, str) else f.name for f in image_file]
|
|
@@ -921,7 +943,7 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 921 |
input_data = image_file if isinstance(image_file, str) else image_file.name
|
| 922 |
original_filename = input_data.split("/")[-1]
|
| 923 |
else:
|
| 924 |
-
yield None, None, None, None, None, None, "**Error:** Please upload image
|
| 925 |
return
|
| 926 |
|
| 927 |
column_name = "Image Files"
|
|
@@ -1098,12 +1120,14 @@ def reset_all():
|
|
| 1098 |
gr.update(visible=False), # image_input_group
|
| 1099 |
None, # spreadsheet_file
|
| 1100 |
gr.update(choices=[], value=None), # spreadsheet_column
|
|
|
|
| 1101 |
None, # pdf_file
|
| 1102 |
-
|
| 1103 |
"", # pdf_description
|
| 1104 |
"Image (visual documents)", # pdf_mode
|
|
|
|
| 1105 |
None, # image_file
|
| 1106 |
-
|
| 1107 |
"", # image_description
|
| 1108 |
None, # task_mode
|
| 1109 |
]
|
|
@@ -1206,15 +1230,20 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1206 |
|
| 1207 |
# PDF input group
|
| 1208 |
with gr.Group(visible=False) as pdf_input_group:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1209 |
pdf_file = gr.File(
|
| 1210 |
label="Upload PDF Document(s)",
|
| 1211 |
file_types=[".pdf"],
|
| 1212 |
file_count="multiple"
|
| 1213 |
)
|
| 1214 |
-
|
| 1215 |
-
label="
|
| 1216 |
-
|
| 1217 |
-
|
| 1218 |
)
|
| 1219 |
pdf_description = gr.Textbox(
|
| 1220 |
label="Document Description",
|
|
@@ -1229,15 +1258,20 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1229 |
|
| 1230 |
# Image input group
|
| 1231 |
with gr.Group(visible=False) as image_input_group:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1232 |
image_file = gr.File(
|
| 1233 |
label="Upload Images",
|
| 1234 |
file_types=["image"],
|
| 1235 |
file_count="multiple"
|
| 1236 |
)
|
| 1237 |
-
|
| 1238 |
-
label="
|
| 1239 |
-
|
| 1240 |
-
|
| 1241 |
)
|
| 1242 |
image_description = gr.Textbox(
|
| 1243 |
label="Image Description",
|
|
@@ -1374,6 +1408,32 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1374 |
outputs=[spreadsheet_file, spreadsheet_column, status]
|
| 1375 |
)
|
| 1376 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1377 |
add_category_btn.click(
|
| 1378 |
fn=add_category_field,
|
| 1379 |
inputs=[category_count],
|
|
@@ -1410,8 +1470,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1410 |
|
| 1411 |
# Main run button handler - dispatches based on task_mode
|
| 1412 |
def dispatch_run(task, input_type, spreadsheet_file, spreadsheet_column,
|
| 1413 |
-
pdf_file,
|
| 1414 |
-
image_file,
|
| 1415 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 1416 |
model_tier, model, model_source, api_key,
|
| 1417 |
progress=gr.Progress(track_tqdm=True)):
|
|
@@ -1419,8 +1479,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1419 |
if task == "extract":
|
| 1420 |
for update in run_extract_categories(
|
| 1421 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1422 |
-
pdf_file,
|
| 1423 |
-
image_file,
|
| 1424 |
model_tier, model, model_source, api_key,
|
| 1425 |
progress
|
| 1426 |
):
|
|
@@ -1436,8 +1496,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1436 |
elif task == "assign":
|
| 1437 |
for update in run_classify_data(
|
| 1438 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1439 |
-
pdf_file,
|
| 1440 |
-
image_file,
|
| 1441 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 1442 |
model_tier, model, model_source, api_key,
|
| 1443 |
progress
|
|
@@ -1454,8 +1514,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1454 |
elif task == "extract_and_assign":
|
| 1455 |
for update in run_extract_and_assign(
|
| 1456 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1457 |
-
pdf_file,
|
| 1458 |
-
image_file,
|
| 1459 |
model_tier, model, model_source, api_key,
|
| 1460 |
progress
|
| 1461 |
):
|
|
@@ -1473,8 +1533,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1473 |
run_btn.click(
|
| 1474 |
fn=dispatch_run,
|
| 1475 |
inputs=[task_mode, input_type, spreadsheet_file, spreadsheet_column,
|
| 1476 |
-
pdf_file,
|
| 1477 |
-
image_file,
|
| 1478 |
outputs=[extracted_categories, extract_download, distribution_plot, results, download_file, status]
|
| 1479 |
)
|
| 1480 |
|
|
@@ -1484,8 +1544,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1484 |
outputs=[
|
| 1485 |
input_type, text_input_group, pdf_input_group, image_input_group,
|
| 1486 |
spreadsheet_file, spreadsheet_column,
|
| 1487 |
-
pdf_file,
|
| 1488 |
-
image_file,
|
| 1489 |
task_mode
|
| 1490 |
] + category_inputs + [
|
| 1491 |
add_category_btn, category_count,
|
|
|
|
| 486 |
|
| 487 |
|
| 488 |
def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
| 489 |
+
pdf_file, pdf_folder, pdf_description, pdf_mode,
|
| 490 |
+
image_file, image_folder, image_description,
|
| 491 |
model_tier, model, model_source_input, api_key_input,
|
| 492 |
progress=gr.Progress(track_tqdm=True)):
|
| 493 |
"""Extract categories from data and display them in a table."""
|
|
|
|
| 536 |
)
|
| 537 |
|
| 538 |
elif input_type == "PDF Documents":
|
| 539 |
+
# Use folder if provided, otherwise use uploaded files
|
| 540 |
+
if pdf_folder:
|
| 541 |
+
if isinstance(pdf_folder, list):
|
| 542 |
+
pdf_input = [f if isinstance(f, str) else f.name for f in pdf_folder if str(f.name if hasattr(f, 'name') else f).lower().endswith('.pdf')]
|
| 543 |
+
else:
|
| 544 |
+
pdf_input = pdf_folder if isinstance(pdf_folder, str) else pdf_folder.name
|
| 545 |
elif pdf_file:
|
| 546 |
if isinstance(pdf_file, list):
|
| 547 |
pdf_input = [f if isinstance(f, str) else f.name for f in pdf_file]
|
| 548 |
else:
|
| 549 |
pdf_input = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 550 |
else:
|
| 551 |
+
yield None, None, "**Error:** Please upload PDF file(s) or a folder"
|
| 552 |
return
|
| 553 |
|
| 554 |
mode_mapping = {
|
|
|
|
| 569 |
)
|
| 570 |
|
| 571 |
elif input_type == "Images":
|
| 572 |
+
# Use folder if provided, otherwise use uploaded files
|
| 573 |
+
if image_folder:
|
| 574 |
+
if isinstance(image_folder, list):
|
| 575 |
+
image_input = [f if isinstance(f, str) else f.name for f in image_folder]
|
| 576 |
+
else:
|
| 577 |
+
image_input = image_folder if isinstance(image_folder, str) else image_folder.name
|
| 578 |
elif image_file:
|
| 579 |
if isinstance(image_file, list):
|
| 580 |
image_input = [f if isinstance(f, str) else f.name for f in image_file]
|
| 581 |
else:
|
| 582 |
image_input = image_file if isinstance(image_file, str) else image_file.name
|
| 583 |
else:
|
| 584 |
+
yield None, None, "**Error:** Please upload image file(s) or a folder"
|
| 585 |
return
|
| 586 |
|
| 587 |
result = catllm.extract(
|
|
|
|
| 626 |
|
| 627 |
|
| 628 |
def run_classify_data(input_type, spreadsheet_file, spreadsheet_column,
|
| 629 |
+
pdf_file, pdf_folder, pdf_description, pdf_mode,
|
| 630 |
+
image_file, image_folder, image_description,
|
| 631 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 632 |
model_tier, model, model_source_input, api_key_input,
|
| 633 |
progress=gr.Progress(track_tqdm=True)):
|
|
|
|
| 687 |
)
|
| 688 |
|
| 689 |
elif input_type == "PDF Documents":
|
| 690 |
+
# Use folder if provided, otherwise use uploaded files
|
| 691 |
+
if pdf_folder:
|
| 692 |
+
if isinstance(pdf_folder, list):
|
| 693 |
+
pdf_input = [f if isinstance(f, str) else f.name for f in pdf_folder if str(f.name if hasattr(f, 'name') else f).lower().endswith('.pdf')]
|
| 694 |
+
original_filename = "pdf_folder"
|
| 695 |
+
else:
|
| 696 |
+
pdf_input = pdf_folder if isinstance(pdf_folder, str) else pdf_folder.name
|
| 697 |
+
original_filename = pdf_input.split("/")[-1]
|
| 698 |
elif pdf_file:
|
| 699 |
if isinstance(pdf_file, list):
|
| 700 |
pdf_input = [f if isinstance(f, str) else f.name for f in pdf_file]
|
|
|
|
| 703 |
pdf_input = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 704 |
original_filename = pdf_input.split("/")[-1]
|
| 705 |
else:
|
| 706 |
+
yield None, None, None, None, "**Error:** Please upload PDF file(s) or a folder"
|
| 707 |
return
|
| 708 |
|
| 709 |
column_name = "PDF Pages"
|
|
|
|
| 727 |
)
|
| 728 |
|
| 729 |
elif input_type == "Images":
|
| 730 |
+
# Use folder if provided, otherwise use uploaded files
|
| 731 |
+
if image_folder:
|
| 732 |
+
if isinstance(image_folder, list):
|
| 733 |
+
image_input = [f if isinstance(f, str) else f.name for f in image_folder]
|
| 734 |
+
original_filename = "image_folder"
|
| 735 |
+
else:
|
| 736 |
+
image_input = image_folder if isinstance(image_folder, str) else image_folder.name
|
| 737 |
+
original_filename = image_input.split("/")[-1]
|
| 738 |
elif image_file:
|
| 739 |
if isinstance(image_file, list):
|
| 740 |
image_input = [f if isinstance(f, str) else f.name for f in image_file]
|
|
|
|
| 743 |
image_input = image_file if isinstance(image_file, str) else image_file.name
|
| 744 |
original_filename = image_input.split("/")[-1]
|
| 745 |
else:
|
| 746 |
+
yield None, None, None, None, "**Error:** Please upload image file(s) or a folder"
|
| 747 |
return
|
| 748 |
|
| 749 |
column_name = "Image Files"
|
|
|
|
| 849 |
|
| 850 |
|
| 851 |
def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
| 852 |
+
pdf_file, pdf_folder, pdf_description, pdf_mode,
|
| 853 |
+
image_file, image_folder, image_description,
|
| 854 |
model_tier, model, model_source_input, api_key_input,
|
| 855 |
progress=gr.Progress(track_tqdm=True)):
|
| 856 |
"""Extract categories then classify data with them."""
|
|
|
|
| 896 |
mode_param = None
|
| 897 |
|
| 898 |
elif input_type == "PDF Documents":
|
| 899 |
+
# Use folder if provided, otherwise use uploaded files
|
| 900 |
+
if pdf_folder:
|
| 901 |
+
if isinstance(pdf_folder, list):
|
| 902 |
+
input_data = [f if isinstance(f, str) else f.name for f in pdf_folder if str(f.name if hasattr(f, 'name') else f).lower().endswith('.pdf')]
|
| 903 |
+
original_filename = "pdf_folder"
|
| 904 |
+
else:
|
| 905 |
+
input_data = pdf_folder if isinstance(pdf_folder, str) else pdf_folder.name
|
| 906 |
+
original_filename = input_data.split("/")[-1]
|
| 907 |
elif pdf_file:
|
| 908 |
if isinstance(pdf_file, list):
|
| 909 |
input_data = [f if isinstance(f, str) else f.name for f in pdf_file]
|
|
|
|
| 912 |
input_data = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 913 |
original_filename = input_data.split("/")[-1]
|
| 914 |
else:
|
| 915 |
+
yield None, None, None, None, None, None, "**Error:** Please upload PDF file(s) or a folder"
|
| 916 |
return
|
| 917 |
|
| 918 |
column_name = "PDF Pages"
|
|
|
|
| 927 |
mode_param = mode_mapping.get(pdf_mode, "image")
|
| 928 |
|
| 929 |
elif input_type == "Images":
|
| 930 |
+
# Use folder if provided, otherwise use uploaded files
|
| 931 |
+
if image_folder:
|
| 932 |
+
if isinstance(image_folder, list):
|
| 933 |
+
input_data = [f if isinstance(f, str) else f.name for f in image_folder]
|
| 934 |
+
original_filename = "image_folder"
|
| 935 |
+
else:
|
| 936 |
+
input_data = image_folder if isinstance(image_folder, str) else image_folder.name
|
| 937 |
+
original_filename = input_data.split("/")[-1]
|
| 938 |
elif image_file:
|
| 939 |
if isinstance(image_file, list):
|
| 940 |
input_data = [f if isinstance(f, str) else f.name for f in image_file]
|
|
|
|
| 943 |
input_data = image_file if isinstance(image_file, str) else image_file.name
|
| 944 |
original_filename = input_data.split("/")[-1]
|
| 945 |
else:
|
| 946 |
+
yield None, None, None, None, None, None, "**Error:** Please upload image file(s) or a folder"
|
| 947 |
return
|
| 948 |
|
| 949 |
column_name = "Image Files"
|
|
|
|
| 1120 |
gr.update(visible=False), # image_input_group
|
| 1121 |
None, # spreadsheet_file
|
| 1122 |
gr.update(choices=[], value=None), # spreadsheet_column
|
| 1123 |
+
"Upload File(s)", # pdf_upload_type
|
| 1124 |
None, # pdf_file
|
| 1125 |
+
None, # pdf_folder
|
| 1126 |
"", # pdf_description
|
| 1127 |
"Image (visual documents)", # pdf_mode
|
| 1128 |
+
"Upload File(s)", # image_upload_type
|
| 1129 |
None, # image_file
|
| 1130 |
+
None, # image_folder
|
| 1131 |
"", # image_description
|
| 1132 |
None, # task_mode
|
| 1133 |
]
|
|
|
|
| 1230 |
|
| 1231 |
# PDF input group
|
| 1232 |
with gr.Group(visible=False) as pdf_input_group:
|
| 1233 |
+
pdf_upload_type = gr.Radio(
|
| 1234 |
+
choices=["Upload File(s)", "Upload Folder"],
|
| 1235 |
+
value="Upload File(s)",
|
| 1236 |
+
label="Upload Type"
|
| 1237 |
+
)
|
| 1238 |
pdf_file = gr.File(
|
| 1239 |
label="Upload PDF Document(s)",
|
| 1240 |
file_types=[".pdf"],
|
| 1241 |
file_count="multiple"
|
| 1242 |
)
|
| 1243 |
+
pdf_folder = gr.File(
|
| 1244 |
+
label="Upload PDF Folder",
|
| 1245 |
+
file_count="directory",
|
| 1246 |
+
visible=False
|
| 1247 |
)
|
| 1248 |
pdf_description = gr.Textbox(
|
| 1249 |
label="Document Description",
|
|
|
|
| 1258 |
|
| 1259 |
# Image input group
|
| 1260 |
with gr.Group(visible=False) as image_input_group:
|
| 1261 |
+
image_upload_type = gr.Radio(
|
| 1262 |
+
choices=["Upload File(s)", "Upload Folder"],
|
| 1263 |
+
value="Upload File(s)",
|
| 1264 |
+
label="Upload Type"
|
| 1265 |
+
)
|
| 1266 |
image_file = gr.File(
|
| 1267 |
label="Upload Images",
|
| 1268 |
file_types=["image"],
|
| 1269 |
file_count="multiple"
|
| 1270 |
)
|
| 1271 |
+
image_folder = gr.File(
|
| 1272 |
+
label="Upload Image Folder",
|
| 1273 |
+
file_count="directory",
|
| 1274 |
+
visible=False
|
| 1275 |
)
|
| 1276 |
image_description = gr.Textbox(
|
| 1277 |
label="Image Description",
|
|
|
|
| 1408 |
outputs=[spreadsheet_file, spreadsheet_column, status]
|
| 1409 |
)
|
| 1410 |
|
| 1411 |
+
# Toggle between file and folder upload for PDFs
|
| 1412 |
+
def toggle_pdf_upload(upload_type):
    """Switch visibility between the PDF file picker and the PDF folder picker.

    Returns two ``gr.update`` objects, in the order the change handler lists
    its outputs: first the file picker, then the folder picker. Exactly one
    of them is made visible.
    """
    # Any value other than "Upload File(s)" selects the folder picker.
    show_files = upload_type == "Upload File(s)"
    return gr.update(visible=show_files), gr.update(visible=not show_files)
|
| 1417 |
+
|
| 1418 |
+
pdf_upload_type.change(
|
| 1419 |
+
fn=toggle_pdf_upload,
|
| 1420 |
+
inputs=[pdf_upload_type],
|
| 1421 |
+
outputs=[pdf_file, pdf_folder]
|
| 1422 |
+
)
|
| 1423 |
+
|
| 1424 |
+
# Toggle between file and folder upload for Images
|
| 1425 |
+
def toggle_image_upload(upload_type):
    """Switch visibility between the image file picker and the image folder picker.

    Returns a pair of ``gr.update`` objects: the first applies to the file
    picker, the second to the folder picker. Only one of the two is shown.
    """
    # The folder picker is shown for every choice except "Upload File(s)".
    use_folder = upload_type != "Upload File(s)"
    file_picker_update = gr.update(visible=not use_folder)
    folder_picker_update = gr.update(visible=use_folder)
    return file_picker_update, folder_picker_update
|
| 1430 |
+
|
| 1431 |
+
image_upload_type.change(
|
| 1432 |
+
fn=toggle_image_upload,
|
| 1433 |
+
inputs=[image_upload_type],
|
| 1434 |
+
outputs=[image_file, image_folder]
|
| 1435 |
+
)
|
| 1436 |
+
|
| 1437 |
add_category_btn.click(
|
| 1438 |
fn=add_category_field,
|
| 1439 |
inputs=[category_count],
|
|
|
|
| 1470 |
|
| 1471 |
# Main run button handler - dispatches based on task_mode
|
| 1472 |
def dispatch_run(task, input_type, spreadsheet_file, spreadsheet_column,
|
| 1473 |
+
pdf_file, pdf_folder_val, pdf_description, pdf_mode,
|
| 1474 |
+
image_file, image_folder_val, image_description,
|
| 1475 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 1476 |
model_tier, model, model_source, api_key,
|
| 1477 |
progress=gr.Progress(track_tqdm=True)):
|
|
|
|
| 1479 |
if task == "extract":
|
| 1480 |
for update in run_extract_categories(
|
| 1481 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1482 |
+
pdf_file, pdf_folder_val, pdf_description, pdf_mode,
|
| 1483 |
+
image_file, image_folder_val, image_description,
|
| 1484 |
model_tier, model, model_source, api_key,
|
| 1485 |
progress
|
| 1486 |
):
|
|
|
|
| 1496 |
elif task == "assign":
|
| 1497 |
for update in run_classify_data(
|
| 1498 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1499 |
+
pdf_file, pdf_folder_val, pdf_description, pdf_mode,
|
| 1500 |
+
image_file, image_folder_val, image_description,
|
| 1501 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 1502 |
model_tier, model, model_source, api_key,
|
| 1503 |
progress
|
|
|
|
| 1514 |
elif task == "extract_and_assign":
|
| 1515 |
for update in run_extract_and_assign(
|
| 1516 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1517 |
+
pdf_file, pdf_folder_val, pdf_description, pdf_mode,
|
| 1518 |
+
image_file, image_folder_val, image_description,
|
| 1519 |
model_tier, model, model_source, api_key,
|
| 1520 |
progress
|
| 1521 |
):
|
|
|
|
| 1533 |
run_btn.click(
|
| 1534 |
fn=dispatch_run,
|
| 1535 |
inputs=[task_mode, input_type, spreadsheet_file, spreadsheet_column,
|
| 1536 |
+
pdf_file, pdf_folder, pdf_description, pdf_mode,
|
| 1537 |
+
image_file, image_folder, image_description] + category_inputs + [model_tier, model, model_source, api_key],
|
| 1538 |
outputs=[extracted_categories, extract_download, distribution_plot, results, download_file, status]
|
| 1539 |
)
|
| 1540 |
|
|
|
|
| 1544 |
outputs=[
|
| 1545 |
input_type, text_input_group, pdf_input_group, image_input_group,
|
| 1546 |
spreadsheet_file, spreadsheet_column,
|
| 1547 |
+
pdf_upload_type, pdf_file, pdf_folder, pdf_description, pdf_mode,
|
| 1548 |
+
image_upload_type, image_file, image_folder, image_description,
|
| 1549 |
task_mode
|
| 1550 |
] + category_inputs + [
|
| 1551 |
add_category_btn, category_count,
|