Spaces:
Running
Running
Commit
·
7a755b8
1
Parent(s):
6a41c27
Add file/directory path input option for PDFs and images
Browse files
- Add text input for entering local file or directory paths
- Path input works as alternative to file upload
- Supports single files, multiple files, or directories
- Updated all processing functions to handle path input
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
app.py
CHANGED
|
@@ -486,8 +486,8 @@ def update_task_visibility(task):
|
|
| 486 |
|
| 487 |
|
| 488 |
def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
| 489 |
-
pdf_file, pdf_description, pdf_mode,
|
| 490 |
-
image_file, image_description,
|
| 491 |
model_tier, model, model_source_input, api_key_input,
|
| 492 |
progress=gr.Progress(track_tqdm=True)):
|
| 493 |
"""Extract categories from data and display them in a table."""
|
|
@@ -536,11 +536,18 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 536 |
)
|
| 537 |
|
| 538 |
elif input_type == "PDF Documents":
|
| 539 |
-
if
|
| 540 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
return
|
| 542 |
|
| 543 |
-
pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 544 |
mode_mapping = {
|
| 545 |
"Image (visual documents)": "image",
|
| 546 |
"Text (text-heavy)": "text",
|
|
@@ -549,7 +556,7 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 549 |
actual_mode = mode_mapping.get(pdf_mode, "image")
|
| 550 |
|
| 551 |
result = catllm.extract(
|
| 552 |
-
input_data=
|
| 553 |
api_key=actual_api_key,
|
| 554 |
input_type="pdf",
|
| 555 |
description=pdf_description or "document",
|
|
@@ -559,18 +566,20 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 559 |
)
|
| 560 |
|
| 561 |
elif input_type == "Images":
|
| 562 |
-
if
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
|
|
|
| 569 |
else:
|
| 570 |
-
|
|
|
|
| 571 |
|
| 572 |
result = catllm.extract(
|
| 573 |
-
input_data=
|
| 574 |
api_key=actual_api_key,
|
| 575 |
input_type="image",
|
| 576 |
description=image_description or "images",
|
|
@@ -611,8 +620,8 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 611 |
|
| 612 |
|
| 613 |
def run_classify_data(input_type, spreadsheet_file, spreadsheet_column,
|
| 614 |
-
pdf_file, pdf_description, pdf_mode,
|
| 615 |
-
image_file, image_description,
|
| 616 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 617 |
model_tier, model, model_source_input, api_key_input,
|
| 618 |
progress=gr.Progress(track_tqdm=True)):
|
|
@@ -672,12 +681,21 @@ def run_classify_data(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 672 |
)
|
| 673 |
|
| 674 |
elif input_type == "PDF Documents":
|
| 675 |
-
if
|
| 676 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 677 |
return
|
| 678 |
|
| 679 |
-
pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 680 |
-
original_filename = pdf_path.split("/")[-1]
|
| 681 |
column_name = "PDF Pages"
|
| 682 |
|
| 683 |
mode_mapping = {
|
|
@@ -688,7 +706,7 @@ def run_classify_data(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 688 |
actual_mode = mode_mapping.get(pdf_mode, "image")
|
| 689 |
|
| 690 |
result = catllm.classify(
|
| 691 |
-
input_data=
|
| 692 |
categories=categories,
|
| 693 |
api_key=actual_api_key,
|
| 694 |
input_type="pdf",
|
|
@@ -699,20 +717,25 @@ def run_classify_data(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 699 |
)
|
| 700 |
|
| 701 |
elif input_type == "Images":
|
| 702 |
-
if
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 708 |
else:
|
| 709 |
-
|
|
|
|
| 710 |
|
| 711 |
-
original_filename = "images"
|
| 712 |
column_name = "Image Files"
|
| 713 |
|
| 714 |
result = catllm.classify(
|
| 715 |
-
input_data=
|
| 716 |
categories=categories,
|
| 717 |
api_key=actual_api_key,
|
| 718 |
input_type="image",
|
|
@@ -812,8 +835,8 @@ Provide your work in JSON format where the number belonging to each category is
|
|
| 812 |
|
| 813 |
|
| 814 |
def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
| 815 |
-
pdf_file, pdf_description, pdf_mode,
|
| 816 |
-
image_file, image_description,
|
| 817 |
model_tier, model, model_source_input, api_key_input,
|
| 818 |
progress=gr.Progress(track_tqdm=True)):
|
| 819 |
"""Extract categories then classify data with them."""
|
|
@@ -859,13 +882,21 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 859 |
mode_param = None
|
| 860 |
|
| 861 |
elif input_type == "PDF Documents":
|
| 862 |
-
if
|
| 863 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 864 |
return
|
| 865 |
|
| 866 |
-
pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 867 |
-
input_data = pdf_path
|
| 868 |
-
original_filename = pdf_path.split("/")[-1]
|
| 869 |
column_name = "PDF Pages"
|
| 870 |
input_type_param = "pdf"
|
| 871 |
description = pdf_description or "document"
|
|
@@ -878,16 +909,21 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 878 |
mode_param = mode_mapping.get(pdf_mode, "image")
|
| 879 |
|
| 880 |
elif input_type == "Images":
|
| 881 |
-
if
|
| 882 |
-
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 887 |
else:
|
| 888 |
-
|
|
|
|
| 889 |
|
| 890 |
-
original_filename = "images"
|
| 891 |
column_name = "Image Files"
|
| 892 |
input_type_param = "image"
|
| 893 |
description = image_description or "images"
|
|
@@ -1063,9 +1099,11 @@ def reset_all():
|
|
| 1063 |
None, # spreadsheet_file
|
| 1064 |
gr.update(choices=[], value=None), # spreadsheet_column
|
| 1065 |
None, # pdf_file
|
|
|
|
| 1066 |
"", # pdf_description
|
| 1067 |
"Image (visual documents)", # pdf_mode
|
| 1068 |
None, # image_file
|
|
|
|
| 1069 |
"", # image_description
|
| 1070 |
None, # task_mode
|
| 1071 |
]
|
|
@@ -1169,8 +1207,14 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1169 |
# PDF input group
|
| 1170 |
with gr.Group(visible=False) as pdf_input_group:
|
| 1171 |
pdf_file = gr.File(
|
| 1172 |
-
label="Upload PDF Document",
|
| 1173 |
-
file_types=[".pdf"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1174 |
)
|
| 1175 |
pdf_description = gr.Textbox(
|
| 1176 |
label="Document Description",
|
|
@@ -1190,6 +1234,11 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1190 |
file_types=["image"],
|
| 1191 |
file_count="multiple"
|
| 1192 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1193 |
image_description = gr.Textbox(
|
| 1194 |
label="Image Description",
|
| 1195 |
placeholder="e.g., 'product photos', 'social media posts'",
|
|
@@ -1361,8 +1410,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1361 |
|
| 1362 |
# Main run button handler - dispatches based on task_mode
|
| 1363 |
def dispatch_run(task, input_type, spreadsheet_file, spreadsheet_column,
|
| 1364 |
-
pdf_file, pdf_description, pdf_mode,
|
| 1365 |
-
image_file, image_description,
|
| 1366 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 1367 |
model_tier, model, model_source, api_key,
|
| 1368 |
progress=gr.Progress(track_tqdm=True)):
|
|
@@ -1370,8 +1419,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1370 |
if task == "extract":
|
| 1371 |
for update in run_extract_categories(
|
| 1372 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1373 |
-
pdf_file, pdf_description, pdf_mode,
|
| 1374 |
-
image_file, image_description,
|
| 1375 |
model_tier, model, model_source, api_key,
|
| 1376 |
progress
|
| 1377 |
):
|
|
@@ -1387,8 +1436,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1387 |
elif task == "assign":
|
| 1388 |
for update in run_classify_data(
|
| 1389 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1390 |
-
pdf_file, pdf_description, pdf_mode,
|
| 1391 |
-
image_file, image_description,
|
| 1392 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 1393 |
model_tier, model, model_source, api_key,
|
| 1394 |
progress
|
|
@@ -1405,8 +1454,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1405 |
elif task == "extract_and_assign":
|
| 1406 |
for update in run_extract_and_assign(
|
| 1407 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1408 |
-
pdf_file, pdf_description, pdf_mode,
|
| 1409 |
-
image_file, image_description,
|
| 1410 |
model_tier, model, model_source, api_key,
|
| 1411 |
progress
|
| 1412 |
):
|
|
@@ -1424,8 +1473,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1424 |
run_btn.click(
|
| 1425 |
fn=dispatch_run,
|
| 1426 |
inputs=[task_mode, input_type, spreadsheet_file, spreadsheet_column,
|
| 1427 |
-
pdf_file, pdf_description, pdf_mode,
|
| 1428 |
-
image_file, image_description] + category_inputs + [model_tier, model, model_source, api_key],
|
| 1429 |
outputs=[extracted_categories, extract_download, distribution_plot, results, download_file, status]
|
| 1430 |
)
|
| 1431 |
|
|
@@ -1435,8 +1484,8 @@ Soria, C. (2025). CatLLM: A Python package for LLM-based text classification. DO
|
|
| 1435 |
outputs=[
|
| 1436 |
input_type, text_input_group, pdf_input_group, image_input_group,
|
| 1437 |
spreadsheet_file, spreadsheet_column,
|
| 1438 |
-
pdf_file, pdf_description, pdf_mode,
|
| 1439 |
-
image_file, image_description,
|
| 1440 |
task_mode
|
| 1441 |
] + category_inputs + [
|
| 1442 |
add_category_btn, category_count,
|
|
|
|
| 486 |
|
| 487 |
|
| 488 |
def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
| 489 |
+
pdf_file, pdf_path, pdf_description, pdf_mode,
|
| 490 |
+
image_file, image_path, image_description,
|
| 491 |
model_tier, model, model_source_input, api_key_input,
|
| 492 |
progress=gr.Progress(track_tqdm=True)):
|
| 493 |
"""Extract categories from data and display them in a table."""
|
|
|
|
| 536 |
)
|
| 537 |
|
| 538 |
elif input_type == "PDF Documents":
|
| 539 |
+
# Use path if provided, otherwise use uploaded file
|
| 540 |
+
if pdf_path and pdf_path.strip():
|
| 541 |
+
pdf_input = pdf_path.strip()
|
| 542 |
+
elif pdf_file:
|
| 543 |
+
if isinstance(pdf_file, list):
|
| 544 |
+
pdf_input = [f if isinstance(f, str) else f.name for f in pdf_file]
|
| 545 |
+
else:
|
| 546 |
+
pdf_input = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 547 |
+
else:
|
| 548 |
+
yield None, None, "**Error:** Please upload a PDF file or enter a path"
|
| 549 |
return
|
| 550 |
|
|
|
|
| 551 |
mode_mapping = {
|
| 552 |
"Image (visual documents)": "image",
|
| 553 |
"Text (text-heavy)": "text",
|
|
|
|
| 556 |
actual_mode = mode_mapping.get(pdf_mode, "image")
|
| 557 |
|
| 558 |
result = catllm.extract(
|
| 559 |
+
input_data=pdf_input,
|
| 560 |
api_key=actual_api_key,
|
| 561 |
input_type="pdf",
|
| 562 |
description=pdf_description or "document",
|
|
|
|
| 566 |
)
|
| 567 |
|
| 568 |
elif input_type == "Images":
|
| 569 |
+
# Use path if provided, otherwise use uploaded file
|
| 570 |
+
if image_path and image_path.strip():
|
| 571 |
+
image_input = image_path.strip()
|
| 572 |
+
elif image_file:
|
| 573 |
+
if isinstance(image_file, list):
|
| 574 |
+
image_input = [f if isinstance(f, str) else f.name for f in image_file]
|
| 575 |
+
else:
|
| 576 |
+
image_input = image_file if isinstance(image_file, str) else image_file.name
|
| 577 |
else:
|
| 578 |
+
yield None, None, "**Error:** Please upload image files or enter a path"
|
| 579 |
+
return
|
| 580 |
|
| 581 |
result = catllm.extract(
|
| 582 |
+
input_data=image_input,
|
| 583 |
api_key=actual_api_key,
|
| 584 |
input_type="image",
|
| 585 |
description=image_description or "images",
|
|
|
|
| 620 |
|
| 621 |
|
| 622 |
def run_classify_data(input_type, spreadsheet_file, spreadsheet_column,
|
| 623 |
+
pdf_file, pdf_path, pdf_description, pdf_mode,
|
| 624 |
+
image_file, image_path, image_description,
|
| 625 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 626 |
model_tier, model, model_source_input, api_key_input,
|
| 627 |
progress=gr.Progress(track_tqdm=True)):
|
|
|
|
| 681 |
)
|
| 682 |
|
| 683 |
elif input_type == "PDF Documents":
|
| 684 |
+
# Use path if provided, otherwise use uploaded file
|
| 685 |
+
if pdf_path and pdf_path.strip():
|
| 686 |
+
pdf_input = pdf_path.strip()
|
| 687 |
+
original_filename = pdf_input.split("/")[-1]
|
| 688 |
+
elif pdf_file:
|
| 689 |
+
if isinstance(pdf_file, list):
|
| 690 |
+
pdf_input = [f if isinstance(f, str) else f.name for f in pdf_file]
|
| 691 |
+
original_filename = "multiple_pdfs"
|
| 692 |
+
else:
|
| 693 |
+
pdf_input = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 694 |
+
original_filename = pdf_input.split("/")[-1]
|
| 695 |
+
else:
|
| 696 |
+
yield None, None, None, None, "**Error:** Please upload a PDF file or enter a path"
|
| 697 |
return
|
| 698 |
|
|
|
|
|
|
|
| 699 |
column_name = "PDF Pages"
|
| 700 |
|
| 701 |
mode_mapping = {
|
|
|
|
| 706 |
actual_mode = mode_mapping.get(pdf_mode, "image")
|
| 707 |
|
| 708 |
result = catllm.classify(
|
| 709 |
+
input_data=pdf_input,
|
| 710 |
categories=categories,
|
| 711 |
api_key=actual_api_key,
|
| 712 |
input_type="pdf",
|
|
|
|
| 717 |
)
|
| 718 |
|
| 719 |
elif input_type == "Images":
|
| 720 |
+
# Use path if provided, otherwise use uploaded file
|
| 721 |
+
if image_path and image_path.strip():
|
| 722 |
+
image_input = image_path.strip()
|
| 723 |
+
original_filename = image_input.split("/")[-1]
|
| 724 |
+
elif image_file:
|
| 725 |
+
if isinstance(image_file, list):
|
| 726 |
+
image_input = [f if isinstance(f, str) else f.name for f in image_file]
|
| 727 |
+
original_filename = "multiple_images"
|
| 728 |
+
else:
|
| 729 |
+
image_input = image_file if isinstance(image_file, str) else image_file.name
|
| 730 |
+
original_filename = image_input.split("/")[-1]
|
| 731 |
else:
|
| 732 |
+
yield None, None, None, None, "**Error:** Please upload image files or enter a path"
|
| 733 |
+
return
|
| 734 |
|
|
|
|
| 735 |
column_name = "Image Files"
|
| 736 |
|
| 737 |
result = catllm.classify(
|
| 738 |
+
input_data=image_input,
|
| 739 |
categories=categories,
|
| 740 |
api_key=actual_api_key,
|
| 741 |
input_type="image",
|
|
|
|
| 835 |
|
| 836 |
|
| 837 |
def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
| 838 |
+
pdf_file, pdf_path, pdf_description, pdf_mode,
|
| 839 |
+
image_file, image_path, image_description,
|
| 840 |
model_tier, model, model_source_input, api_key_input,
|
| 841 |
progress=gr.Progress(track_tqdm=True)):
|
| 842 |
"""Extract categories then classify data with them."""
|
|
|
|
| 882 |
mode_param = None
|
| 883 |
|
| 884 |
elif input_type == "PDF Documents":
|
| 885 |
+
# Use path if provided, otherwise use uploaded file
|
| 886 |
+
if pdf_path and pdf_path.strip():
|
| 887 |
+
input_data = pdf_path.strip()
|
| 888 |
+
original_filename = input_data.split("/")[-1]
|
| 889 |
+
elif pdf_file:
|
| 890 |
+
if isinstance(pdf_file, list):
|
| 891 |
+
input_data = [f if isinstance(f, str) else f.name for f in pdf_file]
|
| 892 |
+
original_filename = "multiple_pdfs"
|
| 893 |
+
else:
|
| 894 |
+
input_data = pdf_file if isinstance(pdf_file, str) else pdf_file.name
|
| 895 |
+
original_filename = input_data.split("/")[-1]
|
| 896 |
+
else:
|
| 897 |
+
yield None, None, None, None, None, None, "**Error:** Please upload a PDF file or enter a path"
|
| 898 |
return
|
| 899 |
|
|
|
|
|
|
|
|
|
|
| 900 |
column_name = "PDF Pages"
|
| 901 |
input_type_param = "pdf"
|
| 902 |
description = pdf_description or "document"
|
|
|
|
| 909 |
mode_param = mode_mapping.get(pdf_mode, "image")
|
| 910 |
|
| 911 |
elif input_type == "Images":
|
| 912 |
+
# Use path if provided, otherwise use uploaded file
|
| 913 |
+
if image_path and image_path.strip():
|
| 914 |
+
input_data = image_path.strip()
|
| 915 |
+
original_filename = input_data.split("/")[-1]
|
| 916 |
+
elif image_file:
|
| 917 |
+
if isinstance(image_file, list):
|
| 918 |
+
input_data = [f if isinstance(f, str) else f.name for f in image_file]
|
| 919 |
+
original_filename = "multiple_images"
|
| 920 |
+
else:
|
| 921 |
+
input_data = image_file if isinstance(image_file, str) else image_file.name
|
| 922 |
+
original_filename = input_data.split("/")[-1]
|
| 923 |
else:
|
| 924 |
+
yield None, None, None, None, None, None, "**Error:** Please upload image files or enter a path"
|
| 925 |
+
return
|
| 926 |
|
|
|
|
| 927 |
column_name = "Image Files"
|
| 928 |
input_type_param = "image"
|
| 929 |
description = image_description or "images"
|
|
|
|
| 1099 |
None, # spreadsheet_file
|
| 1100 |
gr.update(choices=[], value=None), # spreadsheet_column
|
| 1101 |
None, # pdf_file
|
| 1102 |
+
"", # pdf_path
|
| 1103 |
"", # pdf_description
|
| 1104 |
"Image (visual documents)", # pdf_mode
|
| 1105 |
None, # image_file
|
| 1106 |
+
"", # image_path
|
| 1107 |
"", # image_description
|
| 1108 |
None, # task_mode
|
| 1109 |
]
|
|
|
|
| 1207 |
# PDF input group
|
| 1208 |
with gr.Group(visible=False) as pdf_input_group:
|
| 1209 |
pdf_file = gr.File(
|
| 1210 |
+
label="Upload PDF Document(s)",
|
| 1211 |
+
file_types=[".pdf"],
|
| 1212 |
+
file_count="multiple"
|
| 1213 |
+
)
|
| 1214 |
+
pdf_path = gr.Textbox(
|
| 1215 |
+
label="Or Enter File/Directory Path",
|
| 1216 |
+
placeholder="e.g., /path/to/documents/ or /path/to/file.pdf",
|
| 1217 |
+
info="Local path to PDF file or directory containing PDFs"
|
| 1218 |
)
|
| 1219 |
pdf_description = gr.Textbox(
|
| 1220 |
label="Document Description",
|
|
|
|
| 1234 |
file_types=["image"],
|
| 1235 |
file_count="multiple"
|
| 1236 |
)
|
| 1237 |
+
image_path = gr.Textbox(
|
| 1238 |
+
label="Or Enter File/Directory Path",
|
| 1239 |
+
placeholder="e.g., /path/to/images/ or /path/to/image.jpg",
|
| 1240 |
+
info="Local path to image file or directory containing images"
|
| 1241 |
+
)
|
| 1242 |
image_description = gr.Textbox(
|
| 1243 |
label="Image Description",
|
| 1244 |
placeholder="e.g., 'product photos', 'social media posts'",
|
|
|
|
| 1410 |
|
| 1411 |
# Main run button handler - dispatches based on task_mode
|
| 1412 |
def dispatch_run(task, input_type, spreadsheet_file, spreadsheet_column,
|
| 1413 |
+
pdf_file, pdf_path_val, pdf_description, pdf_mode,
|
| 1414 |
+
image_file, image_path_val, image_description,
|
| 1415 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 1416 |
model_tier, model, model_source, api_key,
|
| 1417 |
progress=gr.Progress(track_tqdm=True)):
|
|
|
|
| 1419 |
if task == "extract":
|
| 1420 |
for update in run_extract_categories(
|
| 1421 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1422 |
+
pdf_file, pdf_path_val, pdf_description, pdf_mode,
|
| 1423 |
+
image_file, image_path_val, image_description,
|
| 1424 |
model_tier, model, model_source, api_key,
|
| 1425 |
progress
|
| 1426 |
):
|
|
|
|
| 1436 |
elif task == "assign":
|
| 1437 |
for update in run_classify_data(
|
| 1438 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1439 |
+
pdf_file, pdf_path_val, pdf_description, pdf_mode,
|
| 1440 |
+
image_file, image_path_val, image_description,
|
| 1441 |
cat1, cat2, cat3, cat4, cat5, cat6, cat7, cat8, cat9, cat10,
|
| 1442 |
model_tier, model, model_source, api_key,
|
| 1443 |
progress
|
|
|
|
| 1454 |
elif task == "extract_and_assign":
|
| 1455 |
for update in run_extract_and_assign(
|
| 1456 |
input_type, spreadsheet_file, spreadsheet_column,
|
| 1457 |
+
pdf_file, pdf_path_val, pdf_description, pdf_mode,
|
| 1458 |
+
image_file, image_path_val, image_description,
|
| 1459 |
model_tier, model, model_source, api_key,
|
| 1460 |
progress
|
| 1461 |
):
|
|
|
|
| 1473 |
run_btn.click(
|
| 1474 |
fn=dispatch_run,
|
| 1475 |
inputs=[task_mode, input_type, spreadsheet_file, spreadsheet_column,
|
| 1476 |
+
pdf_file, pdf_path, pdf_description, pdf_mode,
|
| 1477 |
+
image_file, image_path, image_description] + category_inputs + [model_tier, model, model_source, api_key],
|
| 1478 |
outputs=[extracted_categories, extract_download, distribution_plot, results, download_file, status]
|
| 1479 |
)
|
| 1480 |
|
|
|
|
| 1484 |
outputs=[
|
| 1485 |
input_type, text_input_group, pdf_input_group, image_input_group,
|
| 1486 |
spreadsheet_file, spreadsheet_column,
|
| 1487 |
+
pdf_file, pdf_path, pdf_description, pdf_mode,
|
| 1488 |
+
image_file, image_path, image_description,
|
| 1489 |
task_mode
|
| 1490 |
] + category_inputs + [
|
| 1491 |
add_category_btn, category_count,
|