Spaces:
Running
Running
Commit
·
02239e1
1
Parent(s):
028cf2d
Adjust extraction params for images (multiple categories per image)
Browse files
- Images: use fewer divisions (max 3) since each image yields multiple categories
- Images: increase categories_per_chunk to 12
- For 10 images: uses 2 divisions (5 images each) instead of erroring
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
app.py
CHANGED
|
@@ -589,9 +589,10 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 589 |
yield None, None, "**Error:** Please upload image file(s) or a folder"
|
| 590 |
return
|
| 591 |
|
| 592 |
-
#
|
| 593 |
num_items = len(image_input) if isinstance(image_input, list) else 1
|
| 594 |
-
|
|
|
|
| 595 |
|
| 596 |
result = catllm.extract(
|
| 597 |
input_data=image_input,
|
|
@@ -600,7 +601,8 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 600 |
description=image_description or "images",
|
| 601 |
user_model=model,
|
| 602 |
model_source=model_source,
|
| 603 |
-
divisions=divisions
|
|
|
|
| 604 |
)
|
| 605 |
|
| 606 |
else:
|
|
@@ -965,12 +967,19 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 965 |
yield None, None, None, None, None, None, f"**Error:** Unknown input type: {input_type}"
|
| 966 |
return
|
| 967 |
|
| 968 |
-
# Calculate sensible divisions based on input size
|
| 969 |
if isinstance(input_data, list):
|
| 970 |
num_items = len(input_data)
|
| 971 |
else:
|
| 972 |
num_items = 1
|
| 973 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 974 |
|
| 975 |
# Extract categories
|
| 976 |
extract_kwargs = {
|
|
@@ -980,7 +989,8 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
|
|
| 980 |
'description': description,
|
| 981 |
'user_model': model,
|
| 982 |
'model_source': model_source,
|
| 983 |
-
'divisions': divisions
|
|
|
|
| 984 |
}
|
| 985 |
if mode_param:
|
| 986 |
extract_kwargs['mode'] = mode_param
|
|
|
|
| 589 |
yield None, None, "**Error:** Please upload image file(s) or a folder"
|
| 590 |
return
|
| 591 |
|
| 592 |
+
# For images, use fewer divisions since each image can have multiple categories
|
| 593 |
num_items = len(image_input) if isinstance(image_input, list) else 1
|
| 594 |
+
# Use 1 division for small sets, max 3 for larger sets
|
| 595 |
+
divisions = min(3, max(1, num_items // 5))
|
| 596 |
|
| 597 |
result = catllm.extract(
|
| 598 |
input_data=image_input,
|
|
|
|
| 601 |
description=image_description or "images",
|
| 602 |
user_model=model,
|
| 603 |
model_source=model_source,
|
| 604 |
+
divisions=divisions,
|
| 605 |
+
categories_per_chunk=12 # Images often have multiple categories each
|
| 606 |
)
|
| 607 |
|
| 608 |
else:
|
|
|
|
| 967 |
yield None, None, None, None, None, None, f"**Error:** Unknown input type: {input_type}"
|
| 968 |
return
|
| 969 |
|
| 970 |
+
# Calculate sensible divisions based on input size and type
|
| 971 |
if isinstance(input_data, list):
|
| 972 |
num_items = len(input_data)
|
| 973 |
else:
|
| 974 |
num_items = 1
|
| 975 |
+
|
| 976 |
+
# Images can have multiple categories per item, so use fewer divisions
|
| 977 |
+
if input_type_param == "image":
|
| 978 |
+
divisions = min(3, max(1, num_items // 5))
|
| 979 |
+
categories_per_chunk = 12
|
| 980 |
+
else:
|
| 981 |
+
divisions = min(5, max(1, num_items // 3))
|
| 982 |
+
categories_per_chunk = 10
|
| 983 |
|
| 984 |
# Extract categories
|
| 985 |
extract_kwargs = {
|
|
|
|
| 989 |
'description': description,
|
| 990 |
'user_model': model,
|
| 991 |
'model_source': model_source,
|
| 992 |
+
'divisions': divisions,
|
| 993 |
+
'categories_per_chunk': categories_per_chunk
|
| 994 |
}
|
| 995 |
if mode_param:
|
| 996 |
extract_kwargs['mode'] = mode_param
|