Spaces:

CatLLM
/

survey-classifier

Running

chrissoria Claude committed 14 days ago

Commit

02239e1

1 Parent(s): 028cf2d

Adjust extraction params for images (multiple categories per image)

- Images: use fewer divisions (max 3) since each image yields multiple categories
- Images: increase categories_per_chunk to 12
- For 10 images: uses 2 divisions (5 images each) instead of erroring

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show

app.py +16 -6

app.py CHANGED Viewed

@@ -589,9 +589,10 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
                 yield None, None, "**Error:** Please upload image file(s) or a folder"
                 return
-            # Calculate sensible divisions based on input size
             num_items = len(image_input) if isinstance(image_input, list) else 1
-            divisions = min(5, max(1, num_items // 3))
             result = catllm.extract(
                 input_data=image_input,
@@ -600,7 +601,8 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
                 description=image_description or "images",
                 user_model=model,
                 model_source=model_source,
-                divisions=divisions
             )
         else:
@@ -965,12 +967,19 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
             yield None, None, None, None, None, None, f"**Error:** Unknown input type: {input_type}"
             return
-        # Calculate sensible divisions based on input size
         if isinstance(input_data, list):
             num_items = len(input_data)
         else:
             num_items = 1
-        divisions = min(5, max(1, num_items // 3))
         # Extract categories
         extract_kwargs = {
@@ -980,7 +989,8 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
             'description': description,
             'user_model': model,
             'model_source': model_source,
-            'divisions': divisions
         }
         if mode_param:
             extract_kwargs['mode'] = mode_param

                 yield None, None, "**Error:** Please upload image file(s) or a folder"
                 return
+            # For images, use fewer divisions since each image can have multiple categories
             num_items = len(image_input) if isinstance(image_input, list) else 1
+            # Use 1 division for small sets, max 3 for larger sets
+            divisions = min(3, max(1, num_items // 5))
             result = catllm.extract(
                 input_data=image_input,
                 description=image_description or "images",
                 user_model=model,
                 model_source=model_source,
+                divisions=divisions,
+                categories_per_chunk=12  # Images often have multiple categories each
             )
         else:
             yield None, None, None, None, None, None, f"**Error:** Unknown input type: {input_type}"
             return
+        # Calculate sensible divisions based on input size and type
         if isinstance(input_data, list):
             num_items = len(input_data)
         else:
             num_items = 1
+        # Images can have multiple categories per item, so use fewer divisions
+        if input_type_param == "image":
+            divisions = min(3, max(1, num_items // 5))
+            categories_per_chunk = 12
+        else:
+            divisions = min(5, max(1, num_items // 3))
+            categories_per_chunk = 10
         # Extract categories
         extract_kwargs = {
             'description': description,
             'user_model': model,
             'model_source': model_source,
+            'divisions': divisions,
+            'categories_per_chunk': categories_per_chunk
         }
         if mode_param:
             extract_kwargs['mode'] = mode_param