Spaces:

CatLLM
/

survey-classifier

Running

chrissoria Claude committed 21 days ago

Commit

028cf2d

1 Parent(s): ae1b24b

Auto-adjust divisions parameter based on input size

- Calculate sensible divisions: min(5, max(1, num_items // 3))
- Prevents "Cannot extract N categories from chunks of only M items" error
- For 10 images: uses 3 divisions (10 // 3) instead of the default 5

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show

app.py +21 -3

app.py CHANGED Viewed

@@ -558,6 +558,10 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
             }
             actual_mode = mode_mapping.get(pdf_mode, "image")
             result = catllm.extract(
                 input_data=pdf_input,
                 api_key=actual_api_key,
@@ -565,7 +569,8 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
                 description=pdf_description or "document",
                 mode=actual_mode,
                 user_model=model,
-                model_source=model_source
             )
         elif input_type == "Images":
@@ -584,13 +589,18 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
                 yield None, None, "**Error:** Please upload image file(s) or a folder"
                 return
             result = catllm.extract(
                 input_data=image_input,
                 api_key=actual_api_key,
                 input_type="image",
                 description=image_description or "images",
                 user_model=model,
-                model_source=model_source
             )
         else:
@@ -955,6 +965,13 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
             yield None, None, None, None, None, None, f"**Error:** Unknown input type: {input_type}"
             return
         # Extract categories
         extract_kwargs = {
             'input_data': input_data,
@@ -962,7 +979,8 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
             'input_type': input_type_param,
             'description': description,
             'user_model': model,
-            'model_source': model_source
         }
         if mode_param:
             extract_kwargs['mode'] = mode_param

             }
             actual_mode = mode_mapping.get(pdf_mode, "image")
+            # Calculate sensible divisions based on input size
+            num_items = len(pdf_input) if isinstance(pdf_input, list) else 1
+            divisions = min(5, max(1, num_items // 3))
             result = catllm.extract(
                 input_data=pdf_input,
                 api_key=actual_api_key,
                 description=pdf_description or "document",
                 mode=actual_mode,
                 user_model=model,
+                model_source=model_source,
+                divisions=divisions
             )
         elif input_type == "Images":
                 yield None, None, "**Error:** Please upload image file(s) or a folder"
                 return
+            # Calculate sensible divisions based on input size
+            num_items = len(image_input) if isinstance(image_input, list) else 1
+            divisions = min(5, max(1, num_items // 3))
             result = catllm.extract(
                 input_data=image_input,
                 api_key=actual_api_key,
                 input_type="image",
                 description=image_description or "images",
                 user_model=model,
+                model_source=model_source,
+                divisions=divisions
             )
         else:
             yield None, None, None, None, None, None, f"**Error:** Unknown input type: {input_type}"
             return
+        # Calculate sensible divisions based on input size
+        if isinstance(input_data, list):
+            num_items = len(input_data)
+        else:
+            num_items = 1
+        divisions = min(5, max(1, num_items // 3))
         # Extract categories
         extract_kwargs = {
             'input_data': input_data,
             'input_type': input_type_param,
             'description': description,
             'user_model': model,
+            'model_source': model_source,
+            'divisions': divisions
         }
         if mode_param:
             extract_kwargs['mode'] = mode_param