chrissoria Claude committed on
Commit
02239e1
·
1 Parent(s): 028cf2d

Adjust extraction params for images (multiple categories per image)

Browse files

- Images: use fewer divisions (max 3) since each image yields multiple categories
- Images: increase categories_per_chunk to 12
- For 10 images: uses 2 divisions (5 images each) instead of erroring

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +16 -6
app.py CHANGED
@@ -589,9 +589,10 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
589
  yield None, None, "**Error:** Please upload image file(s) or a folder"
590
  return
591
 
592
- # Calculate sensible divisions based on input size
593
  num_items = len(image_input) if isinstance(image_input, list) else 1
594
- divisions = min(5, max(1, num_items // 3))
 
595
 
596
  result = catllm.extract(
597
  input_data=image_input,
@@ -600,7 +601,8 @@ def run_extract_categories(input_type, spreadsheet_file, spreadsheet_column,
600
  description=image_description or "images",
601
  user_model=model,
602
  model_source=model_source,
603
- divisions=divisions
 
604
  )
605
 
606
  else:
@@ -965,12 +967,19 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
965
  yield None, None, None, None, None, None, f"**Error:** Unknown input type: {input_type}"
966
  return
967
 
968
- # Calculate sensible divisions based on input size
969
  if isinstance(input_data, list):
970
  num_items = len(input_data)
971
  else:
972
  num_items = 1
973
- divisions = min(5, max(1, num_items // 3))
 
 
 
 
 
 
 
974
 
975
  # Extract categories
976
  extract_kwargs = {
@@ -980,7 +989,8 @@ def run_extract_and_assign(input_type, spreadsheet_file, spreadsheet_column,
980
  'description': description,
981
  'user_model': model,
982
  'model_source': model_source,
983
- 'divisions': divisions
 
984
  }
985
  if mode_param:
986
  extract_kwargs['mode'] = mode_param
 
589
  yield None, None, "**Error:** Please upload image file(s) or a folder"
590
  return
591
 
592
+ # For images, use fewer divisions since each image can have multiple categories
593
  num_items = len(image_input) if isinstance(image_input, list) else 1
594
+ # Use 1 division for small sets, max 3 for larger sets
595
+ divisions = min(3, max(1, num_items // 5))
596
 
597
  result = catllm.extract(
598
  input_data=image_input,
 
601
  description=image_description or "images",
602
  user_model=model,
603
  model_source=model_source,
604
+ divisions=divisions,
605
+ categories_per_chunk=12 # Images often have multiple categories each
606
  )
607
 
608
  else:
 
967
  yield None, None, None, None, None, None, f"**Error:** Unknown input type: {input_type}"
968
  return
969
 
970
+ # Calculate sensible divisions based on input size and type
971
  if isinstance(input_data, list):
972
  num_items = len(input_data)
973
  else:
974
  num_items = 1
975
+
976
+ # Images can have multiple categories per item, so use fewer divisions
977
+ if input_type_param == "image":
978
+ divisions = min(3, max(1, num_items // 5))
979
+ categories_per_chunk = 12
980
+ else:
981
+ divisions = min(5, max(1, num_items // 3))
982
+ categories_per_chunk = 10
983
 
984
  # Extract categories
985
  extract_kwargs = {
 
989
  'description': description,
990
  'user_model': model,
991
  'model_source': model_source,
992
+ 'divisions': divisions,
993
+ 'categories_per_chunk': categories_per_chunk
994
  }
995
  if mode_param:
996
  extract_kwargs['mode'] = mode_param