AnseMin committed on
Commit
dbc3034
·
1 Parent(s): 932831a

Error: Error processing document with GOT-OCR: 'QWenTokenizer' object has no attribute 'startswith'

Browse files
Files changed (1) hide show
  1. src/parsers/got_ocr_parser.py +17 -25
src/parsers/got_ocr_parser.py CHANGED
@@ -155,13 +155,13 @@ class GotOcrParser(DocumentParser):
155
  if hasattr(torch.amp, 'autocast'):
156
  with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
157
  try:
158
- # Try first as third positional argument (based on example code)
159
  return original_chat(self, tokenizer, image_path, ocr_type_val, **kwargs_copy)
160
  except TypeError as e:
161
- logger.warning(f"First call approach failed: {e}, trying with keyword argument")
162
  try:
163
- # Try again with keyword argument
164
- return original_chat(self, tokenizer, image_path, ocr_type=ocr_type_val, **kwargs_copy)
165
  except Exception as e2:
166
  logger.warning(f"Second call approach also failed: {e2}")
167
  # Fall back to just passing the original arguments unchanged
@@ -175,12 +175,13 @@ class GotOcrParser(DocumentParser):
175
  else:
176
  # Same approach without autocast
177
  try:
178
- # Try first as third positional argument
179
  return original_chat(self, tokenizer, image_path, ocr_type_val, **kwargs_copy)
180
- except TypeError:
181
- # Try again with keyword argument
182
  try:
183
- return original_chat(self, tokenizer, image_path, ocr_type=ocr_type_val, **kwargs_copy)
 
184
  except:
185
  # Fall back to just passing the original arguments unchanged
186
  return original_chat(self, tokenizer, image_path, **kwargs)
@@ -381,12 +382,12 @@ class GotOcrParser(DocumentParser):
381
  # Use the model's chat method as shown in the documentation
382
  logger.info(f"Processing image with GOT-OCR: {file_path}")
383
  try:
384
- # Use the updated patched method with ocr_type as a keyword arg
385
  logger.info(f"Using OCR method: {ocr_type}")
386
  result = self._model.chat(
387
  self._tokenizer,
388
  str(file_path),
389
- ocr_type=ocr_type
390
  )
391
  except RuntimeError as e:
392
  if "bfloat16" in str(e) or "BFloat16" in str(e):
@@ -398,21 +399,12 @@ class GotOcrParser(DocumentParser):
398
  old_dtype = torch.get_default_dtype()
399
  torch.set_default_dtype(torch.float16)
400
 
401
- # Try with positional argument first based on documentation
402
- try:
403
- logger.info("Trying positional ocr_type parameter")
404
- result = self._model.chat(
405
- self._tokenizer,
406
- str(file_path),
407
- ocr_type
408
- )
409
- except Exception as inner_e:
410
- logger.warning(f"Positional parameter failed: {inner_e}, trying keyword")
411
- result = self._model.chat(
412
- self._tokenizer,
413
- str(file_path),
414
- ocr_type=ocr_type
415
- )
416
 
417
  # Restore default dtype
418
  torch.set_default_dtype(old_dtype)
 
155
  if hasattr(torch.amp, 'autocast'):
156
  with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
157
  try:
158
+ # Always use ocr_type as third positional argument per original signature
159
  return original_chat(self, tokenizer, image_path, ocr_type_val, **kwargs_copy)
160
  except TypeError as e:
161
+ logger.warning(f"First call approach failed: {e}, trying alternative approach")
162
  try:
163
+ # Try passing image_path as string in case that's the issue
164
+ return original_chat(self, tokenizer, str(image_path), ocr_type_val, **kwargs_copy)
165
  except Exception as e2:
166
  logger.warning(f"Second call approach also failed: {e2}")
167
  # Fall back to just passing the original arguments unchanged
 
175
  else:
176
  # Same approach without autocast
177
  try:
178
+ # Always use ocr_type as third positional argument
179
  return original_chat(self, tokenizer, image_path, ocr_type_val, **kwargs_copy)
180
+ except TypeError as e:
181
+ logger.warning(f"Call without autocast failed: {e}, trying alternative approach")
182
  try:
183
+ # Try passing image_path as string in case that's the issue
184
+ return original_chat(self, tokenizer, str(image_path), ocr_type_val, **kwargs_copy)
185
  except:
186
  # Fall back to just passing the original arguments unchanged
187
  return original_chat(self, tokenizer, image_path, **kwargs)
 
382
  # Use the model's chat method as shown in the documentation
383
  logger.info(f"Processing image with GOT-OCR: {file_path}")
384
  try:
385
+ # Use ocr_type as a positional argument based on the correct signature
386
  logger.info(f"Using OCR method: {ocr_type}")
387
  result = self._model.chat(
388
  self._tokenizer,
389
  str(file_path),
390
+ ocr_type # Pass as positional arg, not keyword
391
  )
392
  except RuntimeError as e:
393
  if "bfloat16" in str(e) or "BFloat16" in str(e):
 
399
  old_dtype = torch.get_default_dtype()
400
  torch.set_default_dtype(torch.float16)
401
 
402
+ # Call with positional argument for ocr_type
403
+ result = self._model.chat(
404
+ self._tokenizer,
405
+ str(file_path),
406
+ ocr_type
407
+ )
 
 
 
 
 
 
 
 
 
408
 
409
  # Restore default dtype
410
  torch.set_default_dtype(old_dtype)