AnseMin committed on
Commit
e5648b4
·
1 Parent(s): e045d86

Error: Error processing document with GOT-OCR: 'QWenTokenizer' object has no attribute 'startswith'

Browse files
Files changed (1) hide show
  1. src/parsers/got_ocr_parser.py +10 -10
src/parsers/got_ocr_parser.py CHANGED
@@ -148,17 +148,17 @@ class GotOcrParser(DocumentParser):
148
  if hasattr(torch.amp, 'autocast'):
149
  with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
150
  try:
151
- # Pass all arguments directly to maintain signature
152
- return original_chat(self, tokenizer, image_path, ocr_type, **kwargs)
153
  except TypeError as e:
154
  logger.warning(f"First call approach failed: {e}, trying alternative approach")
155
  try:
156
  # Try passing image_path as string in case that's the issue
157
- return original_chat(self, tokenizer, str(image_path), ocr_type, **kwargs)
158
  except Exception as e2:
159
  logger.warning(f"Second call approach also failed: {e2}")
160
- # Fall back to original method
161
- return original_chat(self, tokenizer, image_path, ocr_type, **kwargs)
162
  except RuntimeError as e:
163
  if "bfloat16" in str(e):
164
  logger.error(f"BFloat16 error encountered despite patching: {e}")
@@ -168,16 +168,16 @@ class GotOcrParser(DocumentParser):
168
  else:
169
  # Same approach without autocast
170
  try:
171
- # Direct call with all positional args
172
- return original_chat(self, tokenizer, image_path, ocr_type, **kwargs)
173
  except TypeError as e:
174
  logger.warning(f"Call without autocast failed: {e}, trying alternative approach")
175
  try:
176
  # Try passing image_path as string in case that's the issue
177
- return original_chat(self, tokenizer, str(image_path), ocr_type, **kwargs)
178
  except:
179
- # Fall back to original method
180
- return original_chat(self, tokenizer, image_path, ocr_type, **kwargs)
181
 
182
  # Apply the patch
183
  import types
 
148
  if hasattr(torch.amp, 'autocast'):
149
  with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
150
  try:
151
+ # Pass arguments correctly - without 'self' as first arg since original_chat is already bound
152
+ return original_chat(tokenizer, image_path, ocr_type, **kwargs)
153
  except TypeError as e:
154
  logger.warning(f"First call approach failed: {e}, trying alternative approach")
155
  try:
156
  # Try passing image_path as string in case that's the issue
157
+ return original_chat(tokenizer, str(image_path), ocr_type, **kwargs)
158
  except Exception as e2:
159
  logger.warning(f"Second call approach also failed: {e2}")
160
+ # Fall back to original method with keyword args
161
+ return original_chat(tokenizer, image_path, ocr_type=ocr_type, **kwargs)
162
  except RuntimeError as e:
163
  if "bfloat16" in str(e):
164
  logger.error(f"BFloat16 error encountered despite patching: {e}")
 
168
  else:
169
  # Same approach without autocast
170
  try:
171
+ # Direct call without 'self' as first arg
172
+ return original_chat(tokenizer, image_path, ocr_type, **kwargs)
173
  except TypeError as e:
174
  logger.warning(f"Call without autocast failed: {e}, trying alternative approach")
175
  try:
176
  # Try passing image_path as string in case that's the issue
177
+ return original_chat(tokenizer, str(image_path), ocr_type, **kwargs)
178
  except:
179
+ # Fall back to keyword arguments
180
+ return original_chat(tokenizer, image_path, ocr_type=ocr_type, **kwargs)
181
 
182
  # Apply the patch
183
  import types