Spaces:
Sleeping
Sleeping
fix: Fall back to `eos_token_id` when `pad_token_id` is unset on the DeepSeek OCR tokenizer and model config, and add an all-ones `attention_mask` when the inputs lack one before calling `generate`.
Browse files
app_hf.py
CHANGED
|
@@ -92,6 +92,8 @@ class ModelManager:
|
|
| 92 |
print(f"Loading {model_name} to CPU...")
|
| 93 |
if model_name == DEEPSEEK_MODEL:
|
| 94 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, cache_dir=_hf_cache_dir)
|
|
|
|
|
|
|
| 95 |
model = AutoModel.from_pretrained(
|
| 96 |
model_name,
|
| 97 |
trust_remote_code=True,
|
|
@@ -100,6 +102,8 @@ class ModelManager:
|
|
| 100 |
cache_dir=_hf_cache_dir,
|
| 101 |
torch_dtype=dtype
|
| 102 |
)
|
|
|
|
|
|
|
| 103 |
model.eval()
|
| 104 |
self.models[model_name] = model
|
| 105 |
self.processors[model_name] = tokenizer
|
|
@@ -222,13 +226,21 @@ def run_ocr(input_image, input_file, model_choice, custom_prompt):
|
|
| 222 |
return_dict=True,
|
| 223 |
return_tensors="pt"
|
| 224 |
).to("cuda") # Ensure inputs are on cuda
|
|
|
|
|
|
|
|
|
|
| 225 |
|
| 226 |
with torch.no_grad(), _autocast_ctx:
|
| 227 |
-
output = model.generate(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
|
| 233 |
except Exception as e:
|
| 234 |
all_results.append(f"--- Page/Image {i+1} ---\nПомилка: {str(e)}")
|
|
|
|
| 92 |
print(f"Loading {model_name} to CPU...")
|
| 93 |
if model_name == DEEPSEEK_MODEL:
|
| 94 |
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, cache_dir=_hf_cache_dir)
|
| 95 |
+
if getattr(tokenizer, "pad_token_id", None) is None and getattr(tokenizer, "eos_token_id", None) is not None:
|
| 96 |
+
tokenizer.pad_token_id = tokenizer.eos_token_id
|
| 97 |
model = AutoModel.from_pretrained(
|
| 98 |
model_name,
|
| 99 |
trust_remote_code=True,
|
|
|
|
| 102 |
cache_dir=_hf_cache_dir,
|
| 103 |
torch_dtype=dtype
|
| 104 |
)
|
| 105 |
+
if hasattr(model, "config") and getattr(model.config, "pad_token_id", None) is None and getattr(tokenizer, "pad_token_id", None) is not None:
|
| 106 |
+
model.config.pad_token_id = tokenizer.pad_token_id
|
| 107 |
model.eval()
|
| 108 |
self.models[model_name] = model
|
| 109 |
self.processors[model_name] = tokenizer
|
|
|
|
| 226 |
return_dict=True,
|
| 227 |
return_tensors="pt"
|
| 228 |
).to("cuda") # Ensure inputs are on cuda
|
| 229 |
+
|
| 230 |
+
if "attention_mask" not in inputs:
|
| 231 |
+
inputs["attention_mask"] = torch.ones_like(inputs["input_ids"], dtype=torch.long)
|
| 232 |
|
| 233 |
with torch.no_grad(), _autocast_ctx:
|
| 234 |
+
output = model.generate(
|
| 235 |
+
**inputs,
|
| 236 |
+
max_new_tokens=4096,
|
| 237 |
+
do_sample=False,
|
| 238 |
+
pad_token_id=processor_or_tokenizer.tokenizer.pad_token_id,
|
| 239 |
+
)
|
| 240 |
|
| 241 |
+
input_len = inputs["input_ids"].shape[-1]
|
| 242 |
+
res = processor_or_tokenizer.decode(output[0][input_len:], skip_special_tokens=True)
|
| 243 |
+
all_results.append(f"--- Page/Image {i+1} ---\n{res}")
|
| 244 |
|
| 245 |
except Exception as e:
|
| 246 |
all_results.append(f"--- Page/Image {i+1} ---\nПомилка: {str(e)}")
|