Spaces:
Running
Running
aal-hawa committed on
Commit ·
7f53dc2
1
Parent(s): 4d706d5
edit
Browse files
app.py
CHANGED
|
@@ -41,7 +41,7 @@ def load_model():
|
|
| 41 |
device_map=None,
|
| 42 |
low_cpu_mem_usage=True,
|
| 43 |
token=token,
|
| 44 |
-
).float() # convert all params from bfloat16 to float32
|
| 45 |
model.eval()
|
| 46 |
print("HunyuanOCR loaded.")
|
| 47 |
|
|
@@ -77,7 +77,15 @@ def ocr_process(image):
|
|
| 77 |
inputs = processor(
|
| 78 |
text=[text_prompt], images=[image_input],
|
| 79 |
padding=True, return_tensors="pt"
|
| 80 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 81 |
|
| 82 |
with torch.no_grad():
|
| 83 |
generated_ids = model.generate(**inputs, max_new_tokens=16384, do_sample=False)
|
|
@@ -116,4 +124,4 @@ with gr.Blocks(title="HunyuanOCR") as demo:
|
|
| 116 |
image_input.change(ocr_process, image_input, ocr_output)
|
| 117 |
|
| 118 |
if __name__ == "__main__":
|
| 119 |
-
demo.launch(server_name="0.0.0.0")
|
|
|
|
| 41 |
device_map=None,
|
| 42 |
low_cpu_mem_usage=True,
|
| 43 |
token=token,
|
| 44 |
+
).float() # convert all model params from bfloat16 to float32
|
| 45 |
model.eval()
|
| 46 |
print("HunyuanOCR loaded.")
|
| 47 |
|
|
|
|
| 77 |
inputs = processor(
|
| 78 |
text=[text_prompt], images=[image_input],
|
| 79 |
padding=True, return_tensors="pt"
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
+
# CRITICAL: The processor outputs bfloat16 tensors for pixel_values,
|
| 83 |
+
# but the model is now float32. Convert all floating-point input tensors to float32.
|
| 84 |
+
for key in inputs:
|
| 85 |
+
if isinstance(inputs[key], torch.Tensor) and inputs[key].is_floating_point():
|
| 86 |
+
inputs[key] = inputs[key].float()
|
| 87 |
+
|
| 88 |
+
inputs = inputs.to("cpu")
|
| 89 |
|
| 90 |
with torch.no_grad():
|
| 91 |
generated_ids = model.generate(**inputs, max_new_tokens=16384, do_sample=False)
|
|
|
|
| 124 |
image_input.change(ocr_process, image_input, ocr_output)
|
| 125 |
|
| 126 |
if __name__ == "__main__":
|
| 127 |
+
demo.launch(server_name="0.0.0.0")
|