aal-hawa committed on
Commit
7f53dc2
·
1 Parent(s): 4d706d5
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -41,7 +41,7 @@ def load_model():
41
  device_map=None,
42
  low_cpu_mem_usage=True,
43
  token=token,
44
- ).float() # convert all params from bfloat16 to float32 for CPU
45
  model.eval()
46
  print("HunyuanOCR loaded.")
47
 
@@ -77,7 +77,15 @@ def ocr_process(image):
77
  inputs = processor(
78
  text=[text_prompt], images=[image_input],
79
  padding=True, return_tensors="pt"
80
- ).to("cpu")
 
 
 
 
 
 
 
 
81
 
82
  with torch.no_grad():
83
  generated_ids = model.generate(**inputs, max_new_tokens=16384, do_sample=False)
@@ -116,4 +124,4 @@ with gr.Blocks(title="HunyuanOCR") as demo:
116
  image_input.change(ocr_process, image_input, ocr_output)
117
 
118
  if __name__ == "__main__":
119
- demo.launch(server_name="0.0.0.0")
 
41
  device_map=None,
42
  low_cpu_mem_usage=True,
43
  token=token,
44
+ ).float() # convert all model params from bfloat16 to float32
45
  model.eval()
46
  print("HunyuanOCR loaded.")
47
 
 
77
  inputs = processor(
78
  text=[text_prompt], images=[image_input],
79
  padding=True, return_tensors="pt"
80
+ )
81
+
82
+ # CRITICAL: The processor outputs bfloat16 tensors for pixel_values,
83
+ # but the model is now float32. Convert ALL input tensors to float32.
84
+ for key in inputs:
85
+ if isinstance(inputs[key], torch.Tensor) and inputs[key].is_floating_point():
86
+ inputs[key] = inputs[key].float()
87
+
88
+ inputs = inputs.to("cpu")
89
 
90
  with torch.no_grad():
91
  generated_ids = model.generate(**inputs, max_new_tokens=16384, do_sample=False)
 
124
  image_input.change(ocr_process, image_input, ocr_output)
125
 
126
  if __name__ == "__main__":
127
+ demo.launch(server_name="0.0.0.0")