aal-hawa committed on
Commit
6aefbe1
·
1 Parent(s): 700ddf9
Files changed (1) hide show
  1. app.py +8 -9
app.py CHANGED
@@ -6,7 +6,7 @@ from PIL import Image
6
  from transformers import AutoProcessor, HunYuanVLForConditionalGeneration
7
 
8
  # ============================================================
9
- # HunyuanOCR Image Text Extraction
10
  # ============================================================
11
  MODEL_ID = "tencent/HunyuanOCR"
12
  model = None
@@ -31,7 +31,6 @@ def load_model():
31
  global model, processor
32
  if model is not None:
33
  return
34
- import os
35
  token = os.getenv("HF_TOKEN", None)
36
  print("Loading HunyuanOCR ...")
37
  processor = AutoProcessor.from_pretrained(MODEL_ID, use_fast=False, token=token)
@@ -41,7 +40,7 @@ def load_model():
41
  device_map=None,
42
  low_cpu_mem_usage=True,
43
  token=token,
44
- ).float() # convert all model params from bfloat16 to float32
45
  model.eval()
46
  print("HunyuanOCR loaded.")
47
 
@@ -79,12 +78,12 @@ def ocr_process(image):
79
  padding=True, return_tensors="pt"
80
  )
81
 
82
- # CRITICAL: The processor outputs bfloat16 tensors for pixel_values,
83
- # but the model is now float32. Convert ALL input tensors to float32.
84
  for key in inputs:
85
  if isinstance(inputs[key], torch.Tensor):
86
- if inputs[key].is_floating_point():
87
- inputs[key] = inputs[key].float()
88
 
89
  inputs = inputs.to("cpu")
90
 
@@ -113,7 +112,7 @@ def ocr_process(image):
113
  # ============================================================
114
  with gr.Blocks(title="HunyuanOCR") as demo:
115
  gr.Markdown("""
116
- # 📄 HunyuanOCR Text Extraction
117
  Upload an image and the model will detect and extract all text with coordinates.
118
  """)
119
 
@@ -125,4 +124,4 @@ with gr.Blocks(title="HunyuanOCR") as demo:
125
  image_input.change(ocr_process, image_input, ocr_output)
126
 
127
  if __name__ == "__main__":
128
- demo.launch(server_name="0.0.0.0")
 
6
  from transformers import AutoProcessor, HunYuanVLForConditionalGeneration
7
 
8
  # ============================================================
9
+ # HunyuanOCR - Image Text Extraction
10
  # ============================================================
11
  MODEL_ID = "tencent/HunyuanOCR"
12
  model = None
 
31
  global model, processor
32
  if model is not None:
33
  return
 
34
  token = os.getenv("HF_TOKEN", None)
35
  print("Loading HunyuanOCR ...")
36
  processor = AutoProcessor.from_pretrained(MODEL_ID, use_fast=False, token=token)
 
40
  device_map=None,
41
  low_cpu_mem_usage=True,
42
  token=token,
43
+ ).float()
44
  model.eval()
45
  print("HunyuanOCR loaded.")
46
 
 
78
  padding=True, return_tensors="pt"
79
  )
80
 
81
+ # The processor outputs bfloat16 tensors, but model is float32.
82
+ # Convert all floating-point input tensors to float32.
83
  for key in inputs:
84
  if isinstance(inputs[key], torch.Tensor):
85
+ if inputs[key].is_floating_point():
86
+ inputs[key] = inputs[key].float()
87
 
88
  inputs = inputs.to("cpu")
89
 
 
112
  # ============================================================
113
  with gr.Blocks(title="HunyuanOCR") as demo:
114
  gr.Markdown("""
115
+ # HunyuanOCR - Text Extraction
116
  Upload an image and the model will detect and extract all text with coordinates.
117
  """)
118
 
 
124
  image_input.change(ocr_process, image_input, ocr_output)
125
 
126
  if __name__ == "__main__":
127
+ demo.launch(server_name="0.0.0.0")