Spaces:

recentechstudio
/

HunyuanOCR

Running

App Files Community

aal-hawa committed 22 days ago

Commit

6aefbe1

1 Parent(s): 700ddf9

edit

Browse files

Files changed (1) hide show

app.py +8 -9

app.py CHANGED Viewed

@@ -6,7 +6,7 @@ from PIL import Image
 from transformers import AutoProcessor, HunYuanVLForConditionalGeneration
 # ============================================================
-# HunyuanOCR – Image Text Extraction
 # ============================================================
 MODEL_ID = "tencent/HunyuanOCR"
 model = None
@@ -31,7 +31,6 @@ def load_model():
     global model, processor
     if model is not None:
         return
-    import os
     token = os.getenv("HF_TOKEN", None)
     print("Loading HunyuanOCR ...")
     processor = AutoProcessor.from_pretrained(MODEL_ID, use_fast=False, token=token)
@@ -41,7 +40,7 @@ def load_model():
         device_map=None,
         low_cpu_mem_usage=True,
         token=token,
-    ).float()  # convert all model params from bfloat16 to float32
     model.eval()
     print("HunyuanOCR loaded.")
@@ -79,12 +78,12 @@ def ocr_process(image):
             padding=True, return_tensors="pt"
         )
-        # CRITICAL: The processor outputs bfloat16 tensors for pixel_values,
-        # but the model is now float32. Convert ALL input tensors to float32.
         for key in inputs:
             if isinstance(inputs[key], torch.Tensor):
-    			if inputs[key].is_floating_point():
-                	inputs[key] = inputs[key].float()
         inputs = inputs.to("cpu")
@@ -113,7 +112,7 @@ def ocr_process(image):
 # ============================================================
 with gr.Blocks(title="HunyuanOCR") as demo:
     gr.Markdown("""
-    # 📄 HunyuanOCR – Text Extraction
     Upload an image and the model will detect and extract all text with coordinates.
     """)
@@ -125,4 +124,4 @@ with gr.Blocks(title="HunyuanOCR") as demo:
     image_input.change(ocr_process, image_input, ocr_output)
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0")

 from transformers import AutoProcessor, HunYuanVLForConditionalGeneration
 # ============================================================
+# HunyuanOCR - Image Text Extraction
 # ============================================================
 MODEL_ID = "tencent/HunyuanOCR"
 model = None
     global model, processor
     if model is not None:
         return
     token = os.getenv("HF_TOKEN", None)
     print("Loading HunyuanOCR ...")
     processor = AutoProcessor.from_pretrained(MODEL_ID, use_fast=False, token=token)
         device_map=None,
         low_cpu_mem_usage=True,
         token=token,
+    ).float()
     model.eval()
     print("HunyuanOCR loaded.")
             padding=True, return_tensors="pt"
         )
+        # The processor outputs bfloat16 tensors, but model is float32.
+        # Convert all floating-point input tensors to float32.
         for key in inputs:
             if isinstance(inputs[key], torch.Tensor):
+                if inputs[key].is_floating_point():
+                    inputs[key] = inputs[key].float()
         inputs = inputs.to("cpu")
 # ============================================================
 with gr.Blocks(title="HunyuanOCR") as demo:
     gr.Markdown("""
+    # HunyuanOCR - Text Extraction
     Upload an image and the model will detect and extract all text with coordinates.
     """)
     image_input.change(ocr_process, image_input, ocr_output)
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0")