Spaces:

imperiusrex
/

PrintedTextOCR

Sleeping

App Files Files Community

imperiusrex committed on Aug 1, 2025

Commit

fa96b42

verified ·

1 Parent(s): f03e8a4

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -4

app.py CHANGED Viewed

@@ -2,16 +2,18 @@ import gradio as gr
 import torch
 import numpy as np
 import cv2
-import os
 from PIL import Image
 from transformers import CLIPProcessor, CLIPModel
 from paddleocr import PaddleOCR
-from spaces import GPU  # Required for ZeroGPU on Hugging Face
 # Setup
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
 lang_map = {
     "english": "en",
     "telugu": "te",
@@ -26,9 +28,10 @@ candidates = [
     "This is Korean text"
 ]
-ocr_detector = PaddleOCR(use_angle_cls=False, lang='en', det=True, rec=False, use_gpu=True)
-@GPU
 def ocr_pipeline(image_np):
     image_pil = Image.fromarray(image_np).convert("RGB")
     img_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
@@ -61,12 +64,15 @@ def ocr_pipeline(image_np):
             continue
         clip_inputs = clip_processor(text=candidates, images=Image.fromarray(crop), return_tensors="pt", padding=True)
         with torch.no_grad():
             probs = clip_model(**clip_inputs).logits_per_image.softmax(dim=1)
         lang_index = probs.argmax().item()
         lang_detected = candidates[lang_index].split()[-2].lower()
         lang_code = lang_map.get(lang_detected, "en")
         ocr = PaddleOCR(lang=lang_code, use_angle_cls=False, det=False, rec=True, use_gpu=False)
         result = ocr.ocr(crop)

 import torch
 import numpy as np
 import cv2
 from PIL import Image
 from transformers import CLIPProcessor, CLIPModel
 from paddleocr import PaddleOCR
 # Setup
 clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
 clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
+# Set device to CPU
+device = "cpu"
+clip_model.to(device)
 lang_map = {
     "english": "en",
     "telugu": "te",
     "This is Korean text"
 ]
+# Initialize PaddleOCR for CPU use
+# It is important to set use_gpu=False for both detection and recognition
+ocr_detector = PaddleOCR(use_angle_cls=False, lang='en', det=True, rec=False, use_gpu=False)
 def ocr_pipeline(image_np):
     image_pil = Image.fromarray(image_np).convert("RGB")
     img_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
             continue
         clip_inputs = clip_processor(text=candidates, images=Image.fromarray(crop), return_tensors="pt", padding=True)
+        clip_inputs.to(device)
         with torch.no_grad():
             probs = clip_model(**clip_inputs).logits_per_image.softmax(dim=1)
         lang_index = probs.argmax().item()
         lang_detected = candidates[lang_index].split()[-2].lower()
         lang_code = lang_map.get(lang_detected, "en")
+        # Initialize PaddleOCR for recognition with the detected language, ensuring GPU is not used
         ocr = PaddleOCR(lang=lang_code, use_angle_cls=False, det=False, rec=True, use_gpu=False)
         result = ocr.ocr(crop)