Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,16 +2,18 @@ import gradio as gr
|
|
| 2 |
import torch
|
| 3 |
import numpy as np
|
| 4 |
import cv2
|
| 5 |
-
import os
|
| 6 |
from PIL import Image
|
| 7 |
from transformers import CLIPProcessor, CLIPModel
|
| 8 |
from paddleocr import PaddleOCR
|
| 9 |
-
from spaces import GPU # Required for ZeroGPU on Hugging Face
|
| 10 |
|
| 11 |
# Setup
|
| 12 |
clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
|
| 13 |
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
lang_map = {
|
| 16 |
"english": "en",
|
| 17 |
"telugu": "te",
|
|
@@ -26,9 +28,10 @@ candidates = [
|
|
| 26 |
"This is Korean text"
|
| 27 |
]
|
| 28 |
|
| 29 |
-
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
@GPU
|
| 32 |
def ocr_pipeline(image_np):
|
| 33 |
image_pil = Image.fromarray(image_np).convert("RGB")
|
| 34 |
img_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
|
|
@@ -61,12 +64,15 @@ def ocr_pipeline(image_np):
|
|
| 61 |
continue
|
| 62 |
|
| 63 |
clip_inputs = clip_processor(text=candidates, images=Image.fromarray(crop), return_tensors="pt", padding=True)
|
|
|
|
|
|
|
| 64 |
with torch.no_grad():
|
| 65 |
probs = clip_model(**clip_inputs).logits_per_image.softmax(dim=1)
|
| 66 |
lang_index = probs.argmax().item()
|
| 67 |
lang_detected = candidates[lang_index].split()[-2].lower()
|
| 68 |
lang_code = lang_map.get(lang_detected, "en")
|
| 69 |
|
|
|
|
| 70 |
ocr = PaddleOCR(lang=lang_code, use_angle_cls=False, det=False, rec=True, use_gpu=False)
|
| 71 |
|
| 72 |
result = ocr.ocr(crop)
|
|
|
|
| 2 |
import torch
|
| 3 |
import numpy as np
|
| 4 |
import cv2
|
|
|
|
| 5 |
from PIL import Image
|
| 6 |
from transformers import CLIPProcessor, CLIPModel
|
| 7 |
from paddleocr import PaddleOCR
|
|
|
|
| 8 |
|
| 9 |
# Setup
|
| 10 |
clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
|
| 11 |
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
|
| 12 |
|
| 13 |
+
# Set device to CPU
|
| 14 |
+
device = "cpu"
|
| 15 |
+
clip_model.to(device)
|
| 16 |
+
|
| 17 |
lang_map = {
|
| 18 |
"english": "en",
|
| 19 |
"telugu": "te",
|
|
|
|
| 28 |
"This is Korean text"
|
| 29 |
]
|
| 30 |
|
| 31 |
+
# Initialize PaddleOCR for CPU use
|
| 32 |
+
# use_gpu=False must be set on both PaddleOCR instances: this detection-only one and the language-specific recognizer created later in ocr_pipeline
|
| 33 |
+
ocr_detector = PaddleOCR(use_angle_cls=False, lang='en', det=True, rec=False, use_gpu=False)
|
| 34 |
|
|
|
|
| 35 |
def ocr_pipeline(image_np):
|
| 36 |
image_pil = Image.fromarray(image_np).convert("RGB")
|
| 37 |
img_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
|
|
|
|
| 64 |
continue
|
| 65 |
|
| 66 |
clip_inputs = clip_processor(text=candidates, images=Image.fromarray(crop), return_tensors="pt", padding=True)
|
| 67 |
+
clip_inputs.to(device)
|
| 68 |
+
|
| 69 |
with torch.no_grad():
|
| 70 |
probs = clip_model(**clip_inputs).logits_per_image.softmax(dim=1)
|
| 71 |
lang_index = probs.argmax().item()
|
| 72 |
lang_detected = candidates[lang_index].split()[-2].lower()
|
| 73 |
lang_code = lang_map.get(lang_detected, "en")
|
| 74 |
|
| 75 |
+
# Initialize PaddleOCR for recognition with the detected language, ensuring GPU is not used
|
| 76 |
ocr = PaddleOCR(lang=lang_code, use_angle_cls=False, det=False, rec=True, use_gpu=False)
|
| 77 |
|
| 78 |
result = ocr.ocr(crop)
|