imperiusrex committed on
Commit
fa96b42
·
verified ·
1 Parent(s): f03e8a4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -4
app.py CHANGED
@@ -2,16 +2,18 @@ import gradio as gr
2
  import torch
3
  import numpy as np
4
  import cv2
5
- import os
6
  from PIL import Image
7
  from transformers import CLIPProcessor, CLIPModel
8
  from paddleocr import PaddleOCR
9
- from spaces import GPU # Required for ZeroGPU on Hugging Face
10
 
11
  # Setup
12
  clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
13
  clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
14
 
 
 
 
 
15
  lang_map = {
16
  "english": "en",
17
  "telugu": "te",
@@ -26,9 +28,10 @@ candidates = [
26
  "This is Korean text"
27
  ]
28
 
29
- ocr_detector = PaddleOCR(use_angle_cls=False, lang='en', det=True, rec=False, use_gpu=True)
 
 
30
 
31
- @GPU
32
  def ocr_pipeline(image_np):
33
  image_pil = Image.fromarray(image_np).convert("RGB")
34
  img_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
@@ -61,12 +64,15 @@ def ocr_pipeline(image_np):
61
  continue
62
 
63
  clip_inputs = clip_processor(text=candidates, images=Image.fromarray(crop), return_tensors="pt", padding=True)
 
 
64
  with torch.no_grad():
65
  probs = clip_model(**clip_inputs).logits_per_image.softmax(dim=1)
66
  lang_index = probs.argmax().item()
67
  lang_detected = candidates[lang_index].split()[-2].lower()
68
  lang_code = lang_map.get(lang_detected, "en")
69
 
 
70
  ocr = PaddleOCR(lang=lang_code, use_angle_cls=False, det=False, rec=True, use_gpu=False)
71
 
72
  result = ocr.ocr(crop)
 
2
  import torch
3
  import numpy as np
4
  import cv2
 
5
  from PIL import Image
6
  from transformers import CLIPProcessor, CLIPModel
7
  from paddleocr import PaddleOCR
 
8
 
9
  # Setup
10
  clip_model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
11
  clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
12
 
13
+ # Set device to CPU
14
+ device = "cpu"
15
+ clip_model.to(device)
16
+
17
  lang_map = {
18
  "english": "en",
19
  "telugu": "te",
 
28
  "This is Korean text"
29
  ]
30
 
31
+ # Initialize PaddleOCR for CPU use
32
+ # It is important to set use_gpu=False for both detection and recognition
33
+ ocr_detector = PaddleOCR(use_angle_cls=False, lang='en', det=True, rec=False, use_gpu=False)
34
 
 
35
  def ocr_pipeline(image_np):
36
  image_pil = Image.fromarray(image_np).convert("RGB")
37
  img_cv = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
 
64
  continue
65
 
66
  clip_inputs = clip_processor(text=candidates, images=Image.fromarray(crop), return_tensors="pt", padding=True)
67
+ clip_inputs.to(device)
68
+
69
  with torch.no_grad():
70
  probs = clip_model(**clip_inputs).logits_per_image.softmax(dim=1)
71
  lang_index = probs.argmax().item()
72
  lang_detected = candidates[lang_index].split()[-2].lower()
73
  lang_code = lang_map.get(lang_detected, "en")
74
 
75
+ # Initialize PaddleOCR for recognition with the detected language, ensuring GPU is not used
76
  ocr = PaddleOCR(lang=lang_code, use_angle_cls=False, det=False, rec=True, use_gpu=False)
77
 
78
  result = ocr.ocr(crop)