GiantAnalytics committed on
Commit
16ca38c
·
verified ·
1 Parent(s): ef9ba78

Second Version

Browse files
Files changed (1) hide show
  1. app.py +41 -18
app.py CHANGED
@@ -9,9 +9,25 @@ import numpy as np
9
  ocr = PaddleOCR(use_angle_cls=True, lang='ar')
10
 
11
  def ocr_extract_text(image):
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  # Perform OCR on the uploaded image
13
  result = ocr.ocr(image, cls=True)
14
 
 
 
 
15
  # Initialize a list to store detected text
16
  detected_text = []
17
 
@@ -25,29 +41,36 @@ def ocr_extract_text(image):
25
  texts = [line[1][0] for line in result[0]] # Detected text
26
  scores = [line[1][1] for line in result[0]] # Confidence scores
27
 
28
- annotated_image = draw_ocr(image, boxes, texts, scores, font_path="path/to/arabic/font.ttf")
 
 
 
 
29
 
30
- # Convert the annotated image to a format that Gradio can display
31
- annotated_image = Image.fromarray(np.uint8(annotated_image))
32
 
33
- # Return the text and annotated image
34
- return "\n".join(detected_text), annotated_image
35
 
36
- # Define Gradio interface with the button to trigger OCR
37
- def start_ocr(image):
38
- # Trigger OCR extraction when button is clicked
39
- return ocr_extract_text(image)
40
 
41
  iface = gr.Interface(
42
- fn=start_ocr, # Function to trigger OCR extraction on button click
43
- inputs=gr.Image(type="pil"), # Image input only
44
- outputs=[gr.Textbox(label="Extracted Text"), gr.Image(label="Annotated Image")], # Outputs
45
- live=False, # Set live=False as we want to trigger the process with a button click
46
- title="Arabic OCR Extractor", # Title of the interface
47
- description="Upload an Arabic document or image, and click 'Start Extracting Text' to extract the text using OCR.", # Description
48
- allow_flagging="never" # Prevent flagging if it's not required
 
 
 
 
49
  )
50
 
 
51
  if __name__ == "__main__":
52
- # This ensures Gradio runs in debug mode and with queue for better debugging
53
- iface.queue().launch(debug=True)
 
9
# Initialize PaddleOCR once at module load (model download/load is slow):
# angle classification enabled, Arabic ('ar') recognition model.
ocr = PaddleOCR(use_angle_cls=True, lang='ar')
10
 
11
  def ocr_extract_text(image):
12
+ if image is None:
13
+ return "No image provided", None
14
+
15
+ # Convert PIL Image to numpy array if needed
16
+ if isinstance(image, Image.Image):
17
+ image = np.array(image)
18
+
19
+ # Ensure image is in BGR format for PaddleOCR
20
+ if len(image.shape) == 3 and image.shape[2] == 4: # RGBA
21
+ image = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
22
+ elif len(image.shape) == 3 and image.shape[2] == 3: # RGB
23
+ image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
24
+
25
  # Perform OCR on the uploaded image
26
  result = ocr.ocr(image, cls=True)
27
 
28
+ if result[0] is None:
29
+ return "No text detected in the image", image
30
+
31
  # Initialize a list to store detected text
32
  detected_text = []
33
 
 
41
  texts = [line[1][0] for line in result[0]] # Detected text
42
  scores = [line[1][1] for line in result[0]] # Confidence scores
43
 
44
+ try:
45
+ annotated_image = draw_ocr(image, boxes, texts, scores)
46
+ except Exception as e:
47
+ print(f"Error annotating image: {e}")
48
+ annotated_image = image
49
 
50
+ # Join the detected text with Arabic-compatible line breaks
51
+ text_output = "\n".join(detected_text)
52
 
53
+ return text_output, annotated_image
 
54
 
55
# Define Gradio interface with Arabic-friendly styling.
# Arabic is a right-to-left script; without this rule the extracted text
# would render left-aligned / LTR in the output textbox.
css = """
.output-text { direction: rtl; text-align: right; }
"""
59
 
60
# Gradio UI: upload an image, get the extracted Arabic text (RTL-styled via
# the module-level `css`) plus the annotated image back.
iface = gr.Interface(
    fn=ocr_extract_text,
    inputs=gr.Image(type="numpy", label="Upload Image"),
    outputs=[
        gr.Textbox(label="Extracted Arabic Text", elem_classes=["output-text"]),
        gr.Image(label="Annotated Image"),
    ],
    title="Arabic OCR Extractor",
    description="Upload an Arabic document or image to extract the text using OCR.",
    css=css,
    examples=[],  # You can add example images here
    # BUG FIX: cache_examples=True with an empty examples list makes Gradio
    # attempt to pre-compute a cache over zero examples, which errors out.
    # Flip this back to True only once `examples` is populated.
    cache_examples=False,
)
73
 
74
# For Colab and Hugging Face Spaces compatibility: only launch when run as a
# script, so importing this module (e.g. by a Space runner) has no side effects.
if __name__ == "__main__":
    # share=True enables a public tunnel URL (needed on Colab);
    # debug=True surfaces tracebacks in the console for easier debugging.
    iface.launch(debug=True, share=True)