gopichandra committed on
Commit
69fa2c9
·
verified ·
1 Parent(s): 84fc131

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -14
app.py CHANGED
@@ -2,20 +2,34 @@ import gradio as gr
2
  from paddleocr import PaddleOCR
3
  from PIL import Image, ImageOps
4
  import numpy as np
 
 
5
 
6
  # Initialize PaddleOCR
7
  ocr = PaddleOCR(use_angle_cls=True, lang='en')
8
 
9
- # Function to process uploaded/captured images
10
- def process_uploaded_image(image, camera_type="rear"):
11
- if image is None:
12
- return "No image uploaded."
13
-
14
- # Mirror the image for rear camera
 
 
 
 
 
 
 
 
 
 
 
 
15
  if camera_type == "rear":
16
  image = ImageOps.mirror(image)
17
-
18
- # Perform OCR on the corrected image
19
  result = ocr.ocr(np.array(image))
20
  extracted_text = []
21
  for line in result[0]:
@@ -27,9 +41,9 @@ def create_app():
27
  with gr.Blocks() as app:
28
  gr.Markdown("## OCR Processor with Rear Camera Fix")
29
 
30
- # Input for image capture and camera type selection
31
  with gr.Row():
32
- image_input = gr.Image(source="webcam", type="pil", label="Capture Image from Webcam")
33
  camera_type = gr.Dropdown(
34
  choices=["rear", "front"],
35
  value="rear",
@@ -39,11 +53,11 @@ def create_app():
39
  # Output for extracted text
40
  extracted_text = gr.Textbox(label="Extracted Text")
41
 
42
- # Button to process the image
43
- process_button = gr.Button("Process Image")
44
  process_button.click(
45
- fn=process_uploaded_image,
46
- inputs=[image_input, camera_type],
47
  outputs=extracted_text
48
  )
49
 
 
2
  from paddleocr import PaddleOCR
3
  from PIL import Image, ImageOps
4
  import numpy as np
5
+ import cv2
6
+ import io
7
 
8
  # Initialize PaddleOCR
9
  ocr = PaddleOCR(use_angle_cls=True, lang='en')
10
 
11
+ # Function to extract an image from video and process it
12
+ def process_video_frame(video_data, camera_type="rear"):
13
+ if video_data is None:
14
+ return "No video feed provided."
15
+
16
+ # Decode video bytes
17
+ video_stream = io.BytesIO(video_data)
18
+ video_stream.seek(0)
19
+ video_array = np.frombuffer(video_stream.read(), np.uint8)
20
+ video_frame = cv2.imdecode(video_array, cv2.IMREAD_COLOR)
21
+
22
+ if video_frame is None:
23
+ return "Failed to decode video feed."
24
+
25
+ # Convert frame to PIL Image
26
+ image = Image.fromarray(cv2.cvtColor(video_frame, cv2.COLOR_BGR2RGB))
27
+
28
+ # Flip the image horizontally for rear camera
29
  if camera_type == "rear":
30
  image = ImageOps.mirror(image)
31
+
32
+ # Perform OCR
33
  result = ocr.ocr(np.array(image))
34
  extracted_text = []
35
  for line in result[0]:
 
41
  with gr.Blocks() as app:
42
  gr.Markdown("## OCR Processor with Rear Camera Fix")
43
 
44
+ # Inputs for video feed and camera type
45
  with gr.Row():
46
+ video_input = gr.Video(label="Capture Video Feed")
47
  camera_type = gr.Dropdown(
48
  choices=["rear", "front"],
49
  value="rear",
 
53
  # Output for extracted text
54
  extracted_text = gr.Textbox(label="Extracted Text")
55
 
56
+ # Button to process the video feed
57
+ process_button = gr.Button("Process Frame for OCR")
58
  process_button.click(
59
+ fn=process_video_frame,
60
+ inputs=[video_input, camera_type],
61
  outputs=extracted_text
62
  )
63