# app / app.py
# kedimestan's picture
# Create app.py
# 36cf9cf verified
import cv2
import easyocr
import numpy as np
from gtts import gTTS
import os
import pygame
import gradio as gr
# Module-level setup, executed once when this file is imported or run.
# OCR reader built for the 'tr' language list; the single instance is reused
# by every call to capture_and_process().
reader = easyocr.Reader(['tr'])
# Audio mixer that capture_and_process() uses to play the generated speech.
pygame.mixer.init()
# Turkish pairs dotted and dotless i (U+0130 'İ' -> 'i', 'I' -> U+0131 'ı').
# str.lower() applies the language-neutral Unicode mapping instead, so these
# two capitals are translated explicitly before lower-casing.
_TURKISH_LOWER_MAP = str.maketrans("I\u0130", "\u0131i")


def turkish_lower(text):
    """Return *text* lower-cased according to Turkish casing rules.

    Plain ``str.lower()`` turns 'I' into 'i' and U+0130 into 'i' followed by
    a combining dot, both of which are wrong for Turkish words.

    Args:
        text: The string to convert.

    Returns:
        The lower-cased string, with 'I' mapped to dotless 'ı' and 'İ' to 'i'.
    """
    return text.translate(_TURKISH_LOWER_MAP).lower()


def capture_and_process():
    """Grab one webcam frame, OCR it, and read every detected text aloud.

    The frame is written to ``captured_image.png``, passed to the module-level
    EasyOCR ``reader``, and each non-blank text fragment is synthesised with
    gTTS into ``output.mp3`` and played through the pygame mixer. Playback is
    blocking: the function returns only after all fragments have been spoken.

    Returns:
        A ``(text, image)`` pair matching the two Gradio outputs. On success
        ``text`` is the list of detected strings and ``image`` is the frame
        with its channel axis reversed (BGR -> RGB). If no frame could be
        captured, returns ``("Failed to capture image", None)``.
    """
    # Open the default webcam; always release it, even if read() raises.
    capture = cv2.VideoCapture(0)
    try:
        ret, frame = capture.read()
    finally:
        capture.release()
    if not ret or frame is None:
        return "Failed to capture image", None

    # Save the captured image; OCR is run on the saved file.
    filename = 'captured_image.png'
    cv2.imwrite(filename, frame)
    results = reader.readtext(filename)

    detected_text = []
    # One clock for the whole call instead of a new one per poll iteration.
    clock = pygame.time.Clock()
    # music.unload() is only available in pygame 2.0+; skip it on older builds.
    unload = getattr(pygame.mixer.music, "unload", None)
    for result in results:
        # The detected string is the second element of each OCR result.
        text = result[1]
        if not text.strip():
            continue
        detected_text.append(text)

        # Convert the text to speech and play it.
        tts = gTTS(text=turkish_lower(text), lang='tr')
        tts.save("output.mp3")
        pygame.mixer.music.load("output.mp3")
        pygame.mixer.music.play()
        # Poll roughly ten times per second until the speech is done.
        while pygame.mixer.music.get_busy():
            clock.tick(10)
        # Let go of output.mp3 so the next fragment can overwrite it.
        if unload is not None:
            unload()

    # Reverse the channel axis: OpenCV frames are BGR, Gradio displays RGB.
    return detected_text, frame[..., ::-1]
# Gradio interface.
# Output components come from the top-level namespace (gr.Textbox / gr.Image):
# the legacy gr.outputs.* aliases were deprecated in Gradio 3 and are no
# longer available in Gradio 4+, where they raise AttributeError.
interface = gr.Interface(
    fn=capture_and_process,
    inputs=None,
    outputs=[
        gr.Textbox(label="Detected Text"),
        gr.Image(type="numpy", label="Captured Image"),
    ],
    live=True,
)

# Launch the app only when executed as a script, not when imported.
if __name__ == "__main__":
    interface.launch()