# VISPA / app.py
# Author: Sabari231024
# Commit: Create app.py (dd3c69b)
import io

import cv2
import gtts as gt
import requests
import streamlit as st
from googletrans import Translator
from PIL import Image
def take_photo():
    """Capture a single frame from the default camera as a PIL image.

    Returns:
        PIL.Image.Image: the captured frame, converted to RGB channel order.

    Raises:
        RuntimeError: if the camera could not deliver a frame.
    """
    camera = cv2.VideoCapture(0)
    try:
        ret, frame = camera.read()
    finally:
        # Always release the device, even if read() raises.
        camera.release()
    if not ret:
        # Original code ignored `ret` and crashed later inside fromarray(None).
        raise RuntimeError("Could not read a frame from the camera.")
    # OpenCV delivers BGR; PIL expects RGB, so swap channels before wrapping.
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
def trans(text, lang='ta', out_file="audio.mp3"):
    """Translate *text* into *lang* and synthesize the result to an MP3 file.

    Args:
        text: source text; googletrans auto-detects the source language.
        lang: ISO 639-1 code used both as translation target and TTS voice.
        out_file: path of the MP3 written (default preserves legacy behavior).

    Returns:
        str: the literal "done" on success (legacy contract kept for callers).
    """
    translator = Translator()
    translated = translator.translate(text, dest=lang)
    # gTTS accepts the same ISO 639-1 codes (e.g. 'ta') as googletrans here.
    tts = gt.gTTS(text=translated.text, lang=lang)
    tts.save(out_file)
    return "done"
def object_recognition(lang):
    """Caption the current camera frame via the HF BLIP API, then speak it.

    Args:
        lang: language code forwarded to trans() for translation + TTS.

    Returns:
        str: "done" once the translated caption audio has been written.

    Raises:
        requests.HTTPError: if the inference API responds with an error.
    """
    image = take_photo()
    API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
    # SECURITY: a live API token is hard-coded and committed here; move it to
    # an environment variable / secrets store and rotate the exposed key.
    headers = {"Authorization": "Bearer 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"}

    def query(img):
        # Serialize the PIL image to JPEG bytes in memory. The original code
        # passed the Image object to open(filename, "rb"), which always raised
        # TypeError — it never worked with the in-memory capture.
        buffer = io.BytesIO()
        img.save(buffer, format="JPEG")
        response = requests.post(API_URL, headers=headers, data=buffer.getvalue())
        response.raise_for_status()
        return response.json()

    output = query(image)
    text = output[0]['generated_text']
    return trans(text, lang)
def ocr_detection(lang):
    """Placeholder OCR pipeline: capture a frame, then speak a canned result.

    The real OCR call (tesseract HF space, stubbed below) is not wired up yet;
    only the translation/TTS step runs for now.
    """
    take_photo()  # frame is captured but unused until OCR is implemented
    # Pending integration with the OCR service:
    # client = Client("https://kneelesh48-tesseract-ocr.hf.space/")
    # result = client.predict(image, "afr", api_name="/tesseract-ocr")
    # print(result)
    # op = trans(result, lang)
    return trans("OCR Detection Result", lang)  # Placeholder result for demonstration
def operator(img, value, lang):
    """Dispatch the requested image-processing operation.

    Args:
        img: captured image (currently unused; kept for interface stability).
        value: operation selector — "1" or "Object Recognition" for captioning,
               "2" or "OCR Detection" for OCR. The label forms are accepted
               because the UI selectbox produces labels, not numeric codes.
        lang: target language code forwarded to the handlers.

    Returns:
        str: the status string returned by the selected handler.
    """
    if value in ("1", "Object Recognition"):
        return object_recognition(lang)
    if value in ("2", "OCR Detection"):
        return ocr_detection(lang)
    return trans("Sorry, I can't perform this operation.", lang)
# --- Streamlit UI -----------------------------------------------------------
st.title("Image Processing App")

# Capture a photo only when the user opts in; `image` stays None otherwise so
# the processing step below can guard on it. The original referenced `image`
# even when the checkbox was unchecked, raising NameError.
image = None
if st.checkbox("Take a photo"):
    image = take_photo()
    st.image(image, caption="Captured Image", use_column_width=True)

operation = st.selectbox("Select an operation", ["Object Recognition", "OCR Detection"])
lang = st.text_input("Enter language code (e.g., 'ta' for Tamil)")

# Run only once a photo exists and a language code was entered. The original
# passed operation[0] (the character "O"), which never matched a dispatch
# branch, so neither operation could ever be selected.
if image is not None and lang:
    # Map the selectbox label to the dispatcher's legacy numeric code.
    code = "1" if operation == "Object Recognition" else "2"
    result = operator(image, code, lang)
    st.text("Result: " + result)