import io

import cv2
import requests
import streamlit as st
from googletrans import Translator
from gtts import gTTS
from PIL import Image

# Hugging Face Inference API endpoint for the BLIP image-captioning model.
API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
# Replace the placeholder below with your own Hugging Face API token;
# never commit a real token to source control.
HEADERS = {"Authorization": "Bearer hf_xxxxxxxxxxxxxxxxxxxx"}


def take_photo():
    """Capture a single frame from the default webcam and return it as a PIL image."""
    camera = cv2.VideoCapture(0)
    ret, frame = camera.read()
    camera.release()
    if not ret:
        raise RuntimeError("Could not read a frame from the webcam.")
    # OpenCV returns frames in BGR order; convert to RGB before wrapping in PIL.
    return Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))


def trans(text, lang="ta"):
    """Translate text into the target language and synthesise speech for it.

    Returns the translated text; the spoken audio is saved to audio.mp3.
    Note: this relies on the synchronous googletrans API (e.g. 4.0.0rc1).
    """
    translator = Translator()
    out = translator.translate(text, dest=lang)
    tts = gTTS(text=out.text, lang=lang)
    tts.save("audio.mp3")
    return out.text


def query_caption(image):
    """Send a PIL image to the captioning model and return the API's JSON reply."""
    buffer = io.BytesIO()
    image.save(buffer, format="JPEG")  # The API expects raw image bytes, not a PIL object.
    response = requests.post(API_URL, headers=HEADERS, data=buffer.getvalue())
    return response.json()


def object_recognition(image, lang):
    """Caption the captured image, then translate and voice the caption."""
    output = query_caption(image)
    if isinstance(output, dict) and "error" in output:
        raise RuntimeError("Captioning API error: " + output["error"])
    text = output[0]["generated_text"]
    return trans(text, lang)


def ocr_detection(image, lang):
    # Assuming you have the correct endpoint and API key for the OCR service:
    # client = Client("https://kneelesh48-tesseract-ocr.hf.space/")
    # result = client.predict(image, "afr", api_name="/tesseract-ocr")
    # return trans(result, lang)
    return trans("OCR Detection Result", lang)  # Placeholder result for demonstration


def operator(image, value, lang):
    """Dispatch the selected operation on the captured image."""
    if value == "Object Recognition":
        return object_recognition(image, lang)
    if value == "OCR Detection":
        return ocr_detection(image, lang)
    return trans("Sorry, I can't perform this operation.", lang)


# Create Streamlit app
st.title("Image Processing App")

# Add input components
if st.checkbox("Take a photo"):
    image = take_photo()
    st.image(image, caption="Captured Image", use_column_width=True)
    operation = st.selectbox("Select an operation", ["Object Recognition", "OCR Detection"])
    lang = st.text_input("Enter language code (e.g., 'ta' for Tamil)", value="ta")
    if operation and lang:
        result = operator(image, operation, lang)
        st.text("Result: " + result)
        st.audio("audio.mp3")  # Play the translated speech saved by trans().
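# To try this app locally (a sketch of the expected setup; the package names
# below are assumed from the imports above, and the filename app.py is arbitrary):
#   pip install streamlit opencv-python Pillow requests gtts googletrans==4.0.0rc1
#   streamlit run app.py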