# Hugging Face Spaces status (scraped page artifact): Sleeping / Sleeping
import base64
import os
import tempfile
from io import BytesIO

import cv2
import gradio as gr
import gtts as gt
import numpy as np
import requests
from googletrans import Translator
from gradio_client import Client
from PIL import Image
def trans(text, lang='ta'):
    """Translate *text* into *lang*, synthesize it as speech, and return
    the path of a temporary ``.mp3`` file containing the audio.

    Parameters:
        text: source text to translate (any language googletrans detects).
        lang: target language code for both translation and TTS (default 'ta').

    Returns:
        Filesystem path of the generated MP3; the caller is responsible
        for deleting it.
    """
    translator = Translator()
    out = translator.translate(text, dest=lang)
    tts = gt.gTTS(text=out.text, lang=lang)
    # mkstemp + close instead of NamedTemporaryFile(delete=False): the old
    # code never closed the handle, which leaked a file descriptor and, on
    # Windows, prevented gTTS from reopening the path to write into it.
    fd, audio_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)
    tts.save(audio_path)
    return audio_path
def object_recognition(image_array, lang):
    """Caption *image_array* with the Hugging Face BLIP endpoint and return
    the path of an MP3 speaking the (translated) caption.

    Parameters:
        image_array: image as a NumPy array (as delivered by Gradio's image input).
        lang: target language code passed through to ``trans``.

    Returns:
        Path of the temporary MP3 produced by ``trans``.
    """
    image = Image.fromarray(image_array)
    API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
    # SECURITY: hard-coded API token checked into source — it is effectively
    # public. Revoke it and load from an environment variable instead.
    headers = {"Authorization": "Bearer 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"}
    # Encode the JPEG in memory rather than writing temp_image.jpg to the
    # CWD: the old file was never deleted and raced under concurrent requests.
    buffer = BytesIO()
    image.save(buffer, format="JPEG")
    # timeout prevents a hung inference endpoint from blocking forever.
    response = requests.post(API_URL, headers=headers, data=buffer.getvalue(), timeout=60)
    output = response.json()
    result = output[0]['generated_text']
    return trans(result, lang)
def ocr_detection(image_array, lang):
    """Run OCR on *image_array* via the pragnakalp HF Space and return the
    path of an MP3 speaking the (translated) extracted text.

    Parameters:
        image_array: image as a NumPy array (as delivered by Gradio's image input).
        lang: target language code passed through to ``trans``.

    Returns:
        Path of the temporary MP3 produced by ``trans``.
    """
    image = Image.fromarray(image_array)
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    image_base64 = base64.b64encode(buffered.getvalue()).decode()
    # timeout prevents a sleeping/unresponsive Space from blocking forever.
    response = requests.post(
        "https://pragnakalp-ocr-image-to-text.hf.space/run/predict",
        json={
            "data": [
                "PaddleOCR",
                f"data:image/png;base64,{image_base64}",
            ]
        },
        timeout=60,
    ).json()
    data = response.get("data", [])
    # Coerce each entry to str: the Space's "data" payload is not guaranteed
    # to contain only strings, and join() would raise TypeError otherwise.
    text = " ".join(str(item) for item in data)
    return trans(text, lang)
def operator(image_array, value, lang):
    """Dispatch the requested operation: "1" → object recognition,
    "2" → OCR; anything else produces a spoken apology.

    Returns the path of the MP3 produced by the chosen operation.
    """
    handlers = {
        "1": object_recognition,
        "2": ocr_detection,
    }
    handler = handlers.get(value)
    if handler is not None:
        return handler(image_array, lang)
    return trans("Sorry, I can't perform this operation.", lang)
# Wire the app together: an image, an operation code ("1"/"2"), and a
# language code go in; synthesized speech comes out.
iface = gr.Interface(fn=operator, inputs=["image", "text", "text"], outputs="audio")
iface.launch(share=True)