Commit b46c7c0
Parent(s): df3f41f
Create app.py
app.py ADDED
@@ -0,0 +1,62 @@
import gradio as gr
import requests
import gtts as gt
from PIL import Image
from gradio_client import Client
from googletrans import Translator
import cv2
import numpy as np
import tempfile

# Translate the text into the target language and synthesize it as speech.
def trans(text, lang='ta'):
    translator = Translator()
    out = translator.translate(text, dest=lang)
    tts = gt.gTTS(text=out.text, lang=lang)
    # Save the audio as a temporary file
    temp_audio_file = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False)
    tts.save(temp_audio_file.name)
    return temp_audio_file.name

# Caption the image with the hosted BLIP model, then return the spoken caption.
def object_recognition(image_array, lang):
    # Convert the NumPy array to a PIL Image
    image = Image.fromarray(image_array)

    API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"
    headers = {"Authorization": "Bearer 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"}

    with open("temp_image.jpg", "wb") as f:
        image.save(f, format="JPEG")

    with open("temp_image.jpg", "rb") as f:
        response = requests.post(API_URL, headers=headers, data=f)

    output = response.json()
    result = output[0]['generated_text']
    text = "Object recognition result for the captured image."
    audio_file = trans(result, lang)
    return audio_file

# Run OCR on the image through a hosted Tesseract Space, then return the spoken text.
def ocr_detection(image_array, lang):
    # Convert the NumPy array to a PIL Image
    image = Image.fromarray(image_array)

    # gradio_client takes a local file path for image inputs, so write the frame to disk first
    image.save("temp_image.jpg", format="JPEG")
    client = Client("https://kneelesh48-tesseract-ocr.hf.space/")
    result = client.predict("temp_image.jpg", "afr", api_name="/tesseract-ocr")
    print(result)
    text = "OCR detection result for the captured image."
    audio_file = trans(result, lang)
    return audio_file

# Route the request: "1" = image captioning, "2" = OCR, anything else = spoken error message.
def operator(image_array, value, lang):
    if value == "1":
        audio_file = object_recognition(image_array, lang)
    elif value == "2":
        audio_file = ocr_detection(image_array, lang)
    else:
        text = "Sorry, I can't perform this operation."
        audio_file = trans(text, lang)
    return audio_file

# Create the Gradio interface: image input, operation code, and language code map to an audio output.
iface = gr.Interface(fn=operator, inputs=["image", "text", "text"], outputs="audio")
iface.launch(share=True)
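For a quick local sanity check outside the web UI, a snippet along these lines could sit just above the iface.launch(...) call; it reuses the imports already at the top of app.py. The file name sample.jpg, the operation code "1", and the language code "ta" are placeholder values, not part of the commit.

# Hypothetical smoke test (not part of the original commit): run one image
# through the same pipeline the Gradio interface calls.
import numpy as np
from PIL import Image

test_image = np.array(Image.open("sample.jpg").convert("RGB"))  # any local test image
audio_path = operator(test_image, "1", "ta")  # "1" selects the captioning branch
print("Spoken result saved to:", audio_path)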