Spaces:

Sabari231024
/

VISPA2

Sleeping

App Files Files Community

VISPA2 / app.py

Sabari231024

Update app.py

6e41706 about 2 years ago

raw

history blame contribute delete

2.28 kB

	import gradio as gr
	import requests
	import gtts as gt
	from PIL import Image
	from gradio_client import Client
	from googletrans import Translator
	import cv2
	import numpy as np
	import tempfile
	import base64
	from io import BytesIO

	def trans(text, lang='ta'):
	    """Translate *text* into *lang* and synthesize the result as speech.

	    Args:
	        text: Text to translate and speak.
	        lang: Language code used both as the translation target and as
	            the gTTS voice language. Defaults to ``'ta'``.

	    Returns:
	        Path of a newly created ``.mp3`` file holding the spoken
	        translation. The file is created with ``delete=False``, so it is
	        not removed automatically; cleanup is left to the caller.
	    """
	    import os  # local import so this block does not depend on file-level imports

	    translator = Translator()
	    translated = translator.translate(text, dest=lang)
	    tts = gt.gTTS(text=translated.text, lang=lang)

	    # Reserve a unique path, then close our handle *before* gTTS writes to
	    # it: the original kept the handle open forever (descriptor leak), and
	    # a file that is still open cannot be reopened by name on Windows.
	    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_audio_file:
	        audio_path = temp_audio_file.name

	    try:
	        tts.save(audio_path)
	    except Exception:
	        # Do not leave an empty/partial file behind when synthesis fails.
	        os.unlink(audio_path)
	        raise
	    return audio_path

	def object_recognition(image_array, lang):
	    """Caption an image with the hosted BLIP model and speak the caption.

	    Args:
	        image_array: Image pixels as a NumPy array accepted by
	            ``PIL.Image.fromarray``.
	        lang: Language code forwarded to ``trans`` for translation and
	            speech synthesis.

	    Returns:
	        Path of the audio file produced by ``trans`` for the caption.

	    Raises:
	        requests.HTTPError: If the inference API answers with an HTTP
	            error status.
	        RuntimeError: If the response body does not contain a caption.
	    """
	    import os  # local import so this block does not depend on file-level imports

	    API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"

	    # SECURITY: the access token must come from the environment (e.g. a
	    # Space secret named HF_TOKEN), never from source control.
	    # NOTE(review): the token that used to be committed on this line should
	    # be revoked. Without HF_TOKEN the request is sent unauthenticated —
	    # confirm the endpoint accepts that for this deployment.
	    token = os.environ.get("HF_TOKEN")
	    headers = {"Authorization": f"Bearer {token}"} if token else {}

	    image = Image.fromarray(image_array)
	    # JPEG cannot store alpha/palette modes; normalize those to RGB.
	    if image.mode not in ("L", "RGB", "CMYK"):
	        image = image.convert("RGB")

	    # Encode in memory: a fixed "temp_image.jpg" in the working directory
	    # would be overwritten by concurrent requests.
	    buffered = BytesIO()
	    image.save(buffered, format="JPEG")

	    response = requests.post(
	        API_URL,
	        headers=headers,
	        data=buffered.getvalue(),
	        timeout=60,  # never block a request handler indefinitely
	    )
	    response.raise_for_status()

	    output = response.json()
	    try:
	        result = output[0]['generated_text']
	    except (KeyError, IndexError, TypeError) as err:
	        # e.g. an error payload (a dict) instead of a list of captions.
	        raise RuntimeError(
	            f"Unexpected captioning response: {output!r}"
	        ) from err

	    return trans(result, lang)

	def ocr_detection(image_array, lang):
	    """Extract text from an image via a remote OCR service and speak it.

	    The image is PNG-encoded in memory, sent as a base64 data URL to the
	    ``pragnakalp-ocr-image-to-text`` Space with the ``"PaddleOCR"`` option,
	    and the returned text is passed to ``trans``.

	    Args:
	        image_array: Image pixels as a NumPy array accepted by
	            ``PIL.Image.fromarray``.
	        lang: Language code forwarded to ``trans``.

	    Returns:
	        Path of the audio file produced by ``trans``.

	    Raises:
	        requests.HTTPError: If the OCR service answers with an HTTP error
	            status.
	    """
	    image = Image.fromarray(image_array)

	    buffered = BytesIO()
	    image.save(buffered, format="PNG")
	    image_base64 = base64.b64encode(buffered.getvalue()).decode()

	    response = requests.post(
	        "https://pragnakalp-ocr-image-to-text.hf.space/run/predict",
	        json={
	            "data": [
	                "PaddleOCR",
	                f"data:image/png;base64,{image_base64}",
	            ]
	        },
	        timeout=60,  # never block a request handler indefinitely
	    )
	    response.raise_for_status()

	    # Assumes the service replies with {"data": [text, ...]} — TODO confirm.
	    # Tolerate a missing/null "data" field and non-string entries, which
	    # would otherwise make str.join raise.
	    data = response.json().get("data") or []
	    text = " ".join(str(item) for item in data if item).strip()

	    if not text:
	        # Nothing recognized: speak a notice rather than handing an empty
	        # string to translation/TTS (presumably rejected by gTTS — verify).
	        text = "No text was detected in the image."

	    return trans(text, lang)


	def operator(image_array, value, lang):
	    """Route a request to the operation selected by *value*.

	    Args:
	        image_array: Image passed through to the selected operation.
	        value: Operation selector; ``"1"`` runs object recognition and
	            ``"2"`` runs OCR.
	        lang: Language code passed through to the selected operation.

	    Returns:
	        The audio file path returned by the selected operation, or, for
	        any other selector, by ``trans`` applied to an apology message.
	    """
	    if value == "1":
	        return object_recognition(image_array, lang)
	    if value == "2":
	        return ocr_detection(image_array, lang)
	    # Unknown selector: answer with a spoken apology in the same language.
	    return trans("Sorry, I can't perform this operation.", lang)

	# Wire `operator` into a Gradio UI: one image input and two text inputs
	# (operation selector, language code) producing a single audio output.
	iface = gr.Interface(
	    fn=operator,
	    inputs=["image", "text", "text"],
	    outputs="audio",
	)
	# Start serving; share=True asks Gradio for a public share link.
	iface.launch(share=True)