Sabari231024 committed on
Commit
b46c7c0
·
1 Parent(s): df3f41f

Create app,py

Browse files
Files changed (1) hide show
  1. app,py +62 -0
app,py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import gtts as gt
4
+ from PIL import Image
5
+ from gradio_client import Client
6
+ from googletrans import Translator
7
+ import cv2
8
+ import numpy as np
9
+ import tempfile
10
+
11
def trans(text, lang='ta'):
    """Translate *text* into *lang* and synthesize the result as speech.

    Args:
        text: Text to translate.
        lang: Destination language code. It is passed both to the translator
            (as ``dest``) and to gTTS (as ``lang``). Defaults to ``'ta'``.

    Returns:
        Path of a newly created ``.mp3`` file containing the spoken
        translation. The file is created with ``delete=False``, so it is not
        removed automatically; cleanup is left to the caller.
    """
    translated = Translator().translate(text, dest=lang)
    speech = gt.gTTS(text=translated.text, lang=lang)

    # Reserve a unique path and close our own handle before gTTS writes to it.
    # Previously the handle was never closed explicitly (cleanup relied on
    # garbage collection), and writing to a path by name while another handle
    # to it is still open is platform-dependent.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        audio_path = tmp.name
    speech.save(audio_path)
    return audio_path
19
+
20
def object_recognition(image_array, lang):
    """Caption an image with the hosted BLIP model and speak the caption.

    The image is JPEG-encoded in memory, posted to the Hugging Face inference
    endpoint, and the returned caption is handed to ``trans``.

    Args:
        image_array: Image pixels as a NumPy array accepted by
            ``PIL.Image.fromarray``.
        lang: Destination language code forwarded to ``trans``.

    Returns:
        Path of the audio file produced by ``trans`` for the caption.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        RuntimeError: If the response body is not the expected
            ``[{"generated_text": ...}]`` structure.
    """
    import io
    import os

    API_URL = "https://api-inference.huggingface.co/models/Salesforce/blip-image-captioning-large"

    # Access tokens must not live in source control: read the token from the
    # environment (for example a secret exposed as HF_TOKEN). The token that
    # used to be hard-coded here was published with the code and should be
    # treated as compromised. Without a token the request is sent without an
    # Authorization header.
    token = os.environ.get("HF_TOKEN")
    headers = {"Authorization": f"Bearer {token}"} if token else {}

    image = Image.fromarray(image_array)
    # JPEG cannot store alpha or palette modes; convert only when required so
    # images that already encoded fine are sent unchanged.
    if image.mode not in ("L", "RGB", "CMYK"):
        image = image.convert("RGB")

    # Encode in memory instead of going through a fixed "temp_image.jpg" in
    # the working directory, which concurrent requests would overwrite.
    buffer = io.BytesIO()
    image.save(buffer, format="JPEG")

    # A timeout keeps a stalled endpoint from hanging the request forever.
    response = requests.post(
        API_URL, headers=headers, data=buffer.getvalue(), timeout=60
    )
    response.raise_for_status()

    output = response.json()
    try:
        caption = output[0]['generated_text']
    except (KeyError, IndexError, TypeError) as err:
        # Error payloads are not a caption list; surface them readably
        # instead of failing with an opaque indexing error.
        raise RuntimeError(f"Unexpected captioning response: {output!r}") from err
    return trans(caption, lang)
38
+
39
def ocr_detection(image_array, lang, ocr_lang="afr"):
    """Run remote Tesseract OCR on an image and speak the recognized text.

    Args:
        image_array: Image pixels as a NumPy array accepted by
            ``PIL.Image.fromarray``.
        lang: Destination language code forwarded to ``trans``.
        ocr_lang: Language identifier sent to the OCR endpoint as its second
            argument. Defaults to ``"afr"``, the value that was previously
            hard-coded.
            NOTE(review): "afr" looks like the Afrikaans Tesseract model;
            confirm that this is the intended default.

    Returns:
        Path of the audio file produced by ``trans`` for the OCR result.
    """
    import os

    try:
        # Newer gradio_client releases require file inputs to be wrapped.
        from gradio_client import handle_file
    except ImportError:
        # Older releases accept a plain file path string.
        handle_file = None

    image = Image.fromarray(image_array)

    # gradio_client sends file inputs by path; an in-memory PIL image object
    # cannot be serialized into the request, so write it to a temporary file.
    with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
        image_path = tmp.name
    try:
        image.save(image_path, format="PNG")
        payload = handle_file(image_path) if handle_file else image_path
        client = Client("https://kneelesh48-tesseract-ocr.hf.space/")
        result = client.predict(payload, ocr_lang, api_name="/tesseract-ocr")
    finally:
        # The upload is finished (or has failed); drop the temporary image.
        os.remove(image_path)

    print(result)
    return trans(result, lang)
49
+
50
def operator(image_array, value, lang):
    """Dispatch the requested operation and return the resulting audio path.

    Args:
        image_array: Image forwarded to the selected recognizer.
        value: Operation selector: ``"1"`` runs object recognition and
            ``"2"`` runs OCR. Anything else yields a spoken apology.
        lang: Destination language code for the spoken output.

    Returns:
        Path of the audio file produced for the selected operation.
    """
    if value == "1":
        return object_recognition(image_array, lang)
    if value == "2":
        return ocr_detection(image_array, lang)
    # Unknown selector: speak a fixed fallback message instead.
    return trans("Sorry, I can't perform this operation.", lang)
59
+
60
# Build the Gradio UI around `operator`: one image input and two text inputs
# (operation selector and language code), producing an audio output.
iface = gr.Interface(
    fn=operator,
    inputs=["image", "text", "text"],
    outputs="audio",
)

# Start serving; share=True requests a public share link from Gradio.
iface.launch(share=True)