Sapna36 committed on
Commit
76dd525
·
verified ·
1 Parent(s): 4208fcc

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import torch
4
+ import speech_recognition as sr
5
+ from transformers import pipeline
6
+ from gtts import gTTS
7
+ from flask import Flask, request, jsonify
8
+ import gradio as gr
9
+
10
# Flask application object; routes are registered on it further down.
app = Flask(import_name=__name__)

# Hugging Face model id used for the translation pipeline.
# NOTE(review): the id reads as an Urdu -> English pair, while the callers in
# this file ask for Pashto <-> Urdu -- confirm this is the intended model.
TRANSLATION_MODEL_ID = "Helsinki-NLP/opus-mt-ur-en"

# Load translation model once at import time so every request reuses it.
translator = pipeline(task="translation", model=TRANSLATION_MODEL_ID)
14
+
15
+ # Speech recognition function
16
def recognize_speech(audio_file):
    """Transcribe an audio file to text.

    The whole file is read through ``sr.AudioFile`` and the recording is
    passed to ``Recognizer.recognize_google`` with the language fixed to
    ``"ur-PK"``.

    Args:
        audio_file: Audio source accepted by ``sr.AudioFile`` (the callers
            in this file pass a filesystem path).

    Returns:
        The recognized text. When the recognizer raises
        ``sr.UnknownValueError`` or ``sr.RequestError`` the function does
        not raise; it returns ``"Could not understand audio"`` or
        ``"Could not request results"`` respectively.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_file) as stream:
        captured = engine.record(stream)

    try:
        return engine.recognize_google(captured, language="ur-PK")
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError:
        return "Could not request results"
27
+
28
+ # Text-to-speech conversion
29
def text_to_speech(text, lang):
    """Synthesize *text* with gTTS and return the path of the saved MP3.

    Args:
        text: Text to be spoken.
        lang: Language code handed to ``gTTS``.

    Returns:
        Path of a newly created ``.mp3`` temporary file. The file is created
        with ``delete=False``, so the caller is responsible for removing it.

    Raises:
        Whatever ``gTTS(...)`` or ``gTTS.save`` raises. If saving fails, the
        reserved temporary file is removed before the error propagates.
    """
    # Build the synthesizer first so an invalid request never creates a file.
    tts = gTTS(text=text, lang=lang)

    # Reserve a unique path and close the handle before gTTS writes to it
    # by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
        audio_path = temp_audio.name

    try:
        tts.save(audio_path)
    except Exception:
        # Do not leave an empty or partial file behind when synthesis fails.
        try:
            os.remove(audio_path)
        except OSError:
            pass  # best-effort cleanup; the original error matters more
        raise
    return audio_path
35
+
36
@app.route("/process", methods=["POST"])
def process_audio():
    """Handle ``POST /process``: transcribe, translate and voice an upload.

    The audio is expected as a file upload in the ``audio`` form field. It
    is transcribed with ``recognize_speech``, translated with the
    module-level ``translator`` and voiced with ``text_to_speech``.

    Returns:
        JSON ``{"response": <translated text>, "audio": <path returned by
        text_to_speech>}`` on success, or ``{"error": <message>}`` with
        status 400 (no upload) or 422 (speech could not be recognized).
    """
    upload = request.files.get("audio")
    if upload is None:
        return jsonify({"error": "Missing 'audio' file upload"}), 400

    # A unique temp file (instead of a fixed "input.wav" in the working
    # directory) keeps concurrent requests from overwriting each other.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as scratch:
        input_path = scratch.name
    try:
        upload.save(input_path)
        text = recognize_speech(input_path)
    finally:
        try:
            os.remove(input_path)
        except OSError:
            pass  # best-effort cleanup of the scratch file

    # recognize_speech reports failures as plain strings; do not translate
    # and voice those as if the user had said them.
    if text in ("Could not understand audio", "Could not request results"):
        return jsonify({"error": text}), 422

    # Heuristic kept from the original: the transcript naming Pashto
    # switches the direction to Pashto -> Urdu.
    if "پښتو" in text or "Pashto" in text:
        src_lang, tgt_lang = "ps", "ur"
    else:
        src_lang, tgt_lang = "ur", "ps"

    # The translation pipeline's keywords are src_lang/tgt_lang; unknown
    # keywords such as src/tgt are forwarded to generation instead.
    # NOTE(review): translator is loaded from "Helsinki-NLP/opus-mt-ur-en";
    # a single-pair model presumably ignores these hints -- confirm the model.
    response = translator(text, src_lang=src_lang, tgt_lang=tgt_lang)[0]["translation_text"]
    response_audio = text_to_speech(response, tgt_lang)

    return jsonify({"response": response, "audio": response_audio})
52
+
53
+ # Gradio UI
54
def chat_interface(audio_input):
    """Gradio callback: transcribe, translate and voice one recording.

    Args:
        audio_input: Path of the recorded audio file (the Gradio input in
            this file is declared with ``type="filepath"``). Raw audio
            bytes are also accepted and spooled to a temporary ``.wav``
            file. ``None`` or an empty value means there is no recording.

    Returns:
        ``(translated_text, audio_path)``. With no recording the result is
        ``("", None)``; when speech recognition fails, the recognizer's
        message is returned as the text and the audio path is ``None``.
    """
    # Gradio passes None when the component is empty or has been cleared.
    if not audio_input:
        return "", None

    spooled_path = None
    if isinstance(audio_input, (bytes, bytearray, memoryview)):
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            temp_file.write(audio_input)
            spooled_path = temp_file.name
        audio_path = spooled_path
    else:
        # Already a path on disk: use it directly instead of writing the
        # path string into a binary file.
        audio_path = audio_input

    try:
        text = recognize_speech(audio_path)
    finally:
        if spooled_path is not None:
            try:
                os.remove(spooled_path)
            except OSError:
                pass  # best-effort cleanup of our own scratch file

    # recognize_speech reports failures as plain strings; show them as-is
    # rather than translating and voicing them.
    if text in ("Could not understand audio", "Could not request results"):
        return text, None

    # Heuristic kept from the original: the transcript naming Pashto
    # switches the direction to Pashto -> Urdu.
    if "پښتو" in text or "Pashto" in text:
        src_lang, tgt_lang = "ps", "ur"
    else:
        src_lang, tgt_lang = "ur", "ps"

    # The translation pipeline's keywords are src_lang/tgt_lang; unknown
    # keywords such as src/tgt are forwarded to generation instead.
    # NOTE(review): translator is loaded from "Helsinki-NLP/opus-mt-ur-en";
    # a single-pair model presumably ignores these hints -- confirm the model.
    response = translator(text, src_lang=src_lang, tgt_lang=tgt_lang)[0]["translation_text"]
    response_audio = text_to_speech(response, tgt_lang)

    return response, response_audio
69
+
70
# Gradio front end: microphone recording in, translated text and synthesized
# speech out. live=True is passed through to gr.Interface unchanged.
microphone_input = gr.Audio(source="microphone", type="filepath")
translation_box = gr.Textbox(label="Translation")
voice_output = gr.Audio(label="AI Voice Response")

demo = gr.Interface(
    fn=chat_interface,
    inputs=microphone_input,
    outputs=[translation_box, voice_output],
    live=True,
)
# NOTE(review): this launch runs at import time, before the __main__ guard
# below. If launch() blocks, the Flask server only starts after Gradio exits,
# and both may contend for port 7860 -- confirm the intended deployment.
demo.launch()

if __name__ == "__main__":
    # Serve the Flask API on all interfaces.
    app.run(host="0.0.0.0", port=7860)