Subbu1304 committed on
Commit
ec3b8b9
·
verified ·
1 Parent(s): 47f1f46

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +91 -47
app.py CHANGED
@@ -1,49 +1,93 @@
1
- import gradio as gr
2
- import speech_recognition as sr
3
-
4
def recognize_speech():
    """Record one utterance from the microphone and return its transcription.

    Returns:
        The text recognised by the Google Web Speech API, or a fixed apology
        string when the audio is unintelligible or the service request fails.
    """
    engine = sr.Recognizer()

    # Capture audio from the microphone after calibrating for background noise.
    with sr.Microphone() as mic:
        print("Say something...")
        engine.adjust_for_ambient_noise(mic)
        captured = engine.listen(mic)

    try:
        # Recognize speech using Google Web Speech API
        transcript = engine.recognize_google(captured)
    except sr.UnknownValueError:
        print("Sorry, I could not understand the audio.")
        return "Sorry, I could not understand."
    except sr.RequestError:
        print("Could not request results from Google Speech Recognition service.")
        return "Could not request results."
    else:
        print("You said: " + transcript)
        return transcript
25
-
26
def voice_login_interface():
    """Run the two-step voice login and return a text summary of it.

    Listens once for the name and once for the Gmail address (in that order)
    and returns the welcome line followed by each prompt with its answer.
    """
    greeting = "Welcome to Briyani Hub."

    # First utterance: the user's name.
    name_request = "Please enter your name letter by letter."
    spoken_name = recognize_speech()

    # Second utterance: the user's Gmail address.
    gmail_request = "Please enter your Gmail."
    spoken_gmail = recognize_speech()

    summary_lines = (
        greeting,
        f"{name_request} {spoken_name}",
        f"{gmail_request} {spoken_gmail}",
    )
    return "\n".join(summary_lines)
39
-
40
# Build the Gradio UI: no input widgets, a single text output produced by
# voice_login_interface.
iface = gr.Interface(
    title="Briyani Hub Voice Login",
    description="A voice-assisted login page for Briyani Hub",
    fn=voice_login_interface,
    inputs=[],
    outputs="text",
    live=True,
)

iface.launch()
 
 
 
 
 
 
 
 
 
 
 
49
 
 
1
+ import torch
2
+ from flask import Flask, render_template, request, jsonify
3
+ import os
4
+ from transformers import pipeline
5
+ from gtts import gTTS
6
+ from pydub import AudioSegment
7
+ from pydub.silence import detect_nonsilent
8
+ from waitress import serve
9
+
10
app = Flask(__name__)

# Speech recognition uses the whisper-small checkpoint; it runs on the first
# CUDA device when one is available and on the CPU otherwise.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

asr_model = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-small",
    device=-1 if device != "cuda" else 0,  # pipeline convention: -1 selects the CPU
)
15
+
16
# Function to generate audio prompts
def generate_audio_prompt(text, filename):
    """Synthesize ``text`` as English speech and save it under ``static/``.

    Args:
        text: The sentence to speak.
        filename: Name of the file to write inside the ``static`` directory.
    """
    # Create the target directory first so saving does not fail on a fresh
    # checkout where ``static/`` does not exist yet.
    os.makedirs("static", exist_ok=True)
    tts = gTTS(text=text, lang="en")
    tts.save(os.path.join("static", filename))
20
+
21
# Voice prompts used by the app, keyed by the base name of the MP3 file
# generated for each of them.
prompts = dict(
    welcome="Welcome to Biryani Hub.",
    ask_name="Tell me your name.",
    ask_email="Please provide your email address.",
    thank_you="Thank you for registration.",
)

# Render every prompt to "<key>.mp3" when the module is loaded.
for key in prompts:
    text = prompts[key]
    generate_audio_prompt(text, f"{key}.mp3")
31
+
32
# Spoken phrase -> literal symbol.
# NOTE(review): "at the rate" is listed before its prefix "at"; presumably the
# consumer replaces phrases in insertion order - confirm before reordering.
SYMBOL_MAPPING = dict(
    (
        ("at the rate", "@"),
        ("at", "@"),
        ("dot", "."),
        ("underscore", "_"),
        ("hash", "#"),
        ("plus", "+"),
        ("dash", "-"),
        ("comma", ","),
        ("space", " "),
    )
)
44
+
45
# Function to convert audio to WAV format
def convert_to_wav(input_path, output_path):
    """Re-encode an audio file as 16 kHz mono WAV.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path the WAV file is written to.

    Raises:
        Exception: If reading, resampling or exporting fails. The message is
            prefixed with "Audio conversion failed: " and the original error
            is attached as ``__cause__``.
    """
    try:
        audio = AudioSegment.from_file(input_path)
        audio = audio.set_frame_rate(16000).set_channels(1)  # Convert to 16kHz, mono
        audio.export(output_path, format="wav")
    except Exception as e:
        # Chain the original error so its traceback is kept for debugging.
        raise Exception(f"Audio conversion failed: {str(e)}") from e
53
+
54
# Function to check if audio contains actual speech
def is_silent_audio(audio_path):
    """Return True when the WAV file at ``audio_path`` contains no audible part.

    A segment counts as silence when it stays at least 16 dB below the clip's
    own average loudness for 500 ms or longer.

    Args:
        audio_path: Path to a WAV file.

    Returns:
        True if no non-silent range is found, or if the clip is empty or
        digitally silent; False otherwise.
    """
    audio = AudioSegment.from_wav(audio_path)

    # An empty or all-zero clip has no average loudness to measure against
    # (dBFS is -inf), so the relative threshold below is meaningless for it.
    if len(audio) == 0 or audio.dBFS == float("-inf"):
        return True

    nonsilent_parts = detect_nonsilent(
        audio,
        min_silence_len=500,  # Reduced silence duration
        silence_thresh=audio.dBFS - 16,
    )
    return not nonsilent_parts  # If no speech detected
59
+
60
@app.route("/")
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template("index.html")
    return page
63
+
64
@app.route("/transcribe", methods=["POST"])
def transcribe():
    """Transcribe an uploaded audio clip with the Whisper pipeline.

    Expects a multipart POST carrying the recording in the ``audio`` file
    field.

    Returns:
        A JSON response: ``{"text": ...}`` on success; ``{"error": ...}`` with
        status 400 when the field is missing or no speech is detected, and
        with status 500 when saving, conversion or recognition raises.
    """
    # Imported locally so this handler does not rely on a module-level import.
    import tempfile

    if "audio" not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files["audio"]

    try:
        # Work in a private per-request directory. Fixed file names under
        # static/ would be overwritten by concurrent requests, would never be
        # cleaned up, and would sit in a folder that is typically web-served.
        with tempfile.TemporaryDirectory() as work_dir:
            input_audio_path = os.path.join(work_dir, "temp_input.wav")
            output_audio_path = os.path.join(work_dir, "temp.wav")
            audio_file.save(input_audio_path)

            # Convert to WAV
            convert_to_wav(input_audio_path, output_audio_path)

            # Check for silence
            if is_silent_audio(output_audio_path):
                return jsonify({"error": "No speech detected. Please try again."}), 400

            # Use Whisper ASR model for transcription
            result = asr_model(output_audio_path, generate_kwargs={"language": "en"})
            # NOTE: capitalize() also lower-cases everything after the first
            # character of the transcript.
            transcribed_text = result["text"].strip().capitalize()

        return jsonify({"text": transcribed_text})
    except Exception as e:
        return jsonify({"error": f"Speech recognition error: {str(e)}"}), 500
89
+
90
# Start the production server (waitress) when this file is run as a script.
if __name__ == "__main__":
    # Listen on every network interface, port 7860.
    serve(app, port=7860, host="0.0.0.0")
93