SreekarB commited on
Commit
e96e5df
·
verified ·
1 Parent(s): 571a743

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +47 -12
  2. app.py +83 -0
  3. requirements.txt +4 -0
  4. voice_replay.py +100 -0
README.md CHANGED
@@ -1,12 +1,47 @@
1
- ---
2
- title: SLP
3
- emoji: 🚀
4
- colorFrom: purple
5
- colorTo: indigo
6
- sdk: gradio
7
- sdk_version: 5.21.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Voice Recorder and Analyzer
2
+
3
+ A web application that records audio input, plays it back, and analyzes the speech content using Gradio and Hugging Face Spaces.
4
+
5
+ ## Features
6
+
7
+ - Audio recording via microphone
8
+ - Audio playback
9
+ - Speech-to-text transcription using Google's Speech Recognition API
10
+ - Download recordings
11
+
12
+ ## Hugging Face Deployment
13
+
14
+ This application is designed to be deployed on Hugging Face Spaces:
15
+
16
+ 1. Create a new Space on Hugging Face
17
+ 2. Select "Gradio" as the SDK
18
+ 3. Upload these files to your Space:
19
+ - `app.py`
20
+ - `requirements.txt`
21
+
22
+ The application will automatically deploy.
23
+
24
+ ## Local Development
25
+
26
+ To run this application locally:
27
+
28
+ 1. Install the required dependencies:
29
+
30
+ ```bash
31
+ pip install -r requirements.txt
32
+ ```
33
+
34
+ 2. Run the application:
35
+
36
+ ```bash
37
+ python app.py
38
+ ```
39
+
40
+ 3. Open the URL shown in the terminal (typically http://127.0.0.1:7860)
41
+
42
+ ## How to Use
43
+
44
+ 1. Click the microphone button to record your voice
45
+ 2. Click "Record and Analyze" to process the recording
46
+ 3. Listen to the playback and view the transcription
47
+ 4. Download the recording if desired
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import tempfile
4
+ import os
5
+ import wave
6
+ from datetime import datetime
7
+ import speech_recognition as sr
8
+
9
def process_audio(audio_data, sample_rate=None):
    """Play back a recording and transcribe it with Google Speech Recognition.

    Args:
        audio_data: Either a numpy array of samples, or — as Gradio's
            ``type="numpy"`` Audio component actually delivers — a single
            ``(sample_rate, samples)`` tuple. The tuple form is unpacked
            here because the UI wires only one input to this callback.
        sample_rate: Sample rate in Hz; ignored when ``audio_data`` is a
            ``(sample_rate, samples)`` tuple. Defaults to ``None`` so the
            two-argument call form keeps working.

    Returns:
        A 3-tuple ``((sample_rate, samples), transcription, save_filename)``:
        audio for the playback widget, the recognized text (or an error
        message), and the path of the saved WAV file for download.
    """
    # Bug fix: Gradio passes one (sample_rate, samples) tuple; the old
    # mandatory two-argument signature raised TypeError on every click.
    if sample_rate is None and isinstance(audio_data, tuple):
        sample_rate, audio_data = audio_data
    if audio_data is None:
        # Nothing recorded yet — return gracefully instead of crashing.
        return None, "No audio recorded", None

    # Normalize to 16-bit PCM bytes. Gradio may hand us int16 already;
    # only float data needs rescaling (multiplying int16 samples by
    # 32767 would overflow).
    samples = np.asarray(audio_data)
    if np.issubdtype(samples.dtype, np.floating):
        pcm_bytes = (samples * 32767).astype(np.int16).tobytes()
    else:
        pcm_bytes = samples.astype(np.int16).tobytes()

    # Write a temporary WAV file for the recognizer.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
    temp_filename = temp_file.name
    temp_file.close()

    try:
        with wave.open(temp_filename, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)  # 16-bit audio
            wf.setframerate(sample_rate)
            wf.writeframes(pcm_bytes)

        # Perform speech recognition.
        recognizer = sr.Recognizer()
        transcription = "Speech not recognized"
        try:
            with sr.AudioFile(temp_filename) as source:
                audio = recognizer.record(source)
                transcription = recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            transcription = "Speech not recognized"
        except sr.RequestError as e:
            transcription = f"Error: {str(e)}"
    finally:
        # Bug fix: the temp file previously leaked if wave writing or
        # recognition raised; always remove it.
        os.unlink(temp_filename)

    # Persist a timestamped copy for the download widget.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_filename = f"recording_{timestamp}.wav"
    with wave.open(save_filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(sample_rate)
        wf.writeframes(pcm_bytes)

    # Return audio for playback, the transcription, and the saved file.
    return (sample_rate, audio_data), transcription, save_filename
51
+
52
+ # Create Gradio interface
53
# Build the Gradio UI: recorder on the left, playback/transcription/
# download widgets on the right, wired together by one button click.
with gr.Blocks(title="Voice Recorder and Analyzer") as demo:
    gr.Markdown("# Voice Recorder and Analyzer")
    gr.Markdown("Record your voice, play it back, and see the speech-to-text transcription.")

    with gr.Row():
        # Input side: microphone capture plus the trigger button.
        # NOTE(review): `source=` is the Gradio 3.x parameter name; 4.x/5.x
        # renamed it to `sources=[...]` — confirm against the pinned SDK.
        with gr.Column():
            audio_input = gr.Audio(source="microphone", type="numpy", label="Record Audio")
            record_button = gr.Button("Record and Analyze")

        # Output side: playback, recognized text, and a download link.
        with gr.Column():
            audio_output = gr.Audio(label="Playback", interactive=False)
            transcription = gr.Textbox(label="Transcription")
            file_output = gr.File(label="Download Recording")

    # NOTE(review): only one component is wired as input, so process_audio
    # receives a single (sample_rate, samples) tuple — verify its signature
    # accepts that.
    record_button.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[audio_output, transcription, file_output],
    )

    gr.Markdown("""
    ## How to use
    1. Click the microphone button to record your voice
    2. Click "Record and Analyze" to process the recording
    3. Listen to the playback and see the transcription
    4. Download the recording if desired
    """)

# Start the server only when executed as a script (Spaces imports `demo`).
if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=3.50.0
2
+ numpy>=1.19.0
3
+ SpeechRecognition>=3.8.1
4
+ # NOTE: 'wave' is part of the Python standard library — do not pip-install it (the PyPI package of that name is unrelated)
voice_replay.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pyaudio
2
+ import wave
3
+ import threading
4
+ import numpy as np
5
+ from datetime import datetime
6
+ import speech_recognition as sr
7
+
8
# Audio parameters shared by recording, playback, saving and analysis.
FORMAT = pyaudio.paInt16  # 16-bit signed integer samples
CHANNELS = 1  # mono capture
RATE = 44100  # sample rate in Hz
CHUNK = 1024  # frames per buffered read/write
RECORD_SECONDS = 5  # Default recording time  # NOTE(review): unused — recording runs until Ctrl+C
14
+
15
def record_and_replay():
    """Capture microphone audio and echo it to the speakers until Ctrl+C.

    Every captured chunk is appended to the module-level ``frames`` list so
    the background ``analyze_speech`` thread and ``save_audio`` can read it.
    On interrupt the streams are closed and the whole session is written to
    a timestamped WAV file via ``save_audio``.
    """
    # Bug fix: ``frames`` was only created under the ``__main__`` guard, so
    # importing this module and calling the function raised NameError.
    global frames
    if "frames" not in globals():
        frames = []

    audio = pyaudio.PyAudio()

    # One stream for capture, one for immediate playback.
    input_stream = audio.open(format=FORMAT, channels=CHANNELS,
                              rate=RATE, input=True,
                              frames_per_buffer=CHUNK)
    output_stream = audio.open(format=FORMAT, channels=CHANNELS,
                               rate=RATE, output=True,
                               frames_per_buffer=CHUNK)

    print("Recording and playing back in real-time. Press Ctrl+C to stop.")

    # Background transcription of the most recent audio (daemon: dies
    # with the main thread).
    analysis_thread = threading.Thread(target=analyze_speech, args=(audio,))
    analysis_thread.daemon = True
    analysis_thread.start()

    try:
        # Echo microphone input straight to the speakers until interrupted.
        while True:
            data = input_stream.read(CHUNK)
            output_stream.write(data)
            frames.append(data)  # keep everything for save_audio()
    except KeyboardInterrupt:
        print("Recording stopped.")
    finally:
        # Bug fix: cleanup previously ran only after KeyboardInterrupt;
        # any other exception leaked both streams and the PyAudio handle.
        input_stream.stop_stream()
        input_stream.close()
        output_stream.stop_stream()
        output_stream.close()
        audio.terminate()

    # Persist the full session to disk.
    save_audio(frames)
55
+
56
def save_audio(frames, sample_width=None, channels=None, rate=None):
    """Write raw PCM ``frames`` to a timestamped WAV file.

    Args:
        frames: Iterable of raw PCM byte chunks.
        sample_width: Bytes per sample; defaults to the width of the
            module-level ``FORMAT`` (queried from PyAudio).
        channels: Channel count; defaults to the module-level ``CHANNELS``.
        rate: Sample rate in Hz; defaults to the module-level ``RATE``.

    Returns:
        The filename the recording was written to.
    """
    # Fall back to the module-level audio configuration when callers pass
    # nothing (backward compatible with the original one-argument form).
    if sample_width is None:
        sample_width = pyaudio.PyAudio().get_sample_size(FORMAT)
    if channels is None:
        channels = CHANNELS
    if rate is None:
        rate = RATE

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"recording_{timestamp}.wav"

    # Context manager guarantees the WAV header is finalized even if a
    # write fails (the original left the handle open on error).
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))

    # Bug fix: the message printed the literal "(unknown)" instead of
    # the actual filename.
    print(f"Recording saved as {filename}")
    return filename
68
+
69
def analyze_speech(audio):
    """Background loop: periodically transcribe the most recent audio.

    Args:
        audio: The shared ``pyaudio.PyAudio`` instance, used only to look
            up the sample width for ``FORMAT``.

    Runs forever and is intended to be started as a daemon thread. Reads
    the module-level ``frames`` list that ``record_and_replay`` appends to.
    """
    import time  # local import keeps this fix self-contained

    recognizer = sr.Recognizer()
    # Number of chunks in roughly two seconds of audio. Bug fix: the
    # original slice used ``frames[-RATE // CHUNK * 2:]``, where floor
    # division of the negated value yields -90 chunks, disagreeing with
    # the ``> RATE // CHUNK * 2`` (86) guard; compute the window once.
    window = RATE // CHUNK * 2
    temp_file = "temp_analysis.wav"

    while True:
        # Bug fix: the original loop spun with no pause, pegging a CPU
        # core; throttle to one analysis pass every ~2 seconds.
        time.sleep(2)

        if len(frames) <= window:
            continue
        recent = frames[-window:]

        # Snapshot the recent audio to a scratch WAV for the recognizer.
        with wave.open(temp_file, 'wb') as wf:
            wf.setnchannels(CHANNELS)
            wf.setsampwidth(audio.get_sample_size(FORMAT))
            wf.setframerate(RATE)
            wf.writeframes(b''.join(recent))

        # Process with speech recognition; failures are reported but
        # never kill the analysis thread.
        try:
            with sr.AudioFile(temp_file) as source:
                audio_data = recognizer.record(source)
                text = recognizer.recognize_google(audio_data)
                print(f"Recognized: {text}")
        except sr.UnknownValueError:
            print("Speech not recognized")
        except sr.RequestError as e:
            print(f"Could not request results; {e}")
97
+
98
if __name__ == "__main__":
    # Shared buffer of raw PCM chunks: appended to by record_and_replay,
    # read by analyze_speech and save_audio.
    frames = []
    record_and_replay()