Upload 4 files
Browse files- README.md +47 -12
- app.py +83 -0
- requirements.txt +4 -0
- voice_replay.py +100 -0
README.md
CHANGED
|
@@ -1,12 +1,47 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Voice Recorder and Analyzer
|
| 2 |
+
|
| 3 |
+
A web application that records audio input, plays it back, and analyzes the speech content using Gradio and Hugging Face Spaces.
|
| 4 |
+
|
| 5 |
+
## Features
|
| 6 |
+
|
| 7 |
+
- Audio recording via microphone
|
| 8 |
+
- Audio playback
|
| 9 |
+
- Speech-to-text transcription using Google's Speech Recognition API
|
| 10 |
+
- Download recordings
|
| 11 |
+
|
| 12 |
+
## Hugging Face Deployment
|
| 13 |
+
|
| 14 |
+
This application is designed to be deployed on Hugging Face Spaces:
|
| 15 |
+
|
| 16 |
+
1. Create a new Space on Hugging Face
|
| 17 |
+
2. Select "Gradio" as the SDK
|
| 18 |
+
3. Upload these files to your Space:
|
| 19 |
+
- `app.py`
|
| 20 |
+
- `requirements.txt`
|
| 21 |
+
|
| 22 |
+
The application will automatically deploy.
|
| 23 |
+
|
| 24 |
+
## Local Development
|
| 25 |
+
|
| 26 |
+
To run this application locally:
|
| 27 |
+
|
| 28 |
+
1. Install the required dependencies:
|
| 29 |
+
|
| 30 |
+
```bash
|
| 31 |
+
pip install -r requirements.txt
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
2. Run the application:
|
| 35 |
+
|
| 36 |
+
```bash
|
| 37 |
+
python app.py
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
3. Open the URL shown in the terminal (typically http://127.0.0.1:7860)
|
| 41 |
+
|
| 42 |
+
## How to Use
|
| 43 |
+
|
| 44 |
+
1. Click the microphone button to record your voice
|
| 45 |
+
2. Click "Record and Analyze" to process the recording
|
| 46 |
+
3. Listen to the playback and view the transcription
|
| 47 |
+
4. Download the recording if desired
|
app.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
import tempfile
|
| 4 |
+
import os
|
| 5 |
+
import wave
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
import speech_recognition as sr
|
| 8 |
+
|
| 9 |
+
def process_audio(audio_data, sample_rate=None):
    """Save a recording, transcribe it, and return data for playback.

    Accepts either the Gradio ``type="numpy"`` payload — a single
    ``(sample_rate, samples)`` tuple (what the click handler actually
    passes) — or the legacy two-argument ``(samples, sample_rate)`` form,
    so existing callers keep working.

    Parameters
    ----------
    audio_data : tuple | numpy.ndarray | None
        ``(sample_rate, samples)`` tuple from ``gr.Audio``, a bare sample
        array when ``sample_rate`` is given, or ``None`` when the user
        clicked without recording anything.
    sample_rate : int | None
        Sample rate in Hz; only used when ``audio_data`` is a bare array.

    Returns
    -------
    tuple
        ``((sample_rate, samples), transcription, saved_filename)`` for
        the playback, textbox, and download components, or
        ``(None, "No audio recorded", None)`` when no audio was given.
    """
    # Nothing recorded: return placeholder outputs instead of crashing.
    if audio_data is None:
        return None, "No audio recorded", None

    # gr.Audio(type="numpy") delivers one (rate, samples) tuple and the
    # click handler wires a single input, so unpack it here.
    if sample_rate is None:
        sample_rate, audio_data = audio_data

    # Normalize to 16-bit PCM. Gradio may hand back floats in [-1, 1] or
    # already-quantized int16 — scaling int16 by 32767 would overflow, so
    # only scale floating-point data.
    samples = np.asarray(audio_data)
    if np.issubdtype(samples.dtype, np.floating):
        pcm_bytes = (samples * 32767).astype(np.int16).tobytes()
    else:
        pcm_bytes = samples.astype(np.int16).tobytes()

    def _write_wav(path):
        """Write the normalized PCM data to *path* as mono 16-bit WAV."""
        with wave.open(path, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)  # 16-bit audio
            wf.setframerate(sample_rate)
            wf.writeframes(pcm_bytes)

    # Temporary WAV for the recognizer; removed even if recognition fails.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
    temp_filename = temp_file.name
    temp_file.close()

    transcription = "Speech not recognized"
    try:
        _write_wav(temp_filename)
        recognizer = sr.Recognizer()
        try:
            with sr.AudioFile(temp_filename) as source:
                audio = recognizer.record(source)
                transcription = recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            transcription = "Speech not recognized"
        except sr.RequestError as e:
            transcription = f"Error: {str(e)}"
    finally:
        # Clean up temporary file
        os.unlink(temp_filename)

    # Save a timestamped copy for the user to download.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_filename = f"recording_{timestamp}.wav"
    _write_wav(save_filename)

    # Return audio for playback and transcription
    return (sample_rate, audio_data), transcription, save_filename
|
| 51 |
+
|
| 52 |
+
# Create Gradio interface
# Two-column layout: recording controls on the left; playback,
# transcription, and download widgets on the right.
with gr.Blocks(title="Voice Recorder and Analyzer") as demo:
    gr.Markdown("# Voice Recorder and Analyzer")
    gr.Markdown("Record your voice, play it back, and see the speech-to-text transcription.")

    with gr.Row():
        with gr.Column():
            # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4.x
            # renamed it to `sources=[...]`. requirements.txt allows >=3.50.0,
            # so confirm the deployed Gradio version.
            audio_input = gr.Audio(source="microphone", type="numpy", label="Record Audio")
            record_button = gr.Button("Record and Analyze")

        with gr.Column():
            audio_output = gr.Audio(label="Playback", interactive=False)
            transcription = gr.Textbox(label="Transcription")
            file_output = gr.File(label="Download Recording")

    # Wire the button: one input component, three output components.
    # NOTE(review): gr.Audio(type="numpy") delivers a single
    # (sample_rate, samples) tuple — confirm process_audio's signature
    # accepts this single-argument call.
    record_button.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[audio_output, transcription, file_output]
    )

    gr.Markdown("""
    ## How to use
    1. Click the microphone button to record your voice
    2. Click "Record and Analyze" to process the recording
    3. Listen to the playback and see the transcription
    4. Download the recording if desired
    """)

# Launch the app
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=3.50.0
|
| 2 |
+
numpy>=1.19.0
|
| 3 |
+
SpeechRecognition>=3.8.1
|
| 4 |
+
# NOTE: "wave" is part of the Python standard library and must not be pip-installed
# (the PyPI package named "Wave" is unrelated).
|
voice_replay.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pyaudio
|
| 2 |
+
import wave
|
| 3 |
+
import threading
|
| 4 |
+
import numpy as np
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
import speech_recognition as sr
|
| 7 |
+
|
| 8 |
+
# Audio parameters shared by the capture, playback, and analysis code.
FORMAT = pyaudio.paInt16  # 16-bit signed integer samples
CHANNELS = 1  # mono
RATE = 44100  # sample rate in Hz
CHUNK = 1024  # frames per buffer read/write
RECORD_SECONDS = 5  # Default recording time (NOTE(review): appears unused —
                    # recording currently runs until Ctrl+C)
|
| 14 |
+
|
| 15 |
+
def record_and_replay():
    """Capture microphone audio and play it back in real time until Ctrl+C.

    Raw PCM chunks accumulate in the module-level ``frames`` list (shared
    with the background transcription thread) and are written to a
    timestamped WAV file once recording stops.
    """
    # (Re)initialize the shared frame buffer here so the function also
    # works when the module is imported — previously `frames` was only
    # defined under the __main__ guard, so importing and calling this
    # raised NameError.
    global frames
    frames = []

    audio = pyaudio.PyAudio()

    # One stream reads the default microphone, the other writes to the
    # default output device; identical parameters let chunks pass through
    # unmodified.
    input_stream = audio.open(format=FORMAT, channels=CHANNELS,
                              rate=RATE, input=True,
                              frames_per_buffer=CHUNK)
    output_stream = audio.open(format=FORMAT, channels=CHANNELS,
                               rate=RATE, output=True,
                               frames_per_buffer=CHUNK)

    print("Recording and playing back in real-time. Press Ctrl+C to stop.")

    # Start speech recognition in a daemon thread so it dies with the
    # process instead of blocking exit.
    analysis_thread = threading.Thread(target=analyze_speech, args=(audio,))
    analysis_thread.daemon = True
    analysis_thread.start()

    try:
        # Pump audio from input to output one chunk at a time.
        while True:
            data = input_stream.read(CHUNK)
            output_stream.write(data)
            frames.append(data)  # keep a copy so the session can be saved
    except KeyboardInterrupt:
        print("Recording stopped.")
    finally:
        # Release audio resources even on an unexpected exception — the
        # original leaked the streams and PyAudio instance in that case.
        input_stream.stop_stream()
        input_stream.close()
        output_stream.stop_stream()
        output_stream.close()
        audio.terminate()

    # Save the recorded audio
    save_audio(frames)
|
| 55 |
+
|
| 56 |
+
def save_audio(frames, channels=None, rate=None, sample_width=2):
    """Write captured audio frames to a timestamped WAV file.

    Parameters
    ----------
    frames : list[bytes]
        Raw PCM chunks as returned by ``stream.read``.
    channels : int | None
        Channel count; defaults to the module-level ``CHANNELS``.
    rate : int | None
        Sample rate in Hz; defaults to the module-level ``RATE``.
    sample_width : int
        Bytes per sample. Defaults to 2 (16-bit, matching ``paInt16``);
        the original spawned a throwaway ``pyaudio.PyAudio()`` instance
        just to look this up and never terminated it.

    Returns
    -------
    str
        The name of the WAV file that was written.
    """
    if channels is None:
        channels = CHANNELS
    if rate is None:
        rate = RATE

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"recording_{timestamp}.wav"

    # Context manager guarantees the header is finalized and the file
    # closed even if writing fails.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))

    # The original printed the literal "(unknown)" — report the real name.
    print(f"Recording saved as {filename}")
    return filename
|
| 68 |
+
|
| 69 |
+
def analyze_speech(audio):
    """Continuously transcribe the tail of the shared ``frames`` buffer.

    Intended to run forever in a daemon thread. Roughly once per second it
    snapshots the most recent ~2 seconds of recorded audio, writes it to a
    scratch WAV file, and prints the Google speech-recognition result.

    Parameters
    ----------
    audio : pyaudio.PyAudio
        Used only to query the sample width for the WAV header.
    """
    import time  # local import keeps the module's import block unchanged

    recognizer = sr.Recognizer()
    # Number of chunks in ~2 seconds of audio. Computed once so the guard
    # and the slice agree (the original's `-RATE // CHUNK * 2` slice
    # floor-divided a negative value and took 88 chunks while the guard
    # checked for 86).
    window = RATE // CHUNK * 2

    while True:
        if len(frames) > window:
            # Snapshot the tail of the shared buffer.
            temp_frames = frames[-window:]

            # Save temp audio file for the recognizer. The same name is
            # reused each pass, so at most one scratch file remains on disk.
            temp_file = "temp_analysis.wav"
            with wave.open(temp_file, 'wb') as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(audio.get_sample_size(FORMAT))
                wf.setframerate(RATE)
                wf.writeframes(b''.join(temp_frames))

            # Process with speech recognition
            try:
                with sr.AudioFile(temp_file) as source:
                    audio_data = recognizer.record(source)
                    text = recognizer.recognize_google(audio_data)
                    print(f"Recognized: {text}")
            except sr.UnknownValueError:
                print("Speech not recognized")
            except sr.RequestError as e:
                print(f"Could not request results; {e}")

        # Sleep between passes — the original looped without yielding and
        # pinned a CPU core in a busy-wait.
        time.sleep(1.0)
|
| 97 |
+
|
| 98 |
+
if __name__ == "__main__":
    # Shared buffer of raw PCM chunks: appended to by record_and_replay()
    # and read by the analyze_speech() background thread.
    frames = []
    record_and_replay()
|