Upload 4 files
Browse files- README.md +47 -12
- app.py +83 -0
- requirements.txt +4 -0
- voice_replay.py +100 -0
README.md
CHANGED
|
@@ -1,12 +1,47 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
-
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Voice Recorder and Analyzer
|
| 2 |
+
|
| 3 |
+
A web application that records audio input, plays it back, and analyzes the speech content using Gradio and Hugging Face Spaces.
|
| 4 |
+
|
| 5 |
+
## Features
|
| 6 |
+
|
| 7 |
+
- Audio recording via microphone
|
| 8 |
+
- Audio playback
|
| 9 |
+
- Speech-to-text transcription using Google's Speech Recognition API
|
| 10 |
+
- Download recordings
|
| 11 |
+
|
| 12 |
+
## Hugging Face Deployment
|
| 13 |
+
|
| 14 |
+
This application is designed to be deployed on Hugging Face Spaces:
|
| 15 |
+
|
| 16 |
+
1. Create a new Space on Hugging Face
|
| 17 |
+
2. Select "Gradio" as the SDK
|
| 18 |
+
3. Upload these files to your Space:
|
| 19 |
+
- `app.py`
|
| 20 |
+
- `requirements.txt`
|
| 21 |
+
|
| 22 |
+
The application will automatically deploy.
|
| 23 |
+
|
| 24 |
+
## Local Development
|
| 25 |
+
|
| 26 |
+
To run this application locally:
|
| 27 |
+
|
| 28 |
+
1. Install the required dependencies:
|
| 29 |
+
|
| 30 |
+
```bash
|
| 31 |
+
pip install -r requirements.txt
|
| 32 |
+
```
|
| 33 |
+
|
| 34 |
+
2. Run the application:
|
| 35 |
+
|
| 36 |
+
```bash
|
| 37 |
+
python app.py
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
3. Open the URL shown in the terminal (typically http://127.0.0.1:7860)
|
| 41 |
+
|
| 42 |
+
## How to Use
|
| 43 |
+
|
| 44 |
+
1. Click the microphone button to record your voice
|
| 45 |
+
2. Click "Record and Analyze" to process the recording
|
| 46 |
+
3. Listen to the playback and view the transcription
|
| 47 |
+
4. Download the recording if desired
|
app.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import numpy as np
|
| 3 |
+
import tempfile
|
| 4 |
+
import os
|
| 5 |
+
import wave
|
| 6 |
+
from datetime import datetime
|
| 7 |
+
import speech_recognition as sr
|
| 8 |
+
|
| 9 |
+
def process_audio(audio_data, sample_rate=None):
    """Save a recording, transcribe it, and return data for playback.

    Accepts either the Gradio ``type="numpy"`` payload — a single
    ``(sample_rate, samples)`` tuple (what the click handler actually
    passes) — or the legacy two-argument ``(samples, sample_rate)`` form,
    so existing callers keep working.

    Parameters
    ----------
    audio_data : tuple | numpy.ndarray | None
        ``(sample_rate, samples)`` tuple from ``gr.Audio``, a bare sample
        array when ``sample_rate`` is given, or ``None`` when the user
        clicked without recording anything.
    sample_rate : int | None
        Sample rate in Hz; only used when ``audio_data`` is a bare array.

    Returns
    -------
    tuple
        ``((sample_rate, samples), transcription, saved_filename)`` for
        the playback, textbox, and download components, or
        ``(None, "No audio recorded", None)`` when no audio was given.
    """
    # Nothing recorded: return placeholder outputs instead of crashing.
    if audio_data is None:
        return None, "No audio recorded", None

    # gr.Audio(type="numpy") delivers one (rate, samples) tuple and the
    # click handler wires a single input, so unpack it here.
    if sample_rate is None:
        sample_rate, audio_data = audio_data

    # Normalize to 16-bit PCM. Gradio may hand back floats in [-1, 1] or
    # already-quantized int16 — scaling int16 by 32767 would overflow, so
    # only scale floating-point data.
    samples = np.asarray(audio_data)
    if np.issubdtype(samples.dtype, np.floating):
        pcm_bytes = (samples * 32767).astype(np.int16).tobytes()
    else:
        pcm_bytes = samples.astype(np.int16).tobytes()

    def _write_wav(path):
        """Write the normalized PCM data to *path* as mono 16-bit WAV."""
        with wave.open(path, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)  # 16-bit audio
            wf.setframerate(sample_rate)
            wf.writeframes(pcm_bytes)

    # Temporary WAV for the recognizer; removed even if recognition fails.
    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
    temp_filename = temp_file.name
    temp_file.close()

    transcription = "Speech not recognized"
    try:
        _write_wav(temp_filename)
        recognizer = sr.Recognizer()
        try:
            with sr.AudioFile(temp_filename) as source:
                audio = recognizer.record(source)
                transcription = recognizer.recognize_google(audio)
        except sr.UnknownValueError:
            transcription = "Speech not recognized"
        except sr.RequestError as e:
            transcription = f"Error: {str(e)}"
    finally:
        # Clean up temporary file
        os.unlink(temp_filename)

    # Save a timestamped copy for the user to download.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    save_filename = f"recording_{timestamp}.wav"
    _write_wav(save_filename)

    # Return audio for playback and transcription
    return (sample_rate, audio_data), transcription, save_filename
|
| 51 |
+
|
| 52 |
+
# Create Gradio interface
# Two-column layout: recording controls on the left; playback,
# transcription, and download widgets on the right.
with gr.Blocks(title="Voice Recorder and Analyzer") as demo:
    gr.Markdown("# Voice Recorder and Analyzer")
    gr.Markdown("Record your voice, play it back, and see the speech-to-text transcription.")

    with gr.Row():
        with gr.Column():
            # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4.x
            # renamed it to `sources=[...]`. requirements.txt allows >=3.50.0,
            # so confirm the deployed Gradio version.
            audio_input = gr.Audio(source="microphone", type="numpy", label="Record Audio")
            record_button = gr.Button("Record and Analyze")

        with gr.Column():
            audio_output = gr.Audio(label="Playback", interactive=False)
            transcription = gr.Textbox(label="Transcription")
            file_output = gr.File(label="Download Recording")

    # Wire the button: one input component, three output components.
    # NOTE(review): gr.Audio(type="numpy") delivers a single
    # (sample_rate, samples) tuple — confirm process_audio's signature
    # accepts this single-argument call.
    record_button.click(
        fn=process_audio,
        inputs=[audio_input],
        outputs=[audio_output, transcription, file_output]
    )

    gr.Markdown("""
    ## How to use
    1. Click the microphone button to record your voice
    2. Click "Record and Analyze" to process the recording
    3. Listen to the playback and see the transcription
    4. Download the recording if desired
    """)

# Launch the app
if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=3.50.0
|
| 2 |
+
numpy>=1.19.0
|
| 3 |
+
SpeechRecognition>=3.8.1
|
| 4 |
+
# NOTE: "wave" is part of the Python standard library and must not be pip-installed
# (the PyPI package named "Wave" is unrelated).
|
voice_replay.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pyaudio
|
| 2 |
+
import wave
|
| 3 |
+
import threading
|
| 4 |
+
import numpy as np
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
import speech_recognition as sr
|
| 7 |
+
|
| 8 |
+
# Audio parameters shared by the capture, playback, and analysis code.
FORMAT = pyaudio.paInt16  # 16-bit signed integer samples
CHANNELS = 1  # mono
RATE = 44100  # sample rate in Hz
CHUNK = 1024  # frames per buffer read/write
RECORD_SECONDS = 5  # Default recording time (NOTE(review): appears unused —
                    # recording currently runs until Ctrl+C)
|
| 14 |
+
|
| 15 |
+
def record_and_replay():
    """Capture microphone audio and play it back in real time until Ctrl+C.

    Raw PCM chunks accumulate in the module-level ``frames`` list (shared
    with the background transcription thread) and are written to a
    timestamped WAV file once recording stops.
    """
    # (Re)initialize the shared frame buffer here so the function also
    # works when the module is imported — previously `frames` was only
    # defined under the __main__ guard, so importing and calling this
    # raised NameError.
    global frames
    frames = []

    audio = pyaudio.PyAudio()

    # One stream reads the default microphone, the other writes to the
    # default output device; identical parameters let chunks pass through
    # unmodified.
    input_stream = audio.open(format=FORMAT, channels=CHANNELS,
                              rate=RATE, input=True,
                              frames_per_buffer=CHUNK)
    output_stream = audio.open(format=FORMAT, channels=CHANNELS,
                               rate=RATE, output=True,
                               frames_per_buffer=CHUNK)

    print("Recording and playing back in real-time. Press Ctrl+C to stop.")

    # Start speech recognition in a daemon thread so it dies with the
    # process instead of blocking exit.
    analysis_thread = threading.Thread(target=analyze_speech, args=(audio,))
    analysis_thread.daemon = True
    analysis_thread.start()

    try:
        # Pump audio from input to output one chunk at a time.
        while True:
            data = input_stream.read(CHUNK)
            output_stream.write(data)
            frames.append(data)  # keep a copy so the session can be saved
    except KeyboardInterrupt:
        print("Recording stopped.")
    finally:
        # Release audio resources even on an unexpected exception — the
        # original leaked the streams and PyAudio instance in that case.
        input_stream.stop_stream()
        input_stream.close()
        output_stream.stop_stream()
        output_stream.close()
        audio.terminate()

    # Save the recorded audio
    save_audio(frames)
|
| 55 |
+
|
| 56 |
+
def save_audio(frames, channels=None, rate=None, sample_width=2):
    """Write captured audio frames to a timestamped WAV file.

    Parameters
    ----------
    frames : list[bytes]
        Raw PCM chunks as returned by ``stream.read``.
    channels : int | None
        Channel count; defaults to the module-level ``CHANNELS``.
    rate : int | None
        Sample rate in Hz; defaults to the module-level ``RATE``.
    sample_width : int
        Bytes per sample. Defaults to 2 (16-bit, matching ``paInt16``);
        the original spawned a throwaway ``pyaudio.PyAudio()`` instance
        just to look this up and never terminated it.

    Returns
    -------
    str
        The name of the WAV file that was written.
    """
    if channels is None:
        channels = CHANNELS
    if rate is None:
        rate = RATE

    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"recording_{timestamp}.wav"

    # Context manager guarantees the header is finalized and the file
    # closed even if writing fails.
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))

    # The original printed the literal "(unknown)" — report the real name.
    print(f"Recording saved as {filename}")
    return filename
|
| 68 |
+
|
| 69 |
+
def analyze_speech(audio):
    """Continuously transcribe the tail of the shared ``frames`` buffer.

    Intended to run forever in a daemon thread. Roughly once per second it
    snapshots the most recent ~2 seconds of recorded audio, writes it to a
    scratch WAV file, and prints the Google speech-recognition result.

    Parameters
    ----------
    audio : pyaudio.PyAudio
        Used only to query the sample width for the WAV header.
    """
    import time  # local import keeps the module's import block unchanged

    recognizer = sr.Recognizer()
    # Number of chunks in ~2 seconds of audio. Computed once so the guard
    # and the slice agree (the original's `-RATE // CHUNK * 2` slice
    # floor-divided a negative value and took 88 chunks while the guard
    # checked for 86).
    window = RATE // CHUNK * 2

    while True:
        if len(frames) > window:
            # Snapshot the tail of the shared buffer.
            temp_frames = frames[-window:]

            # Save temp audio file for the recognizer. The same name is
            # reused each pass, so at most one scratch file remains on disk.
            temp_file = "temp_analysis.wav"
            with wave.open(temp_file, 'wb') as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(audio.get_sample_size(FORMAT))
                wf.setframerate(RATE)
                wf.writeframes(b''.join(temp_frames))

            # Process with speech recognition
            try:
                with sr.AudioFile(temp_file) as source:
                    audio_data = recognizer.record(source)
                    text = recognizer.recognize_google(audio_data)
                    print(f"Recognized: {text}")
            except sr.UnknownValueError:
                print("Speech not recognized")
            except sr.RequestError as e:
                print(f"Could not request results; {e}")

        # Sleep between passes — the original looped without yielding and
        # pinned a CPU core in a busy-wait.
        time.sleep(1.0)
|
| 97 |
+
|
| 98 |
+
if __name__ == "__main__":
    # Shared buffer of raw PCM chunks: appended to by record_and_replay()
    # and read by the analyze_speech() background thread.
    frames = []
    record_and_replay()
|