import streamlit as st
import streamlit.components.v1 as components
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
import tempfile
import librosa  # loads audio and resamples it to the 16 kHz the model expects
# Load the Wav2Vec2 model and processor once and cache them, so Streamlit
# does not re-initialize them on every rerun
@st.cache_resource
def load_asr():
    return (Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h"),
            Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h"))
processor, model = load_asr()
# Streamlit app
st.title("Phonics/Personalized Reading App")
st.write("Record your audio and we will transcribe it.")
# In-browser audio recording. st.markdown does not execute <script> tags even
# with unsafe_allow_html=True, so the recorder is rendered with
# components.html instead. Note that st.button is only True for the single
# rerun after the click, so the recorder disappears on the next interaction.
if st.button("Record Audio"):
    components.html("""
    <audio id="audio" controls></audio>
    <button id="start" onclick="startRecording()">Start Recording</button>
    <button id="stop" onclick="stopRecording()" disabled>Stop Recording</button>
    <script>
    let mediaRecorder;
    let audioChunks = [];

    async function startRecording() {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        mediaRecorder = new MediaRecorder(stream);
        audioChunks = [];  // reset between takes
        mediaRecorder.ondataavailable = event => {
            audioChunks.push(event.data);
        };
        mediaRecorder.onstop = () => {
            // MediaRecorder emits compressed audio (typically webm/ogg), not
            // WAV, so use the recorder's own MIME type for the blob
            const audioBlob = new Blob(audioChunks, { type: mediaRecorder.mimeType });
            document.getElementById('audio').src = URL.createObjectURL(audioBlob);
            // Sending the blob back to the Python side would require a custom
            // Streamlit component; plain Streamlit serves no /upload endpoint
            // and cannot run st.session_state or st.experimental_rerun from
            // browser JavaScript, so this widget only records and plays back
            // locally. Use the file uploader below to get a transcription.
        };
        mediaRecorder.start();
        document.getElementById('start').disabled = true;
        document.getElementById('stop').disabled = false;
    }

    function stopRecording() {
        mediaRecorder.stop();
        document.getElementById('start').disabled = false;
        document.getElementById('stop').disabled = true;
    }
    </script>
    """, height=150)
# Display the transcription once one has been produced
if "transcription" in st.session_state:
    st.write("Transcription:")
    st.write(st.session_state.transcription)
# Handle audio file upload
uploaded_file = st.file_uploader("Or upload your audio file", type=["wav", "mp3"])
if uploaded_file is not None:
    # Save the uploaded audio to a temporary file, keeping the extension so
    # the audio loader can pick the right decoder
    suffix = "." + uploaded_file.name.split(".")[-1]
    with tempfile.NamedTemporaryFile(suffix=suffix) as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file.flush()
        # Load the waveform and resample to 16 kHz, the rate the model was
        # trained on. The processor expects a raw audio array, not a file
        # path; mp3 decoding additionally requires ffmpeg or audioread.
        speech, _ = librosa.load(temp_file.name, sr=16000)
    # Run the audio through the model for transcription
    audio_input = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = model(audio_input.input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    # Store and show the result directly; calling st.experimental_rerun() here
    # would loop forever because the uploader keeps its file across reruns
    st.session_state.transcription = transcription[0]
    st.write("Transcription:", st.session_state.transcription)
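# To try the app locally (a usage sketch; the package names are the standard
# PyPI ones, and "app.py" is a placeholder for whatever this file is saved as):
#
#   pip install streamlit transformers torch librosa
#   streamlit run app.py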