import threading

import av
import numpy as np
import streamlit as st
from streamlit_webrtc import WebRtcMode, webrtc_streamer
from transformers import pipeline

# ASR model (cached so Streamlit reruns don't reload it)
@st.cache_resource
def load_asr_pipeline():
    return pipeline("automatic-speech-recognition", model="facebook/s2t-medium-mustc-multilingual-st")

pipe = load_asr_pipeline()

# Shared buffer: the audio callback runs in a worker thread where
# st.session_state is not available, so guard a plain dict with a lock.
lock = threading.Lock()
audio_state = {"chunks": [], "sample_rate": 16000}

# Process incoming audio frames: downmix to mono and buffer the samples
def audio_frame_callback(frame: av.AudioFrame) -> av.AudioFrame:
    pcm = frame.to_ndarray()  # packed s16 audio: shape (1, samples * channels), interleaved
    n_channels = pcm.size // frame.samples
    mono = pcm.reshape(-1, n_channels).mean(axis=1)
    mono = (mono / 32768.0).astype(np.float32)  # int16 -> float32 in [-1.0, 1.0]
    with lock:
        audio_state["sample_rate"] = frame.sample_rate
        audio_state["chunks"].append(mono)
    return frame

# Transcribe the buffered audio
def transcribe_audio():
    with lock:
        chunks = list(audio_state["chunks"])
        sample_rate = audio_state["sample_rate"]
    if not chunks:
        st.warning("No audio captured yet.")
        return
    waveform = np.concatenate(chunks)
    # The pipeline accepts a raw waveform plus its sampling rate and
    # resamples it to the model's expected rate (requires torchaudio).
    result = pipe({"raw": waveform, "sampling_rate": sample_rate})
    st.write("Transcription:", result["text"])

# Streamlit UI
st.title("Voice Recognition App")
webrtc_streamer(
    key="audio",
    mode=WebRtcMode.SENDRECV,
    audio_frame_callback=audio_frame_callback,
    media_stream_constraints={"audio": True, "video": False},
)
if st.button("Transcribe Audio"):
    transcribe_audio()
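To separate model problems from WebRTC problems, the pipeline's input format can also be checked on its own, outside the browser loop. The following is a minimal sketch, not part of the app above: it assumes the soundfile package is installed and that a local recording named sample.wav exists (an illustrative placeholder).

import soundfile as sf
from transformers import pipeline

pipe = pipeline("automatic-speech-recognition", model="facebook/s2t-medium-mustc-multilingual-st")

audio, sample_rate = sf.read("sample.wav", dtype="float32")  # float waveform in [-1.0, 1.0]
if audio.ndim > 1:
    audio = audio.mean(axis=1)  # downmix stereo to mono
print(pipe({"raw": audio, "sampling_rate": sample_rate})["text"])

If this standalone check transcribes correctly but the Streamlit app does not, the problem is in the audio capture path rather than the model.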