import streamlit as st
import streamlit.components.v1 as components
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
import tempfile
import librosa  # loads audio and resamples it to the 16 kHz the model expects
# Load the Wav2Vec2 model and processor once and cache them, so Streamlit
# does not re-initialize them on every rerun
@st.cache_resource
def load_asr():
    return (Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h"),
            Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h"))
processor, model = load_asr()
# Streamlit app
st.title("Phonics/Personalized Reading App")
st.write("Record your audio and we will transcribe it.")
# In-browser audio recording. st.markdown does not execute <script> tags even
# with unsafe_allow_html=True, so the recorder is rendered with
# components.html instead. Note that st.button is only True for the single
# rerun after the click, so the recorder disappears on the next interaction.
if st.button("Record Audio"):
    components.html("""
    <audio id="audio" controls></audio>
    <button id="start" onclick="startRecording()">Start Recording</button>
    <button id="stop" onclick="stopRecording()" disabled>Stop Recording</button>
    <script>
    let mediaRecorder;
    let audioChunks = [];

    async function startRecording() {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        mediaRecorder = new MediaRecorder(stream);
        audioChunks = [];  // reset between takes
        mediaRecorder.ondataavailable = event => {
            audioChunks.push(event.data);
        };
        mediaRecorder.onstop = () => {
            // MediaRecorder emits compressed audio (typically webm/ogg), not
            // WAV, so use the recorder's own MIME type for the blob
            const audioBlob = new Blob(audioChunks, { type: mediaRecorder.mimeType });
            document.getElementById('audio').src = URL.createObjectURL(audioBlob);
            // Sending the blob back to the Python side would require a custom
            // Streamlit component; plain Streamlit serves no /upload endpoint
            // and cannot run st.session_state or st.experimental_rerun from
            // browser JavaScript, so this widget only records and plays back
            // locally. Use the file uploader below to get a transcription.
        };
        mediaRecorder.start();
        document.getElementById('start').disabled = true;
        document.getElementById('stop').disabled = false;
    }

    function stopRecording() {
        mediaRecorder.stop();
        document.getElementById('start').disabled = false;
        document.getElementById('stop').disabled = true;
    }
    </script>
    """, height=150)
# Display the transcription once one has been produced
if "transcription" in st.session_state:
    st.write("Transcription:")
    st.write(st.session_state.transcription)
# Handle audio file upload
uploaded_file = st.file_uploader("Or upload your audio file", type=["wav", "mp3"])
if uploaded_file is not None:
    # Save the uploaded audio to a temporary file, keeping the extension so
    # the audio loader can pick the right decoder
    suffix = "." + uploaded_file.name.split(".")[-1]
    with tempfile.NamedTemporaryFile(suffix=suffix) as temp_file:
        temp_file.write(uploaded_file.read())
        temp_file.flush()
        # Load the waveform and resample to 16 kHz, the rate the model was
        # trained on. The processor expects a raw audio array, not a file
        # path; mp3 decoding additionally requires ffmpeg or audioread.
        speech, _ = librosa.load(temp_file.name, sr=16000)
    # Run the audio through the model for transcription
    audio_input = processor(speech, sampling_rate=16000, return_tensors="pt", padding=True)
    with torch.no_grad():
        logits = model(audio_input.input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    # Store and show the result directly; calling st.experimental_rerun() here
    # would loop forever because the uploader keeps its file across reruns
    st.session_state.transcription = transcription[0]
    st.write("Transcription:", st.session_state.transcription)
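# To try the app locally (a usage sketch; the package names are the standard
# PyPI ones, and "app.py" is a placeholder for whatever this file is saved as):
#
#   pip install streamlit transformers torch librosa
#   streamlit run app.py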