Spaces:

ummtushar
/

Audio2Text

Sleeping

App Files Files Community

Audio2Text / app.py

ummtushar

Create app.py

3a5f6fa verified 12 months ago

raw

history blame contribute delete

5.11 kB

	import os
	import subprocess
	import tempfile
	import zipfile
	from pathlib import Path

	import streamlit as st
	import whisper

	def process_pptx(uploaded_file):
	    """Transcribe the audio clips embedded in an uploaded PPTX file.

	    A PPTX file is a ZIP archive. The clips named
	    ``ppt/media/media<N>.mp4`` or ``ppt/media/media<N>.m4a`` (N = 1, 2, ...)
	    are extracted, converted to MP3 with the ``ffmpeg`` executable found on
	    PATH, and transcribed with the Whisper "base" model. Progress is
	    reported through Streamlit widgets.

	    All intermediate files live in one temporary directory that is removed
	    when the function returns or raises.

	    Args:
	        uploaded_file: Object whose ``getvalue()`` returns the PPTX bytes
	            (the value produced by ``st.file_uploader`` in ``main``).

	    Returns:
	        ``None`` when the archive has no ``ppt/media/`` entries; otherwise
	        a dict mapping the 1-based media index (shown to the user as the
	        slide number) to the transcribed text. The dict is empty when no
	        clip was found or every conversion failed.

	    Raises:
	        zipfile.BadZipFile: If the upload is not a valid ZIP archive.
	    """
	    # NOTE(review): the media index is presented as the slide number;
	    # nothing here verifies that PowerPoint numbers media in slide order.
	    with tempfile.TemporaryDirectory() as work_dir:
	        # zipfile identifies archives by content, so the upload can keep
	        # its .pptx name; no rename to .zip is needed.
	        pptx_path = os.path.join(work_dir, 'upload.pptx')
	        with open(pptx_path, 'wb') as pptx_file:
	            pptx_file.write(uploaded_file.getvalue())

	        extract_dir = os.path.join(work_dir, 'extracted')
	        media_files = {}
	        with st.spinner('Extracting PPTX contents...'):
	            with zipfile.ZipFile(pptx_path, 'r') as archive:
	                member_names = archive.namelist()
	                if not any(name.startswith('ppt/media/') for name in member_names):
	                    return None
	                # Only the clips that will be converted are written to
	                # disk; the rest of the archive is never unpacked.
	                for slide_num, member in _sequential_audio_members(member_names).items():
	                    media_files[slide_num] = archive.extract(member, extract_dir)

	        if not media_files:
	            return {}

	        mp3_dir = os.path.join(work_dir, 'mp3')
	        os.mkdir(mp3_dir)

	        # Progress bar for audio conversion
	        progress_bar = st.progress(0)
	        status_text = st.empty()
	        mp3_files = {}
	        for done, (slide_num, media_file) in enumerate(media_files.items(), start=1):
	            status_text.text(f'Converting audio from slide {slide_num}...')
	            mp3_path = os.path.join(mp3_dir, f'temp_{slide_num}.mp3')
	            try:
	                _convert_to_mp3(media_file, mp3_path)
	            except (OSError, subprocess.CalledProcessError) as e:
	                # A clip that failed to convert is reported and skipped so
	                # Whisper is never handed a missing or partial file.
	                st.error(f"Error converting slide {slide_num}: {str(e)}")
	            else:
	                mp3_files[slide_num] = mp3_path
	            progress_bar.progress(done / len(media_files))
	        progress_bar.empty()
	        status_text.empty()

	        if not mp3_files:
	            # Nothing to transcribe: skip the costly model load.
	            return {}

	        with st.spinner('Loading Whisper model...'):
	            model = whisper.load_model("base")

	        # Progress bar for transcription
	        slide_transcripts = {}
	        progress_bar = st.progress(0)
	        status_text = st.empty()
	        for done, (slide_num, mp3_path) in enumerate(mp3_files.items(), start=1):
	            status_text.text(f'Transcribing slide {slide_num}...')
	            slide_transcripts[slide_num] = model.transcribe(mp3_path)["text"]
	            progress_bar.progress(done / len(mp3_files))
	        progress_bar.empty()
	        status_text.empty()

	        return slide_transcripts


	def _sequential_audio_members(member_names):
	    """Map 1-based media indices to their archive member names.

	    Looks for ``ppt/media/media<N>.mp4`` and then ``ppt/media/media<N>.m4a``
	    for N = 1, 2, ... and stops at the first index with neither file.
	    """
	    available = set(member_names)
	    found = {}
	    index = 1
	    while True:
	        candidates = [f'ppt/media/media{index}{ext}' for ext in ('.mp4', '.m4a')]
	        member = next((name for name in candidates if name in available), None)
	        if member is None:
	            return found
	        found[index] = member
	        index += 1


	def _convert_to_mp3(source_path, target_path):
	    """Convert one media file to MP3 with ffmpeg, raising on failure.

	    Raises:
	        OSError: If the ffmpeg executable cannot be started.
	        subprocess.CalledProcessError: If ffmpeg exits with a non-zero code.
	    """
	    # An argument list (no shell) keeps paths from being parsed as shell
	    # syntax, and check=True turns a failed conversion into an exception.
	    subprocess.run(
	        [
	            'ffmpeg',
	            '-i', source_path,
	            '-vn',
	            '-acodec', 'libmp3lame',
	            target_path,
	            '-loglevel', 'quiet',
	        ],
	        stdin=subprocess.DEVNULL,
	        check=True,
	    )

	def main():
	    """Render the Audio2Text page.

	    Shows the title and a PPTX uploader. Once a file is uploaded it is
	    rejected if larger than 2 GiB; otherwise it is passed to
	    ``process_pptx`` and the returned transcripts are listed in ascending
	    slide order, or a warning is shown when there are none.
	    """
	    st.title('Audio2Text')
	    st.write('Upload a PowerPoint file (PPTX) to transcribe its audio content')

	    uploaded_file = st.file_uploader("Choose a PPTX file", type="pptx")
	    if uploaded_file is None:
	        # Nothing uploaded yet: only the uploader is displayed.
	        return

	    size_limit_bytes = 2 * 1024 * 1024 * 1024
	    if uploaded_file.size > size_limit_bytes:
	        st.error("File size exceeds 2GB limit")
	        return

	    st.write("Processing... This may take a while depending on the number and length of audio clips.")
	    transcripts = process_pptx(uploaded_file)

	    # Both None and an empty dict mean there is nothing to display.
	    if not transcripts:
	        st.warning("No audio content found in the PowerPoint file.")
	        return

	    st.subheader("Transcription Results")
	    for slide_num in sorted(transcripts):
	        st.markdown(f"Slide {slide_num}")
	        st.write(transcripts[slide_num])
	        st.markdown("---")

	# Build the page only when this file is executed as the main script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()