Spaces:

Jamshaid-Saleem
/

PDF_TO_Audio

Sleeping

PDF_TO_Audio / app.py

Create app.py

5f49046 verified about 1 year ago

1.86 kB

	import streamlit as st
	from PyPDF2 import PdfReader
	from gtts import gTTS
	import os
	import tempfile

	# Function to extract text from a PDF
	def extract_text_from_pdf(pdf_file):
	    """Return the text of every page in a PDF, concatenated in page order.

	    Args:
	        pdf_file: Whatever ``PdfReader`` accepts as its source; the app
	            passes the object returned by ``st.file_uploader``.

	    Returns:
	        A single string with the extracted text of all pages joined
	        without a separator. Empty when no page yields any text.
	    """
	    pdf_reader = PdfReader(pdf_file)
	    # NOTE(review): some PyPDF2 releases may return None for a page with no
	    # text layer (e.g. scanned images) - confirm for the pinned version.
	    # `or ""` keeps such pages from raising TypeError during concatenation.
	    return "".join(page.extract_text() or "" for page in pdf_reader.pages)

	# Function to generate audio from text
	def text_to_audio(text, language="en"):
	    """Build a gTTS object for ``text``.

	    Args:
	        text: The text to be spoken.
	        language: Language code forwarded to gTTS as ``lang``
	            (defaults to ``"en"``).

	    Returns:
	        The constructed ``gTTS`` instance, created with ``slow=False``.
	        This function does not save anything; the caller saves the result.
	    """
	    return gTTS(text=text, lang=language, slow=False)

	# Streamlit App
	# Flow: upload a PDF -> extract its text -> show the text -> synthesize speech
	# -> offer the MP3 for in-page playback and for download.
	st.title("PDF to Audio Converter")
	st.write("Upload a PDF document, and this application will generate an audio file of its content.")

	# File upload
	uploaded_pdf = st.file_uploader("Upload your PDF file", type=["pdf"])

	if uploaded_pdf is not None:
	    with st.spinner("Processing the PDF..."):
	        # Extract text from PDF
	        extracted_text = extract_text_from_pdf(uploaded_pdf)

	    if extracted_text.strip():  # Check if the text is not empty
	        st.subheader("Extracted Text")
	        st.text_area("Text from PDF", extracted_text, height=300)

	        # Generate audio
	        st.write("Generating audio...")
	        audio_file = text_to_audio(extracted_text)

	        # The audio is saved to a path, so reserve a temporary one. The
	        # handle is closed before saving so the path is only written once.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
	            temp_audio_path = temp_audio.name
	        try:
	            audio_file.save(temp_audio_path)
	            with open(temp_audio_path, "rb") as file:
	                audio_bytes = file.read()
	        finally:
	            # delete=False means nothing else removes this file; without
	            # this, every processed upload would leave an MP3 on disk.
	            os.remove(temp_audio_path)

	        # Serve the audio from memory now that the file is gone.
	        st.audio(audio_bytes, format="audio/mp3")

	        # Provide download button
	        st.download_button(
	            label="Download Audio File",
	            data=audio_bytes,
	            file_name="output_audio.mp3",
	            mime="audio/mp3",
	        )
	    else:
	        st.error("The PDF does not contain readable text.")