PDF_TO_Audio / app.py
Jamshaid-Saleem's picture
Create app.py
5f49046 verified
import streamlit as st
from PyPDF2 import PdfReader
from gtts import gTTS
import os
import tempfile
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file) -> str:
    """Return the concatenated text of every page in *pdf_file*.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts as its source; the app
            passes the object returned by ``st.file_uploader``.

    Returns:
        The text of all pages joined in page order, with no separator
        inserted between pages. Empty string if no page yields text.
    """
    pdf_reader = PdfReader(pdf_file)
    # Join once instead of growing a string with ``+=`` inside the loop.
    # ``or ""`` is defensive: if a page yields a falsy result (e.g. None)
    # it contributes nothing instead of raising TypeError.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Function to generate audio from text
def text_to_audio(text, language="en"):
    """Build a gTTS object that speaks *text* in *language*.

    The object is returned unsaved; the caller is responsible for writing
    it out (the app calls ``.save(path)`` on it).

    Args:
        text: The text to be spoken.
        language: Language code forwarded to gTTS as ``lang``; defaults
            to ``"en"``.

    Returns:
        The constructed ``gTTS`` instance, created with ``slow=False``.
    """
    return gTTS(text=text, lang=language, slow=False)
# Streamlit App
st.title("PDF to Audio Converter")
st.write("Upload a PDF document, and this application will generate an audio file of its content.")

# File upload; the widget yields None until a file has been chosen.
uploaded_pdf = st.file_uploader("Upload your PDF file", type=["pdf"])

if uploaded_pdf is not None:
    with st.spinner("Processing the PDF..."):
        # Extract text from PDF
        extracted_text = extract_text_from_pdf(uploaded_pdf)

        if extracted_text.strip():  # Check if the text is not empty
            st.subheader("Extracted Text")
            st.text_area("Text from PDF", extracted_text, height=300)

            # Generate audio
            st.write("Generating audio...")
            audio_file = text_to_audio(extracted_text)

            # Reserve a unique .mp3 path. The handle is closed before the
            # audio is written so the path can be reopened by name.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
                temp_audio_path = temp_audio.name

            try:
                audio_file.save(temp_audio_path)
                with open(temp_audio_path, "rb") as file:
                    audio_bytes = file.read()
            finally:
                # delete=False means nothing else removes this file; without
                # this cleanup every run would leave an .mp3 behind on disk.
                os.remove(temp_audio_path)

            # Serve the audio from memory now that the temp file is gone.
            st.audio(audio_bytes, format="audio/mp3")

            # Provide download button
            st.download_button(
                label="Download Audio File",
                data=audio_bytes,
                file_name="output_audio.mp3",
                mime="audio/mp3",
            )
        else:
            st.error("The PDF does not contain readable text.")