Spaces:
Running
Running
File size: 1,864 Bytes
5f49046 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import streamlit as st
from PyPDF2 import PdfReader
from gtts import gTTS
import os
import tempfile
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_file: The PDF source handed straight to ``PdfReader``. The app
            passes the object returned by ``st.file_uploader``.

    Returns:
        A single string with each page's extracted text joined with no
        separator. Pages that yield no text contribute an empty string.
    """
    pdf_reader = PdfReader(pdf_file)
    # ``or ""`` keeps a page whose extraction yields None from raising
    # TypeError; join avoids rebuilding the string once per page.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Function to generate audio from text
def text_to_audio(text, language="en"):
    """Wrap ``text`` in a gTTS object configured for normal-speed speech.

    Args:
        text: The text to be spoken.
        language: Language code forwarded to gTTS as ``lang``
            (defaults to ``"en"``).

    Returns:
        The constructed ``gTTS`` object; the caller is responsible for
        saving it to a file.
    """
    return gTTS(text=text, lang=language, slow=False)
# Streamlit App
# Page header and short usage hint.
st.title("PDF to Audio Converter")
st.write("Upload a PDF document, and this application will generate an audio file of its content.")

# File upload
uploaded_pdf = st.file_uploader("Upload your PDF file", type=["pdf"])

if uploaded_pdf is not None:
    with st.spinner("Processing the PDF..."):
        # Extract text from PDF
        extracted_text = extract_text_from_pdf(uploaded_pdf)

        if extracted_text.strip():  # Check if the text is not empty
            st.subheader("Extracted Text")
            st.text_area("Text from PDF", extracted_text, height=300)

            # Generate audio
            st.write("Generating audio...")
            audio_file = text_to_audio(extracted_text)

            # Reserve a unique .mp3 path, then close the handle before gTTS
            # writes to it (an open handle blocks the write on Windows).
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
                temp_audio_path = temp_audio.name
            try:
                audio_file.save(temp_audio_path)
                with open(temp_audio_path, "rb") as file:
                    audio_bytes = file.read()
            finally:
                # delete=False means nothing else removes the file; without
                # this every upload would leave an .mp3 behind.
                os.remove(temp_audio_path)

            # Both widgets are fed the in-memory bytes, so they do not depend
            # on the temporary file still existing.
            st.audio(audio_bytes, format="audio/mp3")

            # Provide download button
            st.download_button(
                label="Download Audio File",
                data=audio_bytes,
                file_name="output_audio.mp3",
                mime="audio/mp3",
            )
        else:
            st.error("The PDF does not contain readable text.")
|