"""Streamlit app that converts the text of an uploaded PDF into an MP3 file."""

import os
import tempfile

import streamlit as st
from gtts import gTTS
from PyPDF2 import PdfReader


def extract_text_from_pdf(pdf_file) -> str:
    """Return the concatenated text of every page in *pdf_file*.

    Args:
        pdf_file: Anything ``PdfReader`` accepts; in this app it is the
            object returned by ``st.file_uploader``.

    Returns:
        The text of all pages joined in page order with no separator.
        Pages for which the extractor yields nothing contribute an empty
        string, so the result is ``""`` for a PDF without extractable text.
    """
    pdf_reader = PdfReader(pdf_file)
    # `or ""` guards against an extractor result of None/empty for a page
    # (e.g. an image-only page), which would otherwise break concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)


def text_to_audio(text: str, language: str = "en") -> gTTS:
    """Build a gTTS object for *text*.

    Args:
        text: The text to be spoken.
        language: Language code passed to gTTS as ``lang``.

    Returns:
        The configured ``gTTS`` instance (normal speed, ``slow=False``).
        This function does not write any audio itself; the caller does that
        via the returned object.
    """
    return gTTS(text=text, lang=language, slow=False)


# Streamlit App
st.title("PDF to Audio Converter")
st.write("Upload a PDF document, and this application will generate an audio file of its content.")

# File upload
uploaded_pdf = st.file_uploader("Upload your PDF file", type=["pdf"])

if uploaded_pdf is not None:
    with st.spinner("Processing the PDF..."):
        # Extract text from PDF
        extracted_text = extract_text_from_pdf(uploaded_pdf)

    if extracted_text.strip():  # Check if the text is not empty
        st.subheader("Extracted Text")
        st.text_area("Text from PDF", extracted_text, height=300)

        # Generate audio
        st.write("Generating audio...")
        audio_file = text_to_audio(extracted_text)

        # Reserve a unique temporary path, then close the handle *before*
        # gTTS writes to it so the file is never opened twice at once.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            temp_audio_path = temp_audio.name

        try:
            audio_file.save(temp_audio_path)
            with open(temp_audio_path, "rb") as file:
                audio_bytes = file.read()
        finally:
            # The file was created with delete=False, so remove it explicitly;
            # otherwise every rerun of the script leaves an MP3 behind.
            os.remove(temp_audio_path)

        # Serve the audio from memory so the widgets do not depend on the
        # (already deleted) temporary file.
        st.audio(audio_bytes, format="audio/mp3")

        # Provide download button
        st.download_button(
            label="Download Audio File",
            data=audio_bytes,
            file_name="output_audio.mp3",
            mime="audio/mp3",
        )
    else:
        st.error("The PDF does not contain readable text.")