Spaces:

Jamshaid-Saleem
/

PDF_TO_Audio

Running

File size: 1,864 Bytes

5f49046

import streamlit as st
from PyPDF2 import PdfReader
from gtts import gTTS
import os
import tempfile

# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts; the app passes the
            uploaded file object from ``st.file_uploader``.

    Returns:
        The text of all pages joined with newlines. The result is empty or
        whitespace-only when no page yields any text.
    """
    pdf_reader = PdfReader(pdf_file)
    # Join pages with a newline so the last word of one page is not glued to
    # the first word of the next (which would be read aloud as one word).
    # ``or ""`` keeps the join safe if a page yields no text at all.
    return "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

# Function to generate audio from text
def text_to_audio(text, language="en"):
    """Build a normal-speed gTTS object for ``text``.

    Args:
        text: The text to be spoken.
        language: Language code handed to gTTS as ``lang`` (default "en").

    Returns:
        The configured ``gTTS`` object; the caller is responsible for
        saving it.
    """
    return gTTS(text=text, slow=False, lang=language)

# Streamlit App
# Flow: upload a PDF -> extract its text -> synthesize speech -> offer the
# resulting MP3 for playback and download.
st.title("PDF to Audio Converter")
st.write("Upload a PDF document, and this application will generate an audio file of its content.")

# File upload
uploaded_pdf = st.file_uploader("Upload your PDF file", type=["pdf"])

if uploaded_pdf is not None:
    with st.spinner("Processing the PDF..."):
        # Extract text from PDF
        extracted_text = extract_text_from_pdf(uploaded_pdf)

        if extracted_text.strip():  # Check if the text is not empty
            st.subheader("Extracted Text")
            st.text_area("Text from PDF", extracted_text, height=300)

            # Generate audio
            st.write("Generating audio...")
            audio_file = text_to_audio(extracted_text)

            # Reserve a path for the MP3. The handle is closed before the
            # audio is saved so only one writer ever has the file open.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
                temp_audio_path = temp_audio.name

            try:
                audio_file.save(temp_audio_path)
                with open(temp_audio_path, "rb") as file:
                    audio_bytes = file.read()
            finally:
                # delete=False means nothing else cleans this up; without
                # the removal each script run would leave an MP3 behind.
                os.remove(temp_audio_path)

            # The widgets get the bytes, so they do not depend on the
            # (now deleted) temporary file.
            st.audio(audio_bytes, format="audio/mp3")

            # Provide download button
            st.download_button(
                label="Download Audio File",
                data=audio_bytes,
                file_name="output_audio.mp3",
                mime="audio/mp3"
            )
        else:
            st.error("The PDF does not contain readable text.")