Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from PyPDF2 import PdfReader | |
| from gtts import gTTS | |
| import os | |
| import tempfile | |
| # Function to extract text from a PDF | |
def extract_text_from_pdf(pdf_file):
    """Return the text of every page in a PDF, concatenated in page order.

    Args:
        pdf_file: Whatever ``PdfReader`` accepts as its source; this app
            passes the object returned by ``st.file_uploader``.

    Returns:
        A single string with the text of all pages joined without any
        separator. Pages that yield no text contribute nothing, so the
        result is ``""`` when the document has no extractable text.
    """
    pdf_reader = PdfReader(pdf_file)
    # Build the result with one join instead of repeated ``+=``.
    # The ``or ""`` guard keeps a page with no usable text from aborting the
    # whole document (NOTE(review): some extractor versions may return None
    # for such pages -- confirm against the installed PyPDF2).
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
| # Function to generate audio from text | |
def text_to_audio(text, language="en"):
    """Create a gTTS speech object for the given text.

    Args:
        text: The text to be spoken.
        language: Language code forwarded to gTTS as ``lang``
            (defaults to ``"en"``).

    Returns:
        The constructed ``gTTS`` instance (normal speed, ``slow=False``);
        the caller is responsible for saving it to a file.
    """
    return gTTS(text=text, lang=language, slow=False)
| # Streamlit App | |
st.title("PDF to Audio Converter")
st.write("Upload a PDF document, and this application will generate an audio file of its content.")

# File upload
uploaded_pdf = st.file_uploader("Upload your PDF file", type=["pdf"])

if uploaded_pdf is not None:
    with st.spinner("Processing the PDF..."):
        # Extract text from PDF
        extracted_text = extract_text_from_pdf(uploaded_pdf)

    if extracted_text.strip():  # Whitespace-only output counts as "no text"
        st.subheader("Extracted Text")
        st.text_area("Text from PDF", extracted_text, height=300)

        # Generate audio
        st.write("Generating audio...")
        audio_file = text_to_audio(extracted_text)

        # Reserve a unique temporary path, closing the handle before the
        # speech object writes to that path itself.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            temp_audio_path = temp_audio.name
        try:
            audio_file.save(temp_audio_path)
            with open(temp_audio_path, "rb") as file:
                audio_bytes = file.read()
        finally:
            # The script is re-executed on every user interaction, so a file
            # left behind here would accumulate on disk; the audio is kept in
            # memory instead and the temporary file is always removed.
            os.remove(temp_audio_path)

        st.audio(audio_bytes, format="audio/mp3")

        # Provide download button
        st.download_button(
            label="Download Audio File",
            data=audio_bytes,
            file_name="output_audio.mp3",
            mime="audio/mp3",
        )
    else:
        st.error("The PDF does not contain readable text.")