AudioNotebook / app.py
Arslan17121's picture
Update app.py
021b694 verified
import streamlit as st
import pdfplumber
from transformers import pipeline
from gtts import gTTS
import os
# Function to extract text from PDF
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF and return it as one string.

    Args:
        pdf_file: The PDF to read, handed straight to ``pdfplumber.open``
            (the app passes the object returned by ``st.file_uploader``).

    Returns:
        The text of all pages that produced any, joined with newlines so
        that the last word of one page is not fused with the first word of
        the next. If reading fails, the error is reported with ``st.error``
        and whatever was collected before the failure is returned (possibly
        an empty string).
    """
    page_texts = []
    try:
        with pdfplumber.open(pdf_file) as pdf:
            for page in pdf.pages:
                # Skip pages whose extraction yields nothing (None or "").
                page_text = page.extract_text()
                if page_text:
                    page_texts.append(page_text)
    except Exception as e:
        # Deliberately best-effort: surface the problem in the UI and fall
        # through so the caller can decide what to do with partial text.
        st.error(f"Error reading the PDF: {e}")
    return "\n".join(page_texts)
# Function to generate discussion points with adjustable summary length
def generate_discussion_points(text, user_prompt=None):
    """Summarize *text* with the ``facebook/bart-large-cnn`` summarization model.

    Args:
        text: The source text to summarize.
        user_prompt: Optional text that is prepended to *text* (separated by
            a single space) before summarizing. ``None``, empty, or
            whitespace-only prompts are ignored.

    Returns:
        The ``summary_text`` of the first result returned by the pipeline.

    Raises:
        Whatever the ``transformers`` pipeline raises while loading the
        model or generating the summary; the caller is expected to handle it.
    """
    # NOTE(review): the pipeline (and therefore the model) is built on every
    # call. Consider caching it with Streamlit's resource cache if the
    # deployed Streamlit version supports it -- confirm before changing.
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    # Optionally include the user's prompt to guide the summary. A prompt
    # consisting only of whitespace carries no guidance, so it is skipped.
    if user_prompt and user_prompt.strip():
        text = user_prompt + " " + text

    # truncation=True clips input that exceeds the model's maximum input
    # length; without it, long PDFs make the pipeline fail instead of
    # producing a summary of the leading portion of the document.
    summary = summarizer(
        text,
        max_length=600,
        min_length=200,
        do_sample=False,
        truncation=True,
    )
    return summary[0]["summary_text"]
# Function to convert text to speech
def text_to_speech(text, output_file="output.mp3"):
    """Synthesize *text* with gTTS and save the audio to *output_file*.

    Args:
        text: The text to be spoken.
        output_file: Path the audio is saved to; defaults to ``"output.mp3"``.

    Returns:
        The same *output_file* path that was passed in, so the caller can
        open the saved audio.
    """
    gTTS(text).save(output_file)
    return output_file
# Streamlit app starts here
# Page header. The emoji is U+1F4C4 ("page facing up").
st.title("📄 PDF briefing Points Generator")
st.write("Upload a PDF file to generate briefing points and listen to them. Optionally, provide a prompt to guide the summary.")

# File uploader
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

# Input for user-defined summary prompt
user_prompt = st.text_area("Enter a prompt to guide the summary (optional)", height=100)

if uploaded_file:
    # Extract text from uploaded PDF
    with st.spinner("Extracting text from the uploaded PDF..."):
        pdf_text = extract_text_from_pdf(uploaded_file)

    if pdf_text.strip():
        st.write("### Extracted Text")
        st.text_area("Extracted Text", pdf_text, height=200)

        # Generate discussion points. Bind the name up front so a failed
        # summarization leaves a well-defined "nothing to speak" state
        # rather than an unbound variable for the audio step below.
        discussion_points = None
        with st.spinner("Generating discussion points..."):
            try:
                discussion_points = generate_discussion_points(pdf_text, user_prompt)
                st.write("### Discussion Points")
                st.text_area("Discussion Points", discussion_points, height=150)
            except Exception as e:
                st.error(f"Error during summarization: {e}")

        # Convert discussion points to audio -- only when summarization
        # succeeded; otherwise the summarization error above is the only
        # (and the accurate) message shown to the user.
        audio_file = None
        if discussion_points is not None:
            with st.spinner("Converting discussion points to audio..."):
                try:
                    audio_file = text_to_speech(discussion_points)
                    st.success("Audio file generated successfully!")
                except Exception as e:
                    st.error(f"Error during text-to-speech conversion: {e}")
                    audio_file = None

        if audio_file:
            # Audio playback
            st.write("### Listen to the Discussion Points")
            # Read via a context manager so the file handle is closed promptly.
            with open(audio_file, "rb") as audio_handle:
                audio_bytes = audio_handle.read()
            st.audio(audio_bytes, format="audio/mp3")

            # Option to download the audio file
            st.download_button(
                label="Download Audio",
                data=audio_bytes,
                file_name="discussion_points.mp3",
                mime="audio/mp3"
            )
    else:
        st.error("No text was extracted from the uploaded PDF. Please try with another file.")
else:
    st.info("Upload a PDF file to start.")