"""Streamlit app: upload a PDF, summarize it into briefing points, and play them as audio."""

import os

import pdfplumber
import streamlit as st
from gtts import gTTS
from transformers import pipeline


def _no_cache(func):
    """Fallback decorator used when Streamlit has no resource cache."""
    return func


# Streamlit re-executes this script on every interaction, so the summarization
# model must be cached by Streamlit itself to avoid reloading it each time.
# Fall back to no caching when `st.cache_resource` is not available.
_cache_resource = getattr(st, "cache_resource", _no_cache)


@_cache_resource
def _load_summarizer():
    """Build the summarization pipeline (cached across reruns when possible)."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


# Function to extract text from PDF
def extract_text_from_pdf(pdf_file) -> str:
    """Return the text of every page of *pdf_file*, one page per line block.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; the app passes the
            object returned by ``st.file_uploader``.

    Returns:
        The page texts joined with newlines. Pages with no extractable text
        contribute an empty string. If reading fails part-way, the error is
        shown with ``st.error`` and the text collected so far is returned.
    """
    page_texts = []
    try:
        with pdfplumber.open(pdf_file) as pdf:
            for page in pdf.pages:
                page_texts.append(page.extract_text() or "")
    except Exception as e:
        st.error(f"Error reading the PDF: {e}")
    # Join with a newline so the last word of one page is not glued to the
    # first word of the next.
    return "\n".join(page_texts)


# Function to generate discussion points with adjustable summary length
def generate_discussion_points(text: str, user_prompt=None) -> str:
    """Summarize *text* and return the summary string.

    Args:
        text: The text to summarize.
        user_prompt: Optional text prepended to *text* (separated by a space)
            before summarization to guide the summary.

    Returns:
        The ``summary_text`` of the first result produced by the summarizer.

    Raises:
        Any exception raised by the summarization pipeline is propagated.
    """
    summarizer = _load_summarizer()

    # Optionally include the user's prompt to guide the summary
    if user_prompt:
        text = user_prompt + " " + text

    # Adjust the max_length and min_length for a longer summary.
    # truncation=True clips inputs that exceed the model's input limit instead
    # of failing; only the leading part of a very long document is summarized.
    summary = summarizer(
        text,
        max_length=600,
        min_length=200,
        do_sample=False,
        truncation=True,
    )
    return summary[0]["summary_text"]


# Function to convert text to speech
def text_to_speech(text: str, output_file: str = "output.mp3") -> str:
    """Synthesize *text* with gTTS, save it to *output_file*, and return that path."""
    tts = gTTS(text)
    tts.save(output_file)
    return output_file


# Streamlit app starts here
st.title("📄 PDF briefing Points Generator")
st.write(
    "Upload a PDF file to generate briefing points and listen to them. "
    "Optionally, provide a prompt to guide the summary."
)

# File uploader
uploaded_file = st.file_uploader("Upload a PDF", type=["pdf"])

# Input for user-defined summary prompt
user_prompt = st.text_area("Enter a prompt to guide the summary (optional)", height=100)

if uploaded_file:
    # Extract text from uploaded PDF
    with st.spinner("Extracting text from the uploaded PDF..."):
        pdf_text = extract_text_from_pdf(uploaded_file)

    if pdf_text.strip():
        st.write("### Extracted Text")
        st.text_area("Extracted Text", pdf_text, height=200)

        # Generate discussion points
        discussion_points = None
        with st.spinner("Generating discussion points..."):
            try:
                discussion_points = generate_discussion_points(pdf_text, user_prompt)
                st.write("### Discussion Points")
                st.text_area("Discussion Points", discussion_points, height=150)
            except Exception as e:
                st.error(f"Error during summarization: {e}")

        # Only attempt audio when summarization succeeded; otherwise there is
        # nothing to speak and the summarization error has already been shown.
        if discussion_points is not None:
            # Convert discussion points to audio
            audio_file = None
            with st.spinner("Converting discussion points to audio..."):
                try:
                    audio_file = text_to_speech(discussion_points)
                    st.success("Audio file generated successfully!")
                except Exception as e:
                    st.error(f"Error during text-to-speech conversion: {e}")

            if audio_file:
                # Audio playback
                st.write("### Listen to the Discussion Points")
                with open(audio_file, "rb") as audio_fp:
                    audio_bytes = audio_fp.read()
                st.audio(audio_bytes, format="audio/mp3")

                # Option to download the audio file
                st.download_button(
                    label="Download Audio",
                    data=audio_bytes,
                    file_name="discussion_points.mp3",
                    mime="audio/mp3",
                )
    else:
        st.error("No text was extracted from the uploaded PDF. Please try with another file.")
else:
    st.info("Upload a PDF file to start.")