PDF2SLIDE2

Runtime error

File size: 4,696 Bytes

from PyPDF2 import PdfReader
import re
import streamlit as st
import fitz
from transformers import pipeline
import os
import requests
import io
from PIL import Image
from pptx import Presentation
from pptx.util import Inches, Pt
import tempfile

# Hugging Face Inference API endpoint that query() posts image prompts to.
API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

# SECURITY: API tokens must never be committed to source control. The token is
# read from the HF_API_TOKEN environment variable instead; any token that was
# previously hard-coded in this file should be considered leaked and revoked.
# When the variable is unset, no Authorization header is sent at all.
_HF_API_TOKEN = os.environ.get("HF_API_TOKEN", "").strip()
headers = {"Authorization": f"Bearer {_HF_API_TOKEN}"} if _HF_API_TOKEN else {}
# Summarization pipeline backed by facebook/bart-large-cnn. It is built once at
# import time and reused for every paragraph extracted from the uploaded PDF.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def query(payload, timeout=120):
    """POST *payload* as JSON to ``API_URL`` and return the raw response body.

    Args:
        payload: JSON-serialisable request body, e.g. ``{"inputs": "<prompt>"}``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the app indefinitely.

    Returns:
        The response body as ``bytes``. The caller treats it as encoded image
        data.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status. This
            prevents a JSON error message from being handed back as if it were
            image bytes.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    return response.content

def add_line_breaks_to_summary(summary_text, line_length):
    """Wrap *summary_text* so that lines do not exceed *line_length* characters.

    The text is split on whitespace and re-joined with single spaces; words are
    never broken. A single word longer than *line_length* is placed on a line
    of its own (that line is then longer than the limit).

    Args:
        summary_text: The text to wrap.
        line_length: Maximum number of characters per line.

    Returns:
        The wrapped text with lines separated by ``"\\n"``. An empty or
        whitespace-only input yields an empty string.
    """
    lines = []
    current_line = ""
    for word in summary_text.split():
        if not current_line:
            # First word of a line: no separating space to account for, and
            # nothing to flush. Flushing here would emit a spurious empty
            # leading line when the very first word is too long.
            current_line = word
        elif len(current_line) + 1 + len(word) <= line_length:  # +1 for the space
            current_line += " " + word
        else:
            lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return "\n".join(lines)

def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
    """Split the text of a PDF into paragraphs using vertical gaps between blocks.

    Each page is processed independently. Consecutive text blocks are merged
    into one paragraph until the gap between the bottom of the previous block
    and the top of the next one exceeds *spacing_threshold*.

    Args:
        pdf_data: The PDF file content as bytes.
        spacing_threshold: Vertical gap (in page coordinate units) above which
            a new paragraph is started.

    Returns:
        A list of non-empty, stripped paragraph strings. Extraction is
        best-effort: if the PDF cannot be read, the error is printed and the
        paragraphs collected so far (possibly none) are returned.
    """
    paragraphs = []

    def _flush(buffer):
        # Only keep paragraphs that still contain text after stripping, so the
        # caller never receives empty strings.
        cleaned = buffer.strip()
        if cleaned:
            paragraphs.append(cleaned)

    try:
        pdf_document = fitz.open(stream=io.BytesIO(pdf_data), filetype="pdf")
        try:
            for page_number in range(pdf_document.page_count):
                page = pdf_document.load_page(page_number)

                current_paragraph = ""
                previous_bottom = None

                for block in page.get_text("blocks"):
                    # NOTE(review): PyMuPDF documents block tuples as
                    # (x0, y0, x1, y1, text, block_no, block_type) with
                    # block_type 0 for text; skip anything else (e.g. image
                    # placeholders) so it does not end up in a paragraph.
                    if len(block) > 6 and block[6] != 0:
                        continue

                    _x0, y0, _x1, y1 = block[:4]
                    text = block[4]

                    starts_new_paragraph = (
                        previous_bottom is not None
                        and y0 - previous_bottom > spacing_threshold
                    )
                    if starts_new_paragraph:
                        _flush(current_paragraph)
                        current_paragraph = text
                    else:
                        current_paragraph += " " + text

                    previous_bottom = y1

                # Paragraphs never span pages: emit what is left of this page.
                _flush(current_paragraph)
        finally:
            # Release the document even when a page fails to parse.
            pdf_document.close()
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and return
        # whatever was extracted before the failure.
        print(f"Erreur lors de l'extraction du PDF : {str(e)}")

    return paragraphs

# Streamlit page: the user uploads a PDF; every extracted paragraph becomes one
# slide holding a summary of the paragraph and a generated illustration.
st.title("PDF2SLIDE")

uploaded_file = st.file_uploader("Select a PDF", type=["pdf"])

if uploaded_file is not None:
    pdf_data = uploaded_file.read()
    paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)

    if not paragraphs:
        # Nothing to summarize: tell the user instead of offering an empty deck.
        st.warning("No text could be extracted from this PDF.")
    else:
        # Create a PowerPoint presentation
        prs = Presentation()

        picture_size = Inches(3)
        bottom_margin = Inches(0.5)
        min_summary_tokens = 10
        # NOTE(review): 1024 is assumed to be the position limit of
        # facebook/bart-large-cnn - confirm against the model config.
        max_model_tokens = 1024

        for paragraph in paragraphs:
            # max_length must be an integer and must stay above min_length,
            # otherwise generation is rejected for short paragraphs.
            max_summary_tokens = min(
                max(len(paragraph) // 2, min_summary_tokens + 1),
                max_model_tokens,
            )
            summary = summarizer(
                paragraph,
                max_length=max_summary_tokens,
                min_length=min_summary_tokens,
                do_sample=False,
                truncation=True,  # paragraphs longer than the model input are cut
            )
            raw_summary = summary[0]['summary_text']
            summary_text = add_line_breaks_to_summary(raw_summary, 80)

            # Create a slide
            slide = prs.slides.add_slide(prs.slide_layouts[5])

            # Add the image to the slide at the bottom with a 0.5-inch space.
            # The picture is passed as an in-memory stream so that no temporary
            # file is left behind on disk.
            try:
                image_bytes = query({
                    "inputs": 'A picture without text about: ' + raw_summary
                })
                left = (prs.slide_width - picture_size) // 2
                top = prs.slide_height - picture_size - bottom_margin
                slide.shapes.add_picture(
                    io.BytesIO(image_bytes), left, top, picture_size, picture_size
                )
            except (requests.RequestException, OSError) as exc:
                # A failed or invalid image should not abort the whole deck;
                # keep the slide with its text only.
                st.warning(f"Could not generate an image for one slide: {exc}")

            # Add the summary to the slide at the top
            txBox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(8), Inches(2))
            tf = txBox.text_frame
            p = tf.add_paragraph()
            p.text = summary_text
            p.space_after = Pt(0)

        # Serialize the presentation in memory: no temporary file to clean up
        # and no file handle left open.
        pptx_buffer = io.BytesIO()
        prs.save(pptx_buffer)

        # Display a download button for the PowerPoint file
        st.download_button(
            label="Download PowerPoint Presentation",
            data=pptx_buffer.getvalue(),
            key="download_ppt",
            file_name="PDF2SLIDE_Presentation.pptx",
            mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
        )