# Source: Hugging Face Space IAGENE/PDF2SLIDE (status: Runtime error; file size: 6,949 bytes; revision 7c4e057)

from PyPDF2 import PdfReader
import re
import shutil
import tempfile
import base64
from pptx import Presentation
from pptx.util import Inches, Pt 
from pptx.enum.text import PP_ALIGN
import streamlit as st
import fitz
from transformers import pipeline
import os
import requests
import io
from PIL import Image

# Hosted text-to-image endpoint used to illustrate each slide.
API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

# The access token is read from the HF_TOKEN environment variable rather than
# being committed to the source. Any token that was previously published in
# this file should be treated as leaked and revoked.
# When the variable is unset, requests are sent without an Authorization header.
_hf_token = os.environ.get("HF_TOKEN")
headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}

# Summarization pipeline used to condense each extracted paragraph.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def query(payload, timeout=120):
    """POST *payload* as JSON to ``API_URL`` and return the raw response body.

    Args:
        payload: JSON-serialisable request body sent to the endpoint.
        timeout: Seconds to wait for the server before giving up, so a
            stalled request cannot hang the app indefinitely.

    Returns:
        The response body as bytes.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Without this check an error reply would be returned as if it
            were image data.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    return response.content

def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
    """Split the text of a PDF into paragraphs based on vertical gaps.

    Each page is read as a sequence of blocks. A block whose top edge lies
    more than ``spacing_threshold`` below the bottom edge of the previous
    block starts a new paragraph; otherwise its text is appended to the
    current paragraph, separated by a space. Paragraphs never span pages.

    Args:
        pdf_data: Raw bytes of the PDF file.
        spacing_threshold: Minimum vertical gap (in the coordinate units
            returned by ``page.get_text("blocks")``) that separates two
            paragraphs.

    Returns:
        A list of non-empty, whitespace-stripped paragraph strings. If the
        PDF cannot be read, the error is printed and whatever was collected
        before the failure (possibly an empty list) is returned.
    """
    paragraphs = []
    pdf_document = None

    try:
        pdf_stream = io.BytesIO(pdf_data)
        pdf_document = fitz.open(stream=pdf_stream, filetype="pdf")

        for page_number in range(pdf_document.page_count):
            page = pdf_document.load_page(page_number)
            blocks = page.get_text("blocks")

            current_parts = []
            previous_bottom = None

            def flush():
                # Emit the accumulated paragraph unless it is only whitespace.
                paragraph = " ".join(current_parts).strip()
                if paragraph:
                    paragraphs.append(paragraph)

            for block in blocks:
                # Block tuples start with the bounding box and the text; when
                # present, the seventh field is the block type, and only
                # type 0 carries real text (other blocks are images).
                if len(block) > 6 and block[6] != 0:
                    continue

                y0, y1 = block[1], block[3]
                text = block[4]

                # The first block of a page has no predecessor, so it always
                # joins the (empty) current paragraph.
                if previous_bottom is not None:
                    vertical_spacing = y0 - previous_bottom
                else:
                    vertical_spacing = 0

                if vertical_spacing > spacing_threshold:
                    flush()
                    current_parts = [text]
                else:
                    current_parts.append(text)

                previous_bottom = y1

            # Emit the last paragraph of the page.
            flush()
    except Exception as e:
        # Best-effort extraction: report the problem and return what we have.
        print(f"Erreur lors de l'extraction du PDF : {str(e)}")
    finally:
        # Release the document even when extraction failed part-way through.
        if pdf_document is not None:
            pdf_document.close()

    return paragraphs

# Streamlit page: upload a PDF, then build one illustrated slide per extracted
# paragraph (summary text + generated picture) and offer the deck for download.
st.title("PDF2SLIDE")

uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])

if uploaded_file is not None:
    pdf_data = uploaded_file.read()

    paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)

    if not paragraphs:
        # Nothing to summarise: tell the user instead of offering an empty deck.
        st.warning("Aucun texte n'a pu être extrait du PDF.")
    else:
        prs = Presentation()

        # Slide geometry is the same for every slide, so compute it once.
        # python-pptx lengths are integer EMU values, hence the int() casts.
        slide_width = prs.slide_width
        slide_height = prs.slide_height

        # Picture: 70% of the slide in both directions, centred horizontally.
        image_width = int(slide_width * 0.7)
        image_height = int(slide_height * 0.7)
        left_img = (slide_width - image_width) // 2
        top_img = int((slide_height - image_height) * 0.6)

        # Summary text box: 70% wide, 15% high, starting at 85% of the height.
        text_width = int(slide_width * 0.7)
        text_height = int(slide_height * 0.15)
        left_text = (slide_width - text_width) // 2
        top_text = int(slide_height * 0.85)

        min_summary_length = 10

        for i, paragraph in enumerate(paragraphs, start=1):
            # max_length must be an integer and must not drop below
            # min_length, so use floor division and clamp. truncation=True
            # keeps paragraphs longer than the model's input limit from
            # failing.
            summary = summarizer(
                paragraph,
                max_length=max(len(paragraph) // 4, min_summary_length),
                min_length=min_summary_length,
                do_sample=False,
                truncation=True,
            )
            summary_text = summary[0]['summary_text']

            # Layout 5 of the default template is "Title Only".
            slide = prs.slides.add_slide(prs.slide_layouts[5])

            title = slide.shapes.title
            title.text = f"Paragraphe {i}"
            # Alignment is a paragraph property, not a shape property.
            title.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER

            try:
                image_bytes = query({
                    "inputs": 'A picture about  :' + summary_text
                })
                # Opening the bytes with PIL checks that the reply is really
                # an image and not, for example, a JSON error message.
                Image.open(io.BytesIO(image_bytes))
            except (requests.RequestException, OSError) as e:
                # A failed illustration should not abort the whole deck.
                st.warning(f"Image indisponible pour le paragraphe {i} : {e}")
            else:
                slide.shapes.add_picture(
                    io.BytesIO(image_bytes), left_img, top_img, image_width, image_height
                )

            txBox = slide.shapes.add_textbox(left_text, top_text, text_width, text_height)
            tf = txBox.text_frame
            # Wrap long summaries inside the box instead of overflowing sideways.
            tf.word_wrap = True

            # A new text frame already contains one empty paragraph; reuse it
            # so the summary is not preceded by a blank line.
            p = tf.paragraphs[0]
            p.text = summary_text
            # Fixed, explicit size: a fresh paragraph has no font size or
            # spacing set, so there is nothing to shrink step by step.
            p.font.size = Pt(14)
            p.space_before = Pt(12)
            p.space_after = Pt(12)
            p.alignment = PP_ALIGN.CENTER

        # Serialise the deck in memory and expose it as a base64 data link.
        pptx_stream = io.BytesIO()
        prs.save(pptx_stream)
        pptx_b64 = base64.b64encode(pptx_stream.getvalue()).decode()

        st.markdown(
            f'<a href="data:application/vnd.openxmlformats-officedocument.presentationml.presentation;base64,{pptx_b64}" download="presentation.pptx">Télécharger la présentation</a>',
            unsafe_allow_html=True,
        )

    
    
    #st.download_button("Télécharger la présentation", "output_path", key="download_pptx", mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")