"""PDF2SLIDE: Streamlit app that turns an uploaded PDF into a PowerPoint deck.

Paragraphs are extracted from the PDF, each one is summarized with a BART
summarization pipeline, and an illustration for the summary is requested from
the Hugging Face Inference API. Every paragraph becomes one slide.
"""

import io
import logging
import os
import re
import tempfile

import fitz
import requests
import streamlit as st
from PIL import Image
from pptx import Presentation
from pptx.util import Inches
from PyPDF2 import PdfReader
from transformers import pipeline

logger = logging.getLogger(__name__)

API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

# The access token is read from the HF_TOKEN environment variable so that no
# credential lives in the source. Without a token no Authorization header is
# sent at all (rather than a malformed "Bearer " header).
_HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
headers = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}

# Seconds to wait for the image endpoint before giving up on one picture.
REQUEST_TIMEOUT_SECONDS = 120

# Bounds handed to the summarizer; both are token counts.
MIN_SUMMARY_LENGTH = 10
MAX_SUMMARY_LENGTH = 1024

# Pixel size every generated image is resized to before it is embedded.
IMAGE_SIZE = (800, 600)

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


def query(payload):
    """Send *payload* to the image-generation endpoint and return the raw body.

    Args:
        payload: JSON-serializable request body, e.g. ``{"inputs": "..."}``.

    Returns:
        The response body as bytes.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status. Raising here keeps an error document
            from being handed to the caller as if it were image data.
    """
    response = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.raise_for_status()
    return response.content


def _split_blocks_into_paragraphs(blocks, spacing_threshold):
    """Group one page's text blocks into paragraphs using the gap between them."""
    paragraphs = []
    current_paragraph = ""
    previous_bottom = None

    for block in blocks:
        # The first four items are the block's coordinates (x0, y0, x1, y1);
        # the fifth is its text.
        top, bottom, text = block[1], block[3], block[4]

        # Vertical distance between this block and the previous one; the
        # first block of a page has nothing above it to measure against.
        vertical_spacing = 0 if previous_bottom is None else top - previous_bottom

        if vertical_spacing > spacing_threshold:
            # A gap larger than the threshold starts a new paragraph.
            if current_paragraph:
                paragraphs.append(current_paragraph.strip())
            current_paragraph = text
        else:
            # Otherwise the block continues the current paragraph.
            current_paragraph += " " + text

        previous_bottom = bottom

    # Flush the last paragraph of the page.
    if current_paragraph:
        paragraphs.append(current_paragraph.strip())
    return paragraphs


def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
    """Extract paragraphs from a PDF by looking at vertical gaps between blocks.

    Args:
        pdf_data: The PDF file content as bytes.
        spacing_threshold: Vertical gap, in page units, above which a block
            is treated as the start of a new paragraph.

    Returns:
        A list of paragraph strings in reading order. Extraction is
        best-effort: if the PDF cannot be read, the error is logged and the
        paragraphs collected from the pages processed so far are returned.
    """
    paragraphs = []
    try:
        # The context manager closes the document even when a page fails.
        with fitz.open(stream=io.BytesIO(pdf_data), filetype="pdf") as pdf_document:
            for page in pdf_document:
                paragraphs.extend(
                    _split_blocks_into_paragraphs(
                        page.get_text("blocks"), spacing_threshold
                    )
                )
    except Exception as e:
        logger.exception("Erreur lors de l'extraction du PDF : %s", e)
    return paragraphs


def _summarize(paragraph):
    """Return the summarizer's text for *paragraph*."""
    # max_length has to be an integer number of tokens above min_length.
    # Half the character count is only a rough proxy for that, so it is
    # clamped to a valid range.
    max_length = min(
        max(len(paragraph) // 2, MIN_SUMMARY_LENGTH + 1),
        MAX_SUMMARY_LENGTH,
    )
    result = summarizer(
        paragraph,
        max_length=max_length,
        min_length=MIN_SUMMARY_LENGTH,
        do_sample=False,
        truncation=True,  # inputs longer than the model accepts are cut
    )
    return result[0]["summary_text"]


def _generate_image(summary_text):
    """Return a PNG stream illustrating *summary_text*, or None on failure."""
    try:
        image_bytes = query(
            {"inputs": "A picture without text about: " + summary_text}
        )
        image = Image.open(io.BytesIO(image_bytes))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        image = image.resize(IMAGE_SIZE, Image.LANCZOS)
        png_stream = io.BytesIO()
        image.save(png_stream, format="PNG")
    except (requests.RequestException, OSError) as exc:
        # OSError also covers PIL's "cannot identify image file" error.
        logger.warning("Image generation failed: %s", exc)
        return None
    png_stream.seek(0)
    return png_stream


def _add_summary_slide(prs, index, summary_text, image_stream):
    """Append one slide holding the paragraph label, its summary and a picture."""
    slide = prs.slides.add_slide(prs.slide_layouts[5])

    # Text goes on the left half of the slide.
    text_box = slide.shapes.add_textbox(
        Inches(0.5), Inches(1.5), Inches(4.5), Inches(5)
    )
    text_frame = text_box.text_frame
    text_frame.word_wrap = True

    # A new text frame already contains one empty paragraph; reuse it for the
    # label so the box does not start with a blank line.
    label = text_frame.paragraphs[0]
    label.text = f"Paragraph {index}:"
    label.space_after = Inches(0.1)

    body = text_frame.add_paragraph()
    body.text = summary_text

    if image_stream is not None:
        # The picture goes on the right half so it does not cover the text.
        # Only the width is given, leaving the height to follow the image's
        # own proportions.
        slide.shapes.add_picture(
            image_stream, Inches(5.25), Inches(2), width=Inches(4.25)
        )


st.title("PDF2SLIDE")

uploaded_file = st.file_uploader("Select a PDF", type=["pdf"])

if uploaded_file is not None:
    pdf_data = uploaded_file.read()
    # Blank paragraphs carry nothing to summarize, so they are dropped.
    paragraphs = [
        paragraph
        for paragraph in extract_paragraphs_by_vertical_spacing(pdf_data)
        if paragraph
    ]
    if not paragraphs:
        st.warning("No text could be extracted from this PDF.")

    # Create a PowerPoint presentation
    prs = Presentation()

    for i, paragraph in enumerate(paragraphs, start=1):
        summary_text = _summarize(paragraph)
        image_stream = _generate_image(summary_text)
        if image_stream is None:
            st.warning(
                f"No image could be generated for paragraph {i}; "
                "the slide was created without one."
            )
        _add_summary_slide(prs, i, summary_text, image_stream)

    # Build the deck in memory so no temporary files are left behind.
    presentation_stream = io.BytesIO()
    prs.save(presentation_stream)

    # Display a download button for the PowerPoint file
    st.download_button(
        label="Download PowerPoint Presentation",
        data=presentation_stream.getvalue(),
        key="download_ppt",
        file_name="PDF2SLIDE_Presentation.pptx",
        mime=(
            "application/vnd.openxmlformats-officedocument"
            ".presentationml.presentation"
        ),
    )