Spaces:
Runtime error
Runtime error
| from PyPDF2 import PdfReader | |
| import re | |
| import streamlit as st | |
| import fitz | |
| from transformers import pipeline | |
| import os | |
| import requests | |
| import io | |
| from PIL import Image | |
| from pptx import Presentation | |
| from pptx.util import Inches | |
| import tempfile | |
# Hugging Face Inference API endpoint of the text-to-image model used to
# illustrate each slide.
API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

# The access token is read from the environment (e.g. a Space secret named
# HF_TOKEN) instead of being committed to the repository.
# NOTE(review): the token that used to be hard-coded here was published with
# the source and should be treated as compromised -- revoke it.
_hf_token = os.environ.get("HF_TOKEN", "")

# Request headers sent with every Inference API call. When no token is
# configured the Authorization header is omitted rather than sent empty.
headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}

# Summarization pipeline used to condense each extracted paragraph.
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# model is rebuilt on each rerun; consider caching it if the installed
# Streamlit version offers a resource cache.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def query(payload, timeout=120):
    """Send *payload* to the image-generation endpoint and return the raw body.

    Args:
        payload: JSON-serialisable request body posted to ``API_URL``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx HTTP status. Failing here prevents an error body from
            being handed to the caller as if it were image data.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    response.raise_for_status()
    return response.content
def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
    """Split the text of a PDF into paragraphs using gaps between text blocks.

    Text blocks of each page are visited in the order PyMuPDF returns them.
    A block whose top edge lies more than ``spacing_threshold`` below the
    bottom edge of the previous text block starts a new paragraph; otherwise
    its text is appended to the current one. Paragraphs never span pages.

    Args:
        pdf_data: Raw bytes of the PDF document.
        spacing_threshold: Minimum vertical gap, in page coordinate units,
            that separates two paragraphs.

    Returns:
        A list of stripped, non-empty paragraph strings. Extraction is
        best-effort: if the document cannot be opened or parsed, the error is
        printed and whatever was collected so far (possibly an empty list) is
        returned.
    """
    paragraphs = []

    def flush(paragraph_text):
        # Store a finished paragraph, ignoring whitespace-only ones.
        cleaned = paragraph_text.strip()
        if cleaned:
            paragraphs.append(cleaned)

    try:
        # The context manager closes the document even when a page fails to
        # parse half-way through.
        with fitz.open(stream=io.BytesIO(pdf_data), filetype="pdf") as pdf_document:
            for page in pdf_document:
                current_paragraph = ""
                previous_bottom = None
                for block in page.get_text("blocks"):
                    # Block layout: (x0, y0, x1, y1, text, block_no, block_type).
                    # Non-text blocks (type != 0) carry an "<image: ...>"
                    # placeholder instead of real text, so they are ignored;
                    # the gap across a figure is then measured between the
                    # surrounding text blocks.
                    if len(block) > 6 and block[6] != 0:
                        continue
                    y0, y1 = block[1], block[3]
                    text = block[4]

                    # The first text block of a page has nothing above it.
                    if previous_bottom is None:
                        vertical_spacing = 0
                    else:
                        vertical_spacing = y0 - previous_bottom

                    if vertical_spacing > spacing_threshold:
                        # Large gap: close the running paragraph, start anew.
                        flush(current_paragraph)
                        current_paragraph = text
                    else:
                        current_paragraph += " " + text
                    previous_bottom = y1

                # Last paragraph of the page.
                flush(current_paragraph)
    except Exception as e:
        # Deliberate best-effort boundary: report and return what we have.
        print(f"Erreur lors de l'extraction du PDF : {e}")
    return paragraphs
# Streamlit page: upload a PDF, summarise each paragraph, illustrate it with a
# generated picture, and offer the resulting deck for download.
st.title("PDF2SLIDE")
uploaded_file = st.file_uploader("Select a PDF", type=["pdf"])

if uploaded_file is not None:
    pdf_data = uploaded_file.read()
    # Blank paragraphs would only produce empty slides and can make the
    # summarizer fail, so they are dropped up front.
    paragraphs = [
        paragraph
        for paragraph in extract_paragraphs_by_vertical_spacing(pdf_data)
        if paragraph.strip()
    ]

    prs = Presentation()

    # Pixel size every generated picture is normalised to before insertion.
    image_width = 800  # Adjust as needed
    image_height = 600  # Adjust as needed
    min_summary_length = 10

    for i, paragraph in enumerate(paragraphs, start=1):
        # max_length has to be an integer and must stay above min_length,
        # otherwise generation is given contradictory length constraints.
        max_summary_length = max(len(paragraph) // 2, min_summary_length + 1)
        summary = summarizer(
            paragraph,
            max_length=max_summary_length,
            min_length=min_summary_length,
            do_sample=False,
            truncation=True,  # keep over-long paragraphs within the model's input limit
        )
        summary_text = summary[0]['summary_text']

        # One slide per paragraph.
        slide = prs.slides.add_slide(prs.slide_layouts[5])

        # Text box holding the paragraph label followed by its summary.
        left = top = Inches(1)
        width = height = Inches(5)
        text_frame = slide.shapes.add_textbox(left, top, width, height).text_frame
        heading = text_frame.add_paragraph()
        heading.text = f"Paragraph {i}:"
        heading.space_after = Inches(0.1)
        body = text_frame.add_paragraph()
        body.text = summary_text

        # Generate the illustration. A failed request or an undecodable
        # response only costs this slide its picture, not the whole deck.
        try:
            image_bytes = query({
                "inputs": 'A picture without text about: ' + summary_text
            })
            image = Image.open(io.BytesIO(image_bytes))
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            image = image.resize((image_width, image_height), Image.LANCZOS)
        except (requests.RequestException, OSError) as exc:
            st.warning(f"Could not generate an image for paragraph {i}: {exc}")
            continue

        # Hand the picture over through an in-memory buffer so no temporary
        # file is left behind on disk.
        image_stream = io.BytesIO()
        image.save(image_stream, format="PNG")
        image_stream.seek(0)
        slide.shapes.add_picture(image_stream, Inches(1), Inches(2), width, height)

    # Serialise the deck in memory; this avoids a leftover temporary .pptx
    # and a file handle that is never closed.
    presentation_buffer = io.BytesIO()
    prs.save(presentation_buffer)

    # Display a download button for the PowerPoint file
    st.download_button(
        label="Download PowerPoint Presentation",
        data=presentation_buffer.getvalue(),
        key="download_ppt",
        file_name="PDF2SLIDE_Presentation.pptx",
    )