Spaces:
Runtime error
Runtime error
| from PyPDF2 import PdfReader | |
| import re | |
| import streamlit as st | |
| import fitz | |
| from transformers import pipeline | |
| import os | |
| import requests | |
| import io | |
| from PIL import Image | |
| from pptx import Presentation | |
| from pptx.util import Inches, Pt | |
| import tempfile | |
# Hugging Face inference endpoint used to generate one illustration per slide.
API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

# The access token must never be committed with the source: read it from the
# environment (e.g. a Space secret named HF_TOKEN) instead. When no token is
# configured the request is sent without an Authorization header.
# NOTE: the token that used to be hard-coded here has been exposed publicly
# and should be revoked.
_HF_TOKEN = os.environ.get("HF_TOKEN", "").strip()
headers = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}
# Summarisation pipeline built at module level and shared by the rest of the
# script; it is called once per extracted paragraph.
summarizer = pipeline(
    task="summarization",
    model="facebook/bart-large-cnn",
)
def query(payload, timeout=120):
    """Post *payload* to the inference endpoint and return the raw response body.

    Args:
        payload: JSON-serialisable dict sent as the request body to ``API_URL``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    # Fail loudly on an error status: otherwise the error body would be
    # handed back to the caller as if it were the generated image.
    response.raise_for_status()
    return response.content
def add_line_breaks_to_summary(summary_text, line_length):
    """Wrap *summary_text* so that lines hold at most *line_length* characters.

    Words are never split: a single word longer than *line_length* is placed
    on a line of its own. Runs of whitespace in the input are collapsed to
    single spaces.

    Args:
        summary_text: Text to wrap.
        line_length: Maximum number of characters per line.

    Returns:
        The wrapped text with lines separated by ``"\\n"`` (empty string for
        empty or whitespace-only input).
    """
    lines = []
    line_words = []
    line_width = 0
    for word in summary_text.split():
        # The separating space only counts when the line already holds a
        # word; counting it for an empty line would push an over-long first
        # word onto a second line and leave a blank line in front of it.
        width_with_word = line_width + len(word) + (1 if line_words else 0)
        if line_words and width_with_word > line_length:
            lines.append(" ".join(line_words))
            line_words = [word]
            line_width = len(word)
        else:
            line_words.append(word)
            line_width = width_with_word
    if line_words:
        lines.append(" ".join(line_words))
    return "\n".join(lines)
def _group_blocks_into_paragraphs(blocks, spacing_threshold):
    """Merge one page's text blocks into paragraphs split at large vertical gaps."""
    paragraphs = []
    pieces = []
    previous_bottom = None
    for block in blocks:
        # PyMuPDF block tuples are (x0, y0, x1, y1, text, block_no, block_type);
        # a non-zero block_type marks an image placeholder, not document text.
        if len(block) > 6 and block[6] != 0:
            continue
        top, bottom, text = block[1], block[3], block[4]
        gap = 0 if previous_bottom is None else top - previous_bottom
        if gap > spacing_threshold:
            # Gap is large enough: close the running paragraph, start anew.
            paragraphs.append(" ".join(pieces).strip())
            pieces = [text]
        else:
            pieces.append(text)
        previous_bottom = bottom
    paragraphs.append(" ".join(pieces).strip())
    # Drop fragments that contained nothing but whitespace.
    return [paragraph for paragraph in paragraphs if paragraph]


def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
    """Split the text of a PDF into paragraphs using the gaps between blocks.

    Consecutive text blocks on a page are joined with a space; a new
    paragraph starts whenever the distance between the bottom of one block
    and the top of the next exceeds *spacing_threshold*. Paragraphs never
    span pages.

    Args:
        pdf_data: Raw bytes of the PDF file.
        spacing_threshold: Minimum vertical gap, in page units, that
            separates two paragraphs.

    Returns:
        A list of non-empty paragraph strings. Extraction is best-effort: if
        the PDF cannot be read, the error is printed and the paragraphs
        collected so far (possibly none) are returned.
    """
    paragraphs = []
    try:
        pdf_document = fitz.open(stream=io.BytesIO(pdf_data), filetype="pdf")
        try:
            for page_number in range(pdf_document.page_count):
                page = pdf_document.load_page(page_number)
                paragraphs.extend(
                    _group_blocks_into_paragraphs(
                        page.get_text("blocks"), spacing_threshold
                    )
                )
        finally:
            # Release the document even when a page fails to parse.
            pdf_document.close()
    except Exception as e:
        # Deliberate best-effort boundary: report and return what we have.
        print(f"Erreur lors de l'extraction du PDF : {str(e)}")
    return paragraphs
# --- Streamlit page ---------------------------------------------------------
# Upload a PDF, summarise every extracted paragraph, illustrate each summary
# with a generated image and offer the resulting deck as a download.
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# whole deck is rebuilt after each click; consider caching the result.

PPTX_MIME = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
MIN_SUMMARY_LENGTH = 10
SUMMARY_LINE_LENGTH = 80
IMAGE_SIZE = Inches(3)
IMAGE_BOTTOM_MARGIN = Inches(0.5)


def _summarize(paragraph):
    """Return the summary text produced by ``summarizer`` for *paragraph*."""
    # Length limits are token counts: keep them integral and make sure the
    # upper bound stays above the lower one, even for very short paragraphs.
    max_length = max(MIN_SUMMARY_LENGTH + 1, len(paragraph) // 2)
    result = summarizer(
        paragraph,
        max_length=max_length,
        min_length=MIN_SUMMARY_LENGTH,
        do_sample=False,
        truncation=True,  # clip inputs longer than the model can accept
    )
    return result[0]['summary_text']


def _add_illustration(prs, slide, prompt_text):
    """Generate an image for *prompt_text* and centre it at the bottom of *slide*."""
    image_bytes = query({
        "inputs": 'A picture without text about: ' + prompt_text
    })
    # Integer division keeps the offset an integral EMU value.
    left = (prs.slide_width - IMAGE_SIZE) // 2
    top = prs.slide_height - IMAGE_SIZE - IMAGE_BOTTOM_MARGIN
    # An in-memory stream avoids leaving temporary image files behind.
    slide.shapes.add_picture(
        io.BytesIO(image_bytes), left, top, IMAGE_SIZE, IMAGE_SIZE
    )


def _add_summary_text(slide, summary_text):
    """Place *summary_text* in a text box at the top of *slide*."""
    text_box = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(8), Inches(2))
    paragraph = text_box.text_frame.add_paragraph()
    paragraph.text = summary_text
    paragraph.space_after = Pt(0)


st.title("PDF2SLIDE")
uploaded_file = st.file_uploader("Select a PDF", type=["pdf"])

if uploaded_file is not None:
    pdf_data = uploaded_file.read()
    paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)

    if not paragraphs:
        st.warning("No text could be extracted from this PDF.")
    else:
        # Create a PowerPoint presentation with one slide per paragraph
        prs = Presentation()
        for paragraph in paragraphs:
            raw_summary = _summarize(paragraph)
            summary_text = add_line_breaks_to_summary(
                raw_summary, SUMMARY_LINE_LENGTH
            )

            slide = prs.slides.add_slide(prs.slide_layouts[5])
            try:
                _add_illustration(prs, slide, raw_summary)
            except (requests.RequestException, OSError) as exc:
                # A failed or unreadable image should not cost the user the
                # whole deck: keep the slide, just without its picture.
                st.warning(f"Could not add an illustration to a slide: {exc}")
            _add_summary_text(slide, summary_text)

        # Serialise the deck in memory; no temporary file or open handle
        # is left behind.
        presentation_buffer = io.BytesIO()
        prs.save(presentation_buffer)

        # Display a download button for the PowerPoint file
        st.download_button(
            label="Download PowerPoint Presentation",
            data=presentation_buffer.getvalue(),
            key="download_ppt",
            file_name="PDF2SLIDE_Presentation.pptx",
            mime=PPTX_MIME,
        )