# Spaces: Runtime error
# File size: 4,696 Bytes
from PyPDF2 import PdfReader
import re
import streamlit as st
import fitz
from transformers import pipeline
import os
import requests
import io
from PIL import Image
from pptx import Presentation
from pptx.util import Inches, Pt
import tempfile
# Hugging Face Inference API endpoint used to generate one illustration per slide.
API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

# SECURITY: an access token used to be hard-coded on this line. Credentials must
# not live in source control, so the token is now read from the HF_TOKEN
# environment variable (on Hugging Face Spaces, configure it as a secret).
# The previously committed token should be considered leaked and be revoked.
_HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Without a token no Authorization header is sent at all, rather than sending
# a malformed "Bearer " value.
headers = {"Authorization": f"Bearer {_HF_TOKEN}"} if _HF_TOKEN else {}

# Summarization pipeline shared by every paragraph processed below.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def query(payload, timeout=120):
    """POST *payload* as JSON to ``API_URL`` and return the raw response body.

    Args:
        payload: JSON-serializable request body sent to the inference endpoint.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the app indefinitely.

    Returns:
        The response body as bytes.

    Raises:
        requests.RequestException: On connection problems, on timeout, or when
            the server answers with an HTTP error status.
    """
    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
    # Fail loudly on 4xx/5xx: otherwise the error document returned by the
    # server would be handed back to the caller as if it were the payload.
    response.raise_for_status()
    return response.content
def add_line_breaks_to_summary(summary_text, line_length):
    """Wrap *summary_text* onto several lines without breaking words.

    Words are packed greedily: a word joins the current line when the line,
    a separating space and the word together fit within *line_length*.
    Runs of whitespace in the input are collapsed to single spaces.

    Args:
        summary_text: Text to wrap.
        line_length: Maximum number of characters per line. A single word
            longer than this is kept whole on a line of its own.

    Returns:
        The wrapped text, lines joined with a newline character. An empty or
        whitespace-only input yields an empty string.
    """
    lines = []
    current_line = ""
    for word in summary_text.split():
        if not current_line:
            # The first word of a line always goes in, and no separator is
            # counted for it. This avoids emitting an empty leading line when
            # the word is as long as (or longer than) the limit.
            current_line = word
        elif len(current_line) + 1 + len(word) <= line_length:
            current_line += " " + word
        else:
            lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return "\n".join(lines)
def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
    """Split the text of a PDF into paragraphs using vertical gaps between blocks.

    Each page is read as a sequence of text blocks. Consecutive blocks are
    merged into one paragraph until the gap between the bottom of the previous
    block and the top of the next one exceeds *spacing_threshold*. Paragraphs
    never span pages.

    Args:
        pdf_data: Raw bytes of the PDF file.
        spacing_threshold: Vertical gap above which a new paragraph starts,
            in the same units as the block coordinates.

    Returns:
        A list of stripped, non-empty paragraph strings. Extraction is
        best-effort: if anything fails, the error is printed and the
        paragraphs collected so far (possibly none) are returned.
    """
    paragraphs = []
    try:
        # The context manager closes the document even when a page fails to parse.
        with fitz.open(stream=io.BytesIO(pdf_data), filetype="pdf") as pdf_document:
            for page_number in range(pdf_document.page_count):
                page = pdf_document.load_page(page_number)
                current_paragraph = ""
                previous_bottom = None
                for block in page.get_text("blocks"):
                    # The first four items of a block are its coordinates
                    # (x0, y0, x1, y1); the fifth is its text.
                    y0, y1 = block[1], block[3]
                    text = block[4]
                    # The first block of a page has no predecessor, so it
                    # never triggers a paragraph break.
                    gap = 0 if previous_bottom is None else y0 - previous_bottom
                    if gap > spacing_threshold:
                        finished = current_paragraph.strip()
                        # Strip before testing so whitespace-only blocks do
                        # not produce empty paragraphs.
                        if finished:
                            paragraphs.append(finished)
                        current_paragraph = text
                    else:
                        current_paragraph += " " + text
                    previous_bottom = y1
                # Flush whatever is left at the end of the page.
                finished = current_paragraph.strip()
                if finished:
                    paragraphs.append(finished)
    except Exception as e:
        # Deliberately broad: a malformed upload must not crash the app.
        print(f"Erreur lors de l'extraction du PDF : {str(e)}")
    return paragraphs
# Streamlit entry point: upload a PDF, build one slide per extracted paragraph
# (summary text plus a generated illustration) and offer the deck for download.
st.title("PDF2SLIDE")
uploaded_file = st.file_uploader("Select a PDF", type=["pdf"])
if uploaded_file is not None:
    pdf_data = uploaded_file.read()
    paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)
    # Create a PowerPoint presentation
    prs = Presentation()
    image_size = Inches(3)
    min_summary_length = 10
    for paragraph in paragraphs:
        if not paragraph.strip():
            # Nothing to summarize.
            continue
        # max_length has to be an integer and should not fall below
        # min_length; half the character count is kept as the upper bound.
        max_summary_length = max(min_summary_length + 1, len(paragraph) // 2)
        summary = summarizer(
            paragraph,
            max_length=max_summary_length,
            min_length=min_summary_length,
            do_sample=False,
            # Clip paragraphs that exceed the model's input limit instead of failing.
            truncation=True,
        )
        raw_summary = summary[0]['summary_text']
        summary_text = add_line_breaks_to_summary(raw_summary, 80)
        # Create a slide
        slide = prs.slides.add_slide(prs.slide_layouts[5])
        # Add the image to the slide at the bottom with a 0.5-inch space,
        # horizontally centered. Integer division keeps the offset a whole
        # number of EMU.
        left = (prs.slide_width - image_size) // 2
        top = prs.slide_height - image_size - Inches(0.5)
        try:
            image_bytes = query({
                "inputs": 'A picture without text about: ' + raw_summary
            })
            # add_picture accepts a file-like object, so no temporary file
            # (and no leaked handle) is needed.
            slide.shapes.add_picture(
                io.BytesIO(image_bytes), left, top, image_size, image_size
            )
        except (requests.RequestException, OSError) as exc:
            # The illustration is optional: keep the slide with its text when
            # the API call fails or returns something that is not an image.
            st.warning(f"Could not generate an image for this slide: {exc}")
        # Add the paragraph to the slide at the top
        left = Inches(1)
        top = Inches(1)
        width = Inches(8)  # Adjust the width as needed
        height = Inches(2)  # Adjust the height as needed
        txBox = slide.shapes.add_textbox(left, top, width, height)
        tf = txBox.text_frame
        p = tf.add_paragraph()
        p.text = summary_text
        p.space_after = Pt(0)  # Adjust the spacing as needed
    # Save the PowerPoint presentation in memory rather than to a temporary
    # file that would never be deleted.
    presentation_buffer = io.BytesIO()
    prs.save(presentation_buffer)
    # Display a download button for the PowerPoint file
    st.download_button(
        label="Download PowerPoint Presentation",
        data=presentation_buffer.getvalue(),
        key="download_ppt",
        file_name="PDF2SLIDE_Presentation.pptx",
        mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
    )
|