PDF2SLIDE2

Runtime error

App Files Files Community

PDF2SLIDE2 / app.py

Flo161

Update app.py

ec76e54 over 2 years ago

raw

history blame contribute delete

4.7 kB

	from PyPDF2 import PdfReader
	import re
	import streamlit as st
	import fitz
	from transformers import pipeline
	import os
	import requests
	import io
	from PIL import Image
	from pptx import Presentation
	from pptx.util import Inches, Pt
	import tempfile

	# Hosted text-to-image endpoint used to illustrate each slide.
	API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

	# The access token is read from the environment instead of being written in
	# the source: a token committed to a published file is readable by anyone and
	# has to be treated as compromised. Set HF_TOKEN before starting the app.
	_hf_token = os.environ.get("HF_TOKEN", "").strip()

	# When no token is configured, no Authorization header is sent at all
	# (rather than sending a malformed "Bearer " value).
	headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}

	# Summarization pipeline used by the script below to condense each paragraph.
	summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

	def query(payload, timeout=120):
	    """POST *payload* to ``API_URL`` and return the raw response body.

	    Args:
	        payload: JSON-serialisable request body, e.g. ``{"inputs": "<prompt>"}``.
	        timeout: Seconds to wait for the server before giving up. Without a
	            timeout a stalled connection would block the app indefinitely.

	    Returns:
	        The response body as ``bytes`` (the caller treats it as image data).

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status.
	        requests.RequestException: On connection failure or timeout.
	    """
	    response = requests.post(
	        API_URL, headers=headers, json=payload, timeout=timeout
	    )
	    # Fail here on an error status instead of handing the error body back to
	    # the caller, which would otherwise try to use it as an image.
	    response.raise_for_status()
	    return response.content

	def add_line_breaks_to_summary(summary_text, line_length):
	    """Wrap *summary_text* to lines of at most *line_length* characters.

	    Words are never split: a single word longer than *line_length* is placed
	    on a line of its own. Runs of whitespace in the input are collapsed to a
	    single space.

	    Args:
	        summary_text: The text to wrap.
	        line_length: Maximum number of characters per line.

	    Returns:
	        The wrapped text with lines joined by ``"\\n"``; an empty string when
	        the input contains no words.
	    """
	    lines = []
	    current_words = []
	    current_length = 0

	    for word in summary_text.split():
	        if not current_words:
	            # The first word of a line always goes on it, and no separating
	            # space is counted. This avoids emitting an empty line when the
	            # very first word is already as long as the limit.
	            current_words.append(word)
	            current_length = len(word)
	            continue

	        # Length of the line if this word were appended after one space.
	        candidate_length = current_length + 1 + len(word)
	        if candidate_length <= line_length:
	            current_words.append(word)
	            current_length = candidate_length
	        else:
	            lines.append(" ".join(current_words))
	            current_words = [word]
	            current_length = len(word)

	    if current_words:
	        lines.append(" ".join(current_words))

	    return "\n".join(lines)

	def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
	    """Group the text blocks of a PDF into paragraphs based on vertical gaps.

	    On each page, consecutive blocks are merged into one paragraph until the
	    gap between the bottom of the previous block and the top of the next one
	    exceeds *spacing_threshold*. Paragraphs never span a page boundary.

	    Args:
	        pdf_data: The PDF file content as bytes.
	        spacing_threshold: Vertical gap, in page coordinate units, above which
	            a new paragraph is started.

	    Returns:
	        A list of non-empty, stripped paragraph strings. Extraction is
	        best-effort: on failure the error is printed and the paragraphs
	        collected so far (possibly none) are returned.
	    """
	    paragraphs = []

	    def flush(paragraph_text):
	        # Store a finished paragraph, ignoring whitespace-only ones.
	        cleaned = paragraph_text.strip()
	        if cleaned:
	            paragraphs.append(cleaned)

	    try:
	        pdf_document = fitz.open(stream=io.BytesIO(pdf_data), filetype="pdf")
	        try:
	            for page_number in range(pdf_document.page_count):
	                page = pdf_document.load_page(page_number)

	                current_paragraph = ""
	                previous_bottom = None

	                for block in page.get_text("blocks"):
	                    # Block tuples end with (block_no, block_type); a non-zero
	                    # type marks an image block whose "text" is only an image
	                    # placeholder, so it must not be merged into the paragraph.
	                    if len(block) > 6 and block[6] != 0:
	                        continue

	                    top, bottom = block[1], block[3]
	                    text = block[4]

	                    # The first block on a page has no predecessor: gap is 0.
	                    if previous_bottom is None:
	                        vertical_spacing = 0
	                    else:
	                        vertical_spacing = top - previous_bottom

	                    if vertical_spacing > spacing_threshold:
	                        flush(current_paragraph)
	                        current_paragraph = text
	                    else:
	                        current_paragraph += " " + text

	                    previous_bottom = bottom

	                # Emit whatever is left at the end of the page.
	                flush(current_paragraph)
	        finally:
	            # Release the document even when a page fails to parse.
	            pdf_document.close()
	    except Exception as e:
	        # Deliberate best-effort boundary: report and return what we have.
	        print(f"Erreur lors de l'extraction du PDF : {str(e)}")

	    return paragraphs

	# Streamlit entry point: upload a PDF, summarize each paragraph, illustrate it
	# with a generated image and offer the result as a PowerPoint download.
	st.title("PDF2SLIDE")

	uploaded_file = st.file_uploader("Select a PDF", type=["pdf"])

	if uploaded_file is not None:
	    pdf_data = uploaded_file.read()
	    paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)

	    # Create a PowerPoint presentation
	    prs = Presentation()

	    # Picture geometry: a 3-inch square, centred horizontally, sitting
	    # 0.5 inch above the bottom edge. Floor division keeps the offset an
	    # integer, as slide coordinates are integer EMU values.
	    image_size = Inches(3)
	    image_left = (prs.slide_width - image_size) // 2
	    image_top = prs.slide_height - image_size - Inches(0.5)

	    summary_min_length = 10

	    for paragraph in paragraphs:
	        if not paragraph.strip():
	            # Nothing to summarize.
	            continue

	        # Heuristic length budget: half the paragraph's character count,
	        # as an integer and never below the minimum length.
	        summary_max_length = max(summary_min_length + 1, len(paragraph) // 2)
	        summary = summarizer(
	            paragraph,
	            max_length=summary_max_length,
	            min_length=summary_min_length,
	            do_sample=False,
	            # Cut inputs that exceed the model's input limit instead of failing.
	            truncation=True,
	        )
	        raw_summary = summary[0]['summary_text']
	        summary_text = add_line_breaks_to_summary(raw_summary, 80)

	        # Create a slide
	        slide = prs.slides.add_slide(prs.slide_layouts[5])

	        # Generate the picture and embed it straight from memory; no temporary
	        # file is left behind on disk. A failed generation only costs the
	        # picture: the slide is still produced with its text.
	        try:
	            image_bytes = query({
	                "inputs": 'A picture without text about: ' + raw_summary
	            })
	            slide.shapes.add_picture(
	                io.BytesIO(image_bytes),
	                image_left,
	                image_top,
	                image_size,
	                image_size,
	            )
	        except (requests.RequestException, OSError) as error:
	            st.warning(
	                f"Image generation failed; slide created without a picture: {error}"
	            )

	        # Add the summary to the slide at the top
	        text_box = slide.shapes.add_textbox(
	            Inches(1), Inches(1), Inches(8), Inches(2)
	        )
	        # A new text frame already holds one empty paragraph; fill it rather
	        # than appending a second one, which would leave a blank first line.
	        summary_paragraph = text_box.text_frame.paragraphs[0]
	        summary_paragraph.text = summary_text
	        summary_paragraph.space_after = Pt(0)

	    # Serialize the presentation in memory so that no temporary file and no
	    # open file handle outlive this script run.
	    presentation_buffer = io.BytesIO()
	    prs.save(presentation_buffer)

	    # Display a download button for the PowerPoint file
	    st.download_button(
	        label="Download PowerPoint Presentation",
	        data=presentation_buffer.getvalue(),
	        key="download_ppt",
	        file_name="PDF2SLIDE_Presentation.pptx",
	        mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
	    )