PDF2SLIDE

Runtime error

App Files Files Community

PDF2SLIDE / app.py

newoz

Update app.py

47c535b over 2 years ago

raw

history blame contribute delete

4.62 kB

	from PyPDF2 import PdfReader
	import re
	import streamlit as st
	import fitz
	from transformers import pipeline
	import os
	import requests
	import io
	from PIL import Image
	from pptx import Presentation
	from pptx.util import Inches
	import tempfile

	# Hugging Face Inference API endpoint used to generate the slide illustrations.
	API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

	# The access token is read from the HF_TOKEN environment variable instead of
	# being committed with the source. A token that has already been published in
	# a repository should be treated as compromised and revoked.
	# When the variable is unset, no Authorization header is sent at all.
	_HF_API_TOKEN = os.environ.get("HF_TOKEN", "")
	headers = {"Authorization": f"Bearer {_HF_API_TOKEN}"} if _HF_API_TOKEN else {}

	# Summarization pipeline shared by all paragraphs; created once per script run.
	summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

	def query(payload, timeout=120):
	    """POST *payload* as JSON to ``API_URL`` and return the raw response body.

	    Args:
	        payload: JSON-serialisable request body sent to the endpoint.
	        timeout: Seconds to wait for the server before giving up, so a stalled
	            request cannot block the app indefinitely.

	    Returns:
	        The response body as bytes.

	    Raises:
	        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
	        requests.RequestException: On connection errors or timeouts.
	    """
	    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
	    # Without this check an error body would be handed back to the caller as
	    # if it were the requested content.
	    response.raise_for_status()
	    return response.content

	def _group_blocks_into_paragraphs(blocks, spacing_threshold):
	    """Merge one page's text blocks into paragraphs, splitting at large vertical gaps."""
	    paragraphs = []
	    parts = []
	    previous_bottom = None

	    for block in blocks:
	        # Assumes the PyMuPDF "blocks" tuple layout
	        # (x0, y0, x1, y1, text, block_no, block_type) -- confirm against the
	        # installed version. Blocks flagged as non-text (block_type != 0) do
	        # not carry paragraph text, so they are skipped.
	        if len(block) > 6 and block[6] != 0:
	            continue

	        y0, y1, text = block[1], block[3], block[4]

	        # A gap above this block larger than the threshold closes the
	        # paragraph collected so far; the first block has no gap to measure.
	        if previous_bottom is not None and y0 - previous_bottom > spacing_threshold:
	            paragraph = " ".join(parts).strip()
	            if paragraph:
	                paragraphs.append(paragraph)
	            parts = []

	        parts.append(text)
	        previous_bottom = y1

	    # Flush the last paragraph of the page.
	    paragraph = " ".join(parts).strip()
	    if paragraph:
	        paragraphs.append(paragraph)

	    return paragraphs


	def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
	    """Split the text of a PDF into paragraphs based on vertical whitespace.

	    Consecutive text blocks on a page are joined with a space. A new paragraph
	    starts whenever the distance between the bottom of the previous block and
	    the top of the current one exceeds ``spacing_threshold``. Paragraphs never
	    span pages, and paragraphs that are empty after stripping are dropped.

	    Args:
	        pdf_data: Raw bytes of the PDF file.
	        spacing_threshold: Minimum vertical gap, in the page coordinate units
	            reported by the PDF library, that separates two paragraphs.

	    Returns:
	        A list of stripped paragraph strings in reading order. If the PDF
	        cannot be processed, the error is printed and the paragraphs collected
	        so far (possibly none) are returned.
	    """
	    paragraphs = []
	    pdf_document = None

	    try:
	        pdf_document = fitz.open(stream=io.BytesIO(pdf_data), filetype="pdf")

	        for page_number in range(pdf_document.page_count):
	            page = pdf_document.load_page(page_number)
	            paragraphs.extend(
	                _group_blocks_into_paragraphs(page.get_text("blocks"), spacing_threshold)
	            )
	    except Exception as e:
	        # Deliberately best-effort: extraction problems are reported, not raised.
	        print(f"Erreur lors de l'extraction du PDF : {str(e)}")
	    finally:
	        # Close the document even when a page fails part-way through.
	        if pdf_document is not None:
	            pdf_document.close()

	    return paragraphs

	# Streamlit entry point: upload a PDF, summarise each paragraph, illustrate it
	# and offer the resulting slide deck as a download.
	st.title("PDF2SLIDE")

	uploaded_file = st.file_uploader("Select a PDF", type=["pdf"])

	if uploaded_file is not None:
	    paragraphs = extract_paragraphs_by_vertical_spacing(uploaded_file.read())

	    if not paragraphs:
	        # Extraction errors are only printed by the helper, so tell the user
	        # instead of offering an empty presentation.
	        st.warning("No text could be extracted from the uploaded PDF.")
	    else:
	        # Create a PowerPoint presentation
	        prs = Presentation()

	        summary_min_length = 10
	        # Pixel size the generated picture is resized to before embedding.
	        image_size = (800, 600)

	        for i, paragraph in enumerate(paragraphs, start=1):
	            # max_length has to be an integer and must not drop below
	            # min_length, which happened for short paragraphs.
	            summary_max_length = max(len(paragraph) // 2, summary_min_length)
	            summary = summarizer(
	                paragraph,
	                max_length=summary_max_length,
	                min_length=summary_min_length,
	                do_sample=False,
	                # Cut over-long paragraphs down to the model's input limit
	                # rather than failing on them.
	                truncation=True,
	            )
	            summary_text = summary[0]['summary_text']

	            # Create a slide
	            slide = prs.slides.add_slide(prs.slide_layouts[5])

	            # Add the summary to the slide
	            left = top = Inches(1)
	            width = height = Inches(5)
	            txBox = slide.shapes.add_textbox(left, top, width, height)
	            tf = txBox.text_frame
	            p = tf.add_paragraph()
	            p.text = f"Paragraph {i}:"
	            p.space_after = Inches(0.1)
	            p = tf.add_paragraph()
	            p.text = summary_text

	            # Generate the illustration and embed it from memory, so no
	            # temporary files are left behind on disk.
	            try:
	                image_bytes = query({
	                    "inputs": 'A picture without text about: ' + summary_text
	                })
	                image = Image.open(io.BytesIO(image_bytes))
	                # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the
	                # same filter under its current name.
	                image = image.resize(image_size, Image.LANCZOS)
	                image_stream = io.BytesIO()
	                image.save(image_stream, format="PNG")
	            except OSError as err:
	                # Presumably covers both request failures and undecodable
	                # image data (both are expected to derive from OSError --
	                # confirm). The slide keeps its text and the run continues.
	                st.warning(f"Could not generate an image for paragraph {i}: {err}")
	                continue

	            image_stream.seek(0)
	            # NOTE(review): width/height are both 5 inches, which does not
	            # match the 800x600 aspect ratio of the resized image.
	            slide.shapes.add_picture(image_stream, Inches(1), Inches(2), width, height)

	        # Save the PowerPoint presentation into memory
	        pptx_stream = io.BytesIO()
	        prs.save(pptx_stream)

	        # Display a download button for the PowerPoint file
	        st.download_button(
	            label="Download PowerPoint Presentation",
	            data=pptx_stream.getvalue(),
	            key="download_ppt",
	            file_name="PDF2SLIDE_Presentation.pptx",
	            mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
	        )