Spaces:

IAGENE
/

PDF2SLIDE

Runtime error

App Files Files Community

PDF2SLIDE / app.py

yannESGI

Rename app2.py to app.py

70fb28d over 2 years ago

raw

history blame contribute delete

6.95 kB

	from PyPDF2 import PdfReader
	import re
	import shutil
	import tempfile
	import base64
	from pptx import Presentation
	from pptx.util import Inches, Pt
	from pptx.enum.text import PP_ALIGN
	import streamlit as st
	import fitz
	from transformers import pipeline
	import os
	import requests
	import io
	from PIL import Image

	# Hosted Stable Diffusion endpoint used to generate one illustration per slide.
	API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

	# The access token is read from the environment (for example a Space secret
	# named HF_TOKEN) rather than being committed to the repository, where anyone
	# can read and reuse it. When no token is configured the Authorization header
	# is simply omitted.
	# NOTE(review): the token that used to be hard-coded here has been published
	# and should be revoked in the Hugging Face account settings.
	_hf_token = os.environ.get("HF_TOKEN", "").strip()
	headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}

	# Summarization pipeline used to condense each extracted paragraph.
	summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

	def query(payload, timeout=120):
	    """Send ``payload`` to the image-generation endpoint and return the raw body.

	    Args:
	        payload: JSON-serialisable request body posted to ``API_URL``.
	        timeout: Seconds to wait for the server before giving up, so a stalled
	            request cannot hang the app indefinitely.

	    Returns:
	        The response body as bytes.

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status. Error
	            responses carry a message rather than image data, so they are
	            reported here instead of being returned as if they were an image.
	        requests.RequestException: On connection problems or timeout.
	    """
	    response = requests.post(API_URL, headers=headers, json=payload, timeout=timeout)
	    response.raise_for_status()
	    return response.content

	def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
	    """Split the text of a PDF into paragraphs using the vertical gaps between blocks.

	    Blocks are read page by page. A block starts a new paragraph when the gap
	    between its top edge and the bottom edge of the previous block is larger
	    than ``spacing_threshold``; otherwise its text is appended to the current
	    paragraph. The paragraph in progress is flushed at the end of every page,
	    so paragraphs never span pages.

	    Args:
	        pdf_data: Raw bytes of the PDF document.
	        spacing_threshold: Vertical gap, in page coordinate units, above which
	            a block is treated as the start of a new paragraph.

	    Returns:
	        A list of whitespace-stripped paragraph strings. If the document cannot
	        be processed, the error is printed and whatever was collected before
	        the failure (possibly nothing) is returned.
	    """
	    paragraphs = []

	    try:
	        pdf_document = fitz.open(stream=io.BytesIO(pdf_data), filetype="pdf")
	        try:
	            for page_number in range(pdf_document.page_count):
	                page = pdf_document.load_page(page_number)

	                current_paragraph = ""
	                previous_bottom = None

	                for block in page.get_text("blocks"):
	                    # Block tuples are (x0, y0, x1, y1, text, block_no, block_type);
	                    # a non-zero block_type marks a non-text (image) block whose
	                    # "text" is only a placeholder description, so skip it.
	                    if len(block) > 6 and block[6] != 0:
	                        continue

	                    top, bottom, text = block[1], block[3], block[4]

	                    # Vertical distance to the previous block; the first block
	                    # of a page has no predecessor and counts as a gap of 0.
	                    if previous_bottom is None:
	                        vertical_spacing = 0
	                    else:
	                        vertical_spacing = top - previous_bottom

	                    if vertical_spacing > spacing_threshold:
	                        # Large gap: close the current paragraph and start a new one.
	                        if current_paragraph:
	                            paragraphs.append(current_paragraph.strip())
	                        current_paragraph = text
	                    else:
	                        # Small gap: the block continues the current paragraph.
	                        current_paragraph += " " + text

	                    previous_bottom = bottom

	                # Flush the last paragraph of the page.
	                if current_paragraph:
	                    paragraphs.append(current_paragraph.strip())
	        finally:
	            # Release the document even when extraction fails part-way through.
	            pdf_document.close()
	    except Exception as e:
	        # Best-effort extraction: report the problem and return what we have.
	        print(f"Erreur lors de l'extraction du PDF : {str(e)}")

	    return paragraphs

	# Streamlit entry point: upload a PDF, summarize each paragraph, illustrate it
	# with a generated image and offer the resulting deck for download.
	st.title("PDF2SLIDE")

	uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])

	if uploaded_file is not None:
	    pdf_data = uploaded_file.read()

	    # Ignore paragraphs that are empty once stripped; they cannot be summarized.
	    paragraphs = [
	        paragraph
	        for paragraph in extract_paragraphs_by_vertical_spacing(pdf_data)
	        if paragraph.strip()
	    ]

	    if not paragraphs:
	        st.warning("Aucun texte exploitable n'a été trouvé dans ce PDF.")
	    else:
	        prs = Presentation()

	        # Slide geometry is the same for every slide, so compute it once, as
	        # whole EMU values.
	        slide_width = prs.slide_width
	        slide_height = prs.slide_height

	        # Picture: 70% of the slide in both directions, centred horizontally.
	        image_width = int(slide_width * 0.7)
	        image_height = int(slide_height * 0.7)
	        left_img = (slide_width - image_width) // 2
	        top_img = int((slide_height - image_height) * 0.6)

	        # Summary text: 70% of the width, in the bottom 15% of the slide.
	        text_width = int(slide_width * 0.7)
	        text_height = int(slide_height * 0.15)
	        left_text = (slide_width - text_width) // 2
	        top_text = int(slide_height * 0.85)

	        min_summary_length = 10

	        for i, paragraph in enumerate(paragraphs, start=1):
	            # Target roughly a quarter of the paragraph's character count, as
	            # an integer that is never below the minimum length. truncation=True
	            # keeps over-long paragraphs within the model's input limit.
	            max_summary_length = max(min_summary_length, len(paragraph) // 4)
	            summary = summarizer(
	                paragraph,
	                max_length=max_summary_length,
	                min_length=min_summary_length,
	                do_sample=False,
	                truncation=True,
	            )
	            summary_text = summary[0]['summary_text']

	            # Layout index 5 of the default template is "Title Only".
	            slide = prs.slides.add_slide(prs.slide_layouts[5])

	            title = slide.shapes.title
	            title.text = f"Paragraphe {i}"
	            # Alignment is a paragraph property, not a shape property.
	            title.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER

	            # Generate the illustration. A failed request or a response that is
	            # not an image (for example an error message from the API) must not
	            # abort the whole presentation: warn and keep the text-only slide.
	            try:
	                image_bytes = query({"inputs": 'A picture about :' + summary_text})
	                Image.open(io.BytesIO(image_bytes))
	            except (requests.RequestException, OSError) as exc:
	                st.warning(f"Image indisponible pour le paragraphe {i} : {exc}")
	            else:
	                slide.shapes.add_picture(
	                    io.BytesIO(image_bytes), left_img, top_img, image_width, image_height
	                )

	            txBox = slide.shapes.add_textbox(left_text, top_text, text_width, text_height)
	            tf = txBox.text_frame
	            # Wrap inside the box instead of running off the slide.
	            tf.word_wrap = True

	            # Reuse the paragraph every new text frame starts with, so the
	            # summary is not preceded by an empty line.
	            p = tf.paragraphs[0]
	            p.text = summary_text
	            p.font.size = Pt(12)
	            p.space_before = Pt(12)
	            p.space_after = Pt(12)
	            p.alignment = PP_ALIGN.CENTER

	        # Serialize the deck in memory and expose it as a data-URI download link.
	        pptx_stream = io.BytesIO()
	        prs.save(pptx_stream)
	        encoded_pptx = base64.b64encode(pptx_stream.getvalue()).decode()

	        st.markdown(
	            f'<a href="data:application/vnd.openxmlformats-officedocument.presentationml.presentation;base64,{encoded_pptx}" download="presentation.pptx">Télécharger la présentation</a>',
	            unsafe_allow_html=True,
	        )