Spaces:
Runtime error
Runtime error
| from PyPDF2 import PdfReader | |
| import re | |
| import shutil | |
| import tempfile | |
| import base64 | |
| from pptx import Presentation | |
| from pptx.util import Inches, Pt | |
| from pptx.enum.text import PP_ALIGN | |
| import streamlit as st | |
| import fitz | |
| from transformers import pipeline | |
| import os | |
| import requests | |
| import io | |
| from PIL import Image | |
# Hosted text-to-image endpoint used to illustrate each slide.
API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

# The access token is read from the environment (for example a secret named
# HF_TOKEN configured on the hosting platform) instead of being committed to
# the source file, where anyone able to read the file could reuse it.
# NOTE(review): the token that used to be hard-coded here must be treated as
# leaked and revoked.
_hf_token = os.environ.get("HF_TOKEN", "").strip()

# Without a token the Authorization header is omitted entirely rather than
# sending a malformed "Bearer " value.
headers = {"Authorization": f"Bearer {_hf_token}"} if _hf_token else {}

# Summarization pipeline shared by the whole script; built once at import.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
def query(payload, timeout=120):
    """POST *payload* as JSON to ``API_URL`` and return the raw response body.

    Args:
        payload: JSON-serialisable request body.
        timeout: Seconds to wait for the server before giving up, so a stalled
            request cannot hang the application indefinitely.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Failing here keeps an error document from being handed to the
            caller as if it were the requested content.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.post(
        API_URL, headers=headers, json=payload, timeout=timeout
    )
    response.raise_for_status()
    return response.content
def _group_blocks_into_paragraphs(blocks, spacing_threshold):
    """Merge one page's text blocks into paragraphs split at large vertical gaps."""
    paragraphs = []
    parts = []
    previous_bottom = None

    for block in blocks:
        # PyMuPDF block tuples carry a block-type field at index 6
        # (0 = text); anything else has no text worth summarising.
        if len(block) > 6 and block[6] != 0:
            continue

        top, bottom, text = block[1], block[3], block[4]

        # The first block of a page has nothing above it: treat the gap as 0.
        gap = 0 if previous_bottom is None else top - previous_bottom

        if gap > spacing_threshold:
            # A large gap closes the paragraph collected so far.
            paragraph = " ".join(parts).strip()
            if paragraph:
                paragraphs.append(paragraph)
            parts = []

        parts.append(text)
        previous_bottom = bottom

    # Whatever is left at the end of the page is its last paragraph.
    paragraph = " ".join(parts).strip()
    if paragraph:
        paragraphs.append(paragraph)

    return paragraphs


def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
    """Split the text of a PDF into paragraphs using vertical spacing.

    Consecutive text blocks of a page are joined with a space; a new paragraph
    starts whenever the distance between the bottom of the previous block and
    the top of the current one exceeds ``spacing_threshold``. Paragraphs never
    span pages, and whitespace-only paragraphs are dropped.

    Args:
        pdf_data: Raw bytes of the PDF file.
        spacing_threshold: Minimum vertical gap, in page coordinate units,
            that separates two paragraphs.

    Returns:
        A list of stripped paragraph strings. Extraction is best-effort: if
        the document cannot be read, the error is printed and the paragraphs
        collected so far (possibly none) are returned.
    """
    paragraphs = []
    try:
        # The context manager closes the document even when a page fails to
        # parse, which the explicit close() at the end of the loop did not.
        with fitz.open(stream=io.BytesIO(pdf_data), filetype="pdf") as pdf_document:
            for page_number in range(pdf_document.page_count):
                page = pdf_document.load_page(page_number)
                blocks = page.get_text("blocks")
                paragraphs.extend(
                    _group_blocks_into_paragraphs(blocks, spacing_threshold)
                )
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and fall through.
        print(f"Erreur lors de l'extraction du PDF : {str(e)}")
    return paragraphs
# Streamlit page: upload a PDF, then build one illustrated slide per paragraph
# and offer the resulting presentation for download.

# Bounds, in tokens, for the generated summaries.
_MIN_SUMMARY_TOKENS = 10
# NOTE(review): assumed to match the position limit of the summarization
# checkpoint in use — confirm against the model configuration.
_MAX_SUMMARY_TOKENS = 1024

# Font size of the summary text. The previous "shrink until it fits" loop
# compared and decremented values that are None on a fresh paragraph and
# raised TypeError, so a fixed size combined with word wrapping is used.
_SUMMARY_FONT_SIZE = Pt(12)

_PPTX_MIME = (
    "application/vnd.openxmlformats-officedocument.presentationml.presentation"
)


def _summarize(paragraph):
    """Return the summary text the module-level ``summarizer`` gives for *paragraph*."""
    # The length limit must be an integer and must stay above min_length;
    # len(paragraph) / 4 was a float and dropped below 10 for short text.
    target = min(len(paragraph) // 4, _MAX_SUMMARY_TOKENS)
    max_length = max(target, _MIN_SUMMARY_TOKENS + 1)
    result = summarizer(
        paragraph,
        max_length=max_length,
        min_length=_MIN_SUMMARY_TOKENS,
        do_sample=False,
        truncation=True,  # over-long paragraphs are cut instead of failing
    )
    return result[0]["summary_text"]


def _fetch_illustration(summary_text, number):
    """Return image bytes illustrating *summary_text*, or None when unavailable."""
    try:
        image_bytes = query({"inputs": 'A picture about :' + summary_text})
        # Opening the data checks that the service really returned an image
        # and not, for instance, a JSON error document.
        Image.open(io.BytesIO(image_bytes))
    except (requests.RequestException, OSError) as exc:
        st.warning(f"Image indisponible pour le paragraphe {number} : {exc}")
        return None
    return image_bytes


def _add_paragraph_slide(prs, number, summary_text, image_bytes):
    """Append a slide with a centred title, an optional picture and the summary."""
    # Layout 5 is "Title Only" in python-pptx's default template.
    slide = prs.slides.add_slide(prs.slide_layouts[5])

    title = slide.shapes.title
    title.text = f"Paragraphe {number}"
    # Alignment is a paragraph property; assigning it on the shape did nothing.
    title.text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER

    slide_width = prs.slide_width
    slide_height = prs.slide_height

    if image_bytes is not None:
        # The picture takes 70% of the slide in both directions and is
        # centred horizontally. Lengths are kept as whole EMUs.
        image_width = int(slide_width * 0.7)
        image_height = int(slide_height * 0.7)
        left_img = (slide_width - image_width) // 2
        top_img = int((slide_height - image_height) * 0.6)
        slide.shapes.add_picture(
            io.BytesIO(image_bytes), left_img, top_img, image_width, image_height
        )

    # The summary sits in the bottom 15% of the slide, 70% wide and centred.
    text_width = int(slide_width * 0.7)
    text_height = int(slide_height * 0.15)
    left_text = (slide_width - text_width) // 2
    top_text = int(slide_height * 0.85)
    text_box = slide.shapes.add_textbox(left_text, top_text, text_width, text_height)

    text_frame = text_box.text_frame
    text_frame.word_wrap = True  # keep long summaries inside the box width

    # Reuse the paragraph every text frame starts with instead of adding a
    # second one, which left an empty line above the summary.
    paragraph = text_frame.paragraphs[0]
    paragraph.text = summary_text
    paragraph.font.size = _SUMMARY_FONT_SIZE
    paragraph.space_before = Pt(12)
    paragraph.space_after = Pt(12)
    paragraph.alignment = PP_ALIGN.CENTER


st.title("PDF2SLIDE")
uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])

if uploaded_file is not None:
    pdf_data = uploaded_file.read()
    # Empty paragraphs cannot be summarised; drop them before numbering.
    paragraphs = [
        text
        for text in extract_paragraphs_by_vertical_spacing(pdf_data)
        if text.strip()
    ]

    if not paragraphs:
        st.warning("Aucun texte exploitable n'a été trouvé dans ce PDF.")
    else:
        prs = Presentation()
        for i, paragraph in enumerate(paragraphs, start=1):
            summary_text = _summarize(paragraph)
            image_bytes = _fetch_illustration(summary_text, i)
            _add_paragraph_slide(prs, i, summary_text, image_bytes)

        pptx_stream = io.BytesIO()
        prs.save(pptx_stream)
        encoded_pptx = base64.b64encode(pptx_stream.getvalue()).decode()

        # A plain data-URI link is kept on purpose: clicking it does not
        # trigger a Streamlit rerun, which would repeat every summarisation
        # and image request.
        st.markdown(
            f'<a href="data:{_PPTX_MIME};base64,{encoded_pptx}" download="presentation.pptx">Télécharger la présentation</a>',
            unsafe_allow_html=True,
        )