Spaces:

IAGENE
/

PDF2SLIDE

Runtime error

App Files Files Community

Flo161 committed on Sep 23, 2023

Commit

01a759b

1 Parent(s): 1974873

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -97

app.py CHANGED Viewed

@@ -1,11 +1,5 @@
 from PyPDF2 import PdfReader
 import re
-import shutil
-import tempfile
-import base64
-from pptx import Presentation
-from pptx.util import Inches, Pt
-from pptx.enum.text import PP_ALIGN
 import streamlit as st
 import fitz
 from transformers import pipeline
@@ -13,6 +7,9 @@ import os
 import requests
 import io
 from PIL import Image
 API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"
 headers = {"Authorization": "Bearer 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"}
@@ -22,6 +19,23 @@ def query(payload):
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.content
 def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
     paragraphs = []
@@ -37,26 +51,23 @@ def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
             previous_bottom = None
             for block in blocks:
-                x0, y0, x1, y1 = block[:4]  # Coordonnées du bloc de texte
-                text = block[4]  # Texte du bloc
-                # Mesurez l'espacement vertical entre les blocs de texte
                 if previous_bottom is not None:
                     vertical_spacing = y0 - previous_bottom
                 else:
                     vertical_spacing = 0
-                # Si l'espacement vertical dépasse le seuil, considérez-le comme un nouveau paragraphe
                 if vertical_spacing > spacing_threshold:
                     if current_paragraph:
                         paragraphs.append(current_paragraph.strip())
                     current_paragraph = text
                 else:
-                    current_paragraph += " " + text  # Ajoutez le texte au paragraphe actuel
                 previous_bottom = y1
-            # Ajoutez le dernier paragraphe de la page
             if current_paragraph:
                 paragraphs.append(current_paragraph.strip())
@@ -68,100 +79,56 @@ def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
 st.title("PDF2SLIDE")
-uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])
 if uploaded_file is not None:
     pdf_data = uploaded_file.read()
     paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)
-    i = 1
-    prs = Presentation()
-    for paragraph in paragraphs:
-        summary = summarizer(paragraph, max_length=(len(paragraph)/4), min_length=10, do_sample=False)
-        slide_layout = prs.slide_layouts[5]  # Utilisez le modèle de diapositive approprié (index 5 pour une diapositive de titre et de contenu)
-        slide = prs.slides.add_slide(slide_layout)
-        title = slide.shapes.title
-        title.text = f"Paragraphe {i}"
-        title.alignment = PP_ALIGN.CENTER
-        slide_width = prs.slide_width
-        slide_height = prs.slide_height
-        image_width = slide_width * 0.7  # L'image occupe 70% de la largeur de la slide
-        image_height = slide_height * 0.7  # L'image occupe 70% de la hauteur de la slide
-        left_img = (slide_width - image_width) / 2  # Centrez horizontalement
-        top_img = (slide_height - image_height) * 0.6  # Occupe 15% de la hauteur en haut de la slide
-        #left = prs.slide_width * 0.1
-        #top = prs.slide_height * 0.6
-        #width = prs.slide_width * 0.8
-        #height = prs.slide_height * 0.3
-        #txBox = slide.shapes.add_textbox(left, top, width, height)
-        #tf = txBox.text_frame
-        #p = tf.add_paragraph()
-        #p.text = summary[0]['summary_text']
-        #st.text(f"Paragraphe {i}: {summary[0]['summary_text']}")  # Affiche le résumé du paragraphe
         image_bytes = query({
-            "inputs": 'A picture about  :' + summary[0]['summary_text']  # Utilisez le texte du résumé
         })
-        image = Image.open(io.BytesIO(image_bytes))
-        #left = top = prs.slide_width * 0.1
-        pic = slide.shapes.add_picture(io.BytesIO(image_bytes), left_img, top_img, image_width, image_height)
-        title_width = slide_width * 0.7  # Le titre occupe 70% de la largeur de la slide
-        title_height = slide_height * 0.15  # Le titre occupe 15% de la hauteur de la slide
-        left_title = (slide_width - title_width) / 2  # Centrez horizontalement
-        top_title = (slide_height - title_height) * 0.05  # Occupe 5% de la hauteur en haut de la slide
-        title = slide.shapes.add_textbox(left_title, top_title, title_width, title_height)
-        title_frame = title.text_frame
-        #title_p = title_frame.add_paragraph()
-        #title_p.text = "Paragraphe {i}: "
-        #title_p.alignment = PP_ALIGN.CENTER  # Centrez le texte horizontalement
-        text_width = slide_width * 0.7  # Le texte occupe 70% de la largeur de la slide
-        text_height = slide_height * 0.15  # Le texte occupe 15% de la hauteur de la slide
-        left_text = (slide_width - text_width) / 2  # Centrez horizontalement
-        top_text = slide_height * 0.85  # Occupe 85% de la hauteur en bas de la slide
-        txBox = slide.shapes.add_textbox(left_text, top_text, text_width, text_height)
         tf = txBox.text_frame
         p = tf.add_paragraph()
-        p.text = summary[0]['summary_text']
-        # Ajustez la taille de police pour le texte afin qu'il rentre dans le cadre de texte
-        while p.space_after > Pt(0):
-            p.font.size -= Pt(1)  # Réduisez la taille de police de 1 point
-        p.space_before = Pt(12)  # Espace avant le paragraphe (12 points)
-        p.space_after = Pt(12)  # Espace après le paragraphe (12 points)
-        p.alignment = PP_ALIGN.CENTER  # Centrez le texte horizontalement
-        # Ajustez la largeur du cadre de texte du texte pour éviter le dépassement
-        text_frame_width = Pt(text_width - 2)  # Réduisez la largeur de 0.1 pouce de chaque côté
-        #txBox.width = text_frame_width
-        txBox.text_frame.width = text_frame_width
-       # st.image(image)
-        i = i + 1
-    #tempfile_name = tempfile.NamedTemporaryFile(delete=False, suffix=".pptx").name
-    pptx_stream = io.BytesIO()
-    prs.save(pptx_stream)
-    pptx_stream.seek(0)
-    st.markdown(
-    f'<a href="data:application/vnd.openxmlformats-officedocument.presentationml.presentation;base64,{base64.b64encode(pptx_stream.read()).decode()}" download="presentation.pptx">Télécharger la présentation</a>',
-    unsafe_allow_html=True,
-)
-    #st.download_button("Télécharger la présentation", "output_path", key="download_pptx", mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")

 from PyPDF2 import PdfReader
 import re
 import streamlit as st
 import fitz
 from transformers import pipeline
 import requests
 import io
 from PIL import Image
+from pptx import Presentation
+from pptx.util import Inches, Pt
+import tempfile
 API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"
 headers = {"Authorization": "Bearer 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"}
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.content
def add_line_breaks_to_summary(summary_text, line_length):
    """Wrap *summary_text* onto multiple lines without splitting words.

    The text is split on whitespace and the words are greedily packed into
    lines of at most ``line_length`` characters, counting the single space
    placed between adjacent words.

    Args:
        summary_text: Text to wrap. Runs of whitespace are collapsed to a
            single space because the text is re-joined word by word.
        line_length: Maximum number of characters per line.

    Returns:
        The wrapped text with lines joined by ``"\\n"``. A word longer than
        ``line_length`` is kept whole on a line of its own. An empty or
        whitespace-only input yields ``""``.
    """
    lines = []
    current_line = ""
    for word in summary_text.split():
        if not current_line:
            # A line always accepts its first word, even an over-long one.
            # This avoids emitting an empty line before it and avoids
            # charging a separator space that is never written.
            current_line = word
        elif len(current_line) + 1 + len(word) <= line_length:
            # +1 accounts for the space between the two words.
            current_line += " " + word
        else:
            lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return "\n".join(lines)
 def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
     paragraphs = []
             previous_bottom = None
             for block in blocks:
+                x0, y0, x1, y1 = block[:4]
+                text = block[4]
                 if previous_bottom is not None:
                     vertical_spacing = y0 - previous_bottom
                 else:
                     vertical_spacing = 0
                 if vertical_spacing > spacing_threshold:
                     if current_paragraph:
                         paragraphs.append(current_paragraph.strip())
                     current_paragraph = text
                 else:
+                    current_paragraph += " " + text
                 previous_bottom = y1
             if current_paragraph:
                 paragraphs.append(current_paragraph.strip())
 st.title("PDF2SLIDE")
+uploaded_file = st.file_uploader("Select a PDF", type=["pdf"])
 if uploaded_file is not None:
     pdf_data = uploaded_file.read()
     paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)
+    i = 1
+    # Create a PowerPoint presentation
+    prs = Presentation()
+    for paragraph in paragraphs:
+        summary = summarizer(paragraph, max_length=(len(paragraph) / 2), min_length=10, do_sample=False)
+        summary_text = add_line_breaks_to_summary(summary[0]['summary_text'], 80)
+        # Generate and save the image to a temporary file
         image_bytes = query({
+            "inputs": 'A picture without text about: ' + summary[0]['summary_text']
         })
+        temp_img_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
+        with open(temp_img_path, "wb") as img_file:
+            img_file.write(image_bytes)
+        # Create a slide
+        slide = prs.slides.add_slide(prs.slide_layouts[5])
+        # Add the image to the slide at the bottom with a 0.5-inch space
+        left = (prs.slide_width - Inches(3)) / 2
+        top = prs.slide_height - Inches(3) - Inches(0.5)  # Adjusted for the 0.5-inch space
+        pic = slide.shapes.add_picture(temp_img_path, left, top, Inches(3), Inches(3))
+        # Add the paragraph to the slide at the top
+        left = Inches(1)
+        top = Inches(1)
+        width = Inches(8)  # Adjust the width as needed
+        height = Inches(2)  # Adjust the height as needed
+        txBox = slide.shapes.add_textbox(left, top, width, height)
         tf = txBox.text_frame
         p = tf.add_paragraph()
+        p.text = summary_text
+        p.space_after = Pt(0)  # Adjust the spacing as needed
+    # Save the PowerPoint presentation
+    presentation_path = tempfile.NamedTemporaryFile(delete=False, suffix=".pptx").name
+    prs.save(presentation_path)
+    # Display a download button for the PowerPoint file
+    st.download_button(
+        label="Download PowerPoint Presentation",
+        data=open(presentation_path, "rb"),
+        key="download_ppt",
+        file_name="PDF2SLIDE_Presentation.pptx",
+    )