Spaces:

IAGENE
/

PDF2SLIDE

Runtime error

App Files Files Community

Flo161 committed on Sep 23, 2023

Commit

1974873

1 Parent(s): a97a6f2

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -26

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ import shutil
 import tempfile
 import base64
 from pptx import Presentation
-from pptx.util import Inches, Pt
 from pptx.enum.text import PP_ALIGN
 import streamlit as st
 import fitz
@@ -24,28 +24,28 @@ def query(payload):
 def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
     paragraphs = []
     try:
         pdf_stream = io.BytesIO(pdf_data)
         pdf_document = fitz.open(stream=pdf_stream, filetype="pdf")
         for page_number in range(pdf_document.page_count):
             page = pdf_document.load_page(page_number)
             blocks = page.get_text("blocks")
             current_paragraph = ""
             previous_bottom = None
             for block in blocks:
                 x0, y0, x1, y1 = block[:4]  # Coordonnées du bloc de texte
                 text = block[4]  # Texte du bloc
                 # Mesurez l'espacement vertical entre les blocs de texte
                 if previous_bottom is not None:
                     vertical_spacing = y0 - previous_bottom
                 else:
                     vertical_spacing = 0
                 # Si l'espacement vertical dépasse le seuil, considérez-le comme un nouveau paragraphe
                 if vertical_spacing > spacing_threshold:
                     if current_paragraph:
@@ -53,17 +53,17 @@ def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
                     current_paragraph = text
                 else:
                     current_paragraph += " " + text  # Ajoutez le texte au paragraphe actuel
                 previous_bottom = y1
             # Ajoutez le dernier paragraphe de la page
             if current_paragraph:
                 paragraphs.append(current_paragraph.strip())
         pdf_document.close()
     except Exception as e:
         print(f"Erreur lors de l'extraction du PDF : {str(e)}")
     return paragraphs
 st.title("PDF2SLIDE")
@@ -74,11 +74,12 @@ if uploaded_file is not None:
     pdf_data = uploaded_file.read()
     paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)
-    i = 1
     prs = Presentation()
     for paragraph in paragraphs:
-        summary = summarizer(paragraph, max_length=(len(paragraph) // 4), min_length=10, do_sample=False)
         slide_layout = prs.slide_layouts[5]  # Utilisez le modèle de diapositive approprié (index 5 pour une diapositive de titre et de contenu)
         slide = prs.slides.add_slide(slide_layout)
@@ -95,10 +96,22 @@ if uploaded_file is not None:
         left_img = (slide_width - image_width) / 2  # Centrez horizontalement
         top_img = (slide_height - image_height) * 0.6  # Occupe 15% de la hauteur en haut de la slide
         image_bytes = query({
             "inputs": 'A picture about  :' + summary[0]['summary_text']  # Utilisez le texte du résumé
         })
         image = Image.open(io.BytesIO(image_bytes))
         pic = slide.shapes.add_picture(io.BytesIO(image_bytes), left_img, top_img, image_width, image_height)
         title_width = slide_width * 0.7  # Le titre occupe 70% de la largeur de la slide
@@ -108,39 +121,47 @@ if uploaded_file is not None:
         title = slide.shapes.add_textbox(left_title, top_title, title_width, title_height)
         title_frame = title.text_frame
         text_width = slide_width * 0.7  # Le texte occupe 70% de la largeur de la slide
         text_height = slide_height * 0.15  # Le texte occupe 15% de la hauteur de la slide
         left_text = (slide_width - text_width) / 2  # Centrez horizontalement
         top_text = slide_height * 0.85  # Occupe 85% de la hauteur en bas de la slide
         txBox = slide.shapes.add_textbox(left_text, top_text, text_width, text_height)
         tf = txBox.text_frame
         p = tf.add_paragraph()
         p.text = summary[0]['summary_text']
-        p.space_after = Pt(0)  # Initialize space_after to 0 points
-        while p.space_after != Pt(12):
-            p.space_after = Pt(12)  # Set space_after to 12 points again
-            p.space_after = Pt(0)  # Initialize space_after to 0 points
-            p.space_after = Pt(12)  # Set space_after to 12 points again
-            p.space_after = Pt(0)  # Initialize space_after to 0 points
-            p.font.size -= Pt(1)  # Reduce the font size by 1 point
         p.space_before = Pt(12)  # Espace avant le paragraphe (12 points)
         p.space_after = Pt(12)  # Espace après le paragraphe (12 points)
         p.alignment = PP_ALIGN.CENTER  # Centrez le texte horizontalement
         text_frame_width = Pt(text_width - 2)  # Réduisez la largeur de 0.1 pouce de chaque côté
         txBox.text_frame.width = text_frame_width
         i = i + 1
     pptx_stream = io.BytesIO()
     prs.save(pptx_stream)
     pptx_stream.seek(0)
     st.markdown(
-        f'<a href="data:application/vnd.openxmlformats-officedocument.presentationml.presentation;base64,{base64.b64encode(pptx_stream.read()).decode()}" download="presentation.pptx">Télécharger la présentation</a>',
-        unsafe_allow_html=True,
-    )

 import tempfile
 import base64
 from pptx import Presentation
+from pptx.util import Inches, Pt
 from pptx.enum.text import PP_ALIGN
 import streamlit as st
 import fitz
 def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
     paragraphs = []
     try:
         pdf_stream = io.BytesIO(pdf_data)
         pdf_document = fitz.open(stream=pdf_stream, filetype="pdf")
         for page_number in range(pdf_document.page_count):
             page = pdf_document.load_page(page_number)
             blocks = page.get_text("blocks")
             current_paragraph = ""
             previous_bottom = None
             for block in blocks:
                 x0, y0, x1, y1 = block[:4]  # Coordonnées du bloc de texte
                 text = block[4]  # Texte du bloc
                 # Mesurez l'espacement vertical entre les blocs de texte
                 if previous_bottom is not None:
                     vertical_spacing = y0 - previous_bottom
                 else:
                     vertical_spacing = 0
                 # Si l'espacement vertical dépasse le seuil, considérez-le comme un nouveau paragraphe
                 if vertical_spacing > spacing_threshold:
                     if current_paragraph:
                     current_paragraph = text
                 else:
                     current_paragraph += " " + text  # Ajoutez le texte au paragraphe actuel
                 previous_bottom = y1
             # Ajoutez le dernier paragraphe de la page
             if current_paragraph:
                 paragraphs.append(current_paragraph.strip())
         pdf_document.close()
     except Exception as e:
         print(f"Erreur lors de l'extraction du PDF : {str(e)}")
     return paragraphs
 st.title("PDF2SLIDE")
     pdf_data = uploaded_file.read()
     paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)
+    i = 1
     prs = Presentation()
     for paragraph in paragraphs:
+        summary = summarizer(paragraph, max_length=(len(paragraph)/4), min_length=10, do_sample=False)
         slide_layout = prs.slide_layouts[5]  # Utilisez le modèle de diapositive approprié (index 5 pour une diapositive de titre et de contenu)
         slide = prs.slides.add_slide(slide_layout)
         left_img = (slide_width - image_width) / 2  # Centrez horizontalement
         top_img = (slide_height - image_height) * 0.6  # Occupe 15% de la hauteur en haut de la slide
+        #left = prs.slide_width * 0.1
+        #top = prs.slide_height * 0.6
+        #width = prs.slide_width * 0.8
+        #height = prs.slide_height * 0.3
+        #txBox = slide.shapes.add_textbox(left, top, width, height)
+        #tf = txBox.text_frame
+        #p = tf.add_paragraph()
+        #p.text = summary[0]['summary_text']
+        #st.text(f"Paragraphe {i}: {summary[0]['summary_text']}")  # Affiche le résumé du paragraphe
         image_bytes = query({
             "inputs": 'A picture about  :' + summary[0]['summary_text']  # Utilisez le texte du résumé
         })
         image = Image.open(io.BytesIO(image_bytes))
+        #left = top = prs.slide_width * 0.1
         pic = slide.shapes.add_picture(io.BytesIO(image_bytes), left_img, top_img, image_width, image_height)
         title_width = slide_width * 0.7  # Le titre occupe 70% de la largeur de la slide
         title = slide.shapes.add_textbox(left_title, top_title, title_width, title_height)
         title_frame = title.text_frame
+        #title_p = title_frame.add_paragraph()
+        #title_p.text = "Paragraphe {i}: "
+        #title_p.alignment = PP_ALIGN.CENTER  # Centrez le texte horizontalement
         text_width = slide_width * 0.7  # Le texte occupe 70% de la largeur de la slide
         text_height = slide_height * 0.15  # Le texte occupe 15% de la hauteur de la slide
         left_text = (slide_width - text_width) / 2  # Centrez horizontalement
         top_text = slide_height * 0.85  # Occupe 85% de la hauteur en bas de la slide
         txBox = slide.shapes.add_textbox(left_text, top_text, text_width, text_height)
         tf = txBox.text_frame
         p = tf.add_paragraph()
         p.text = summary[0]['summary_text']
+        # Ajustez la taille de police pour le texte afin qu'il rentre dans le cadre de texte
+        while p.space_after > Pt(0):
+            p.font.size -= Pt(1)  # Réduisez la taille de police de 1 point
         p.space_before = Pt(12)  # Espace avant le paragraphe (12 points)
         p.space_after = Pt(12)  # Espace après le paragraphe (12 points)
         p.alignment = PP_ALIGN.CENTER  # Centrez le texte horizontalement
+        # Ajustez la largeur du cadre de texte du texte pour éviter le dépassement
         text_frame_width = Pt(text_width - 2)  # Réduisez la largeur de 0.1 pouce de chaque côté
+        #txBox.width = text_frame_width
         txBox.text_frame.width = text_frame_width
+       # st.image(image)
         i = i + 1
+    #tempfile_name = tempfile.NamedTemporaryFile(delete=False, suffix=".pptx").name
     pptx_stream = io.BytesIO()
     prs.save(pptx_stream)
     pptx_stream.seek(0)
     st.markdown(
+    f'<a href="data:application/vnd.openxmlformats-officedocument.presentationml.presentation;base64,{base64.b64encode(pptx_stream.read()).decode()}" download="presentation.pptx">Télécharger la présentation</a>',
+    unsafe_allow_html=True,
+)
+    #st.download_button("Télécharger la présentation", "output_path", key="download_pptx", mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")