Flo161 committed on
Commit
01a759b
·
1 Parent(s): 1974873

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -97
app.py CHANGED
@@ -1,11 +1,5 @@
1
  from PyPDF2 import PdfReader
2
  import re
3
- import shutil
4
- import tempfile
5
- import base64
6
- from pptx import Presentation
7
- from pptx.util import Inches, Pt
8
- from pptx.enum.text import PP_ALIGN
9
  import streamlit as st
10
  import fitz
11
  from transformers import pipeline
@@ -13,6 +7,9 @@ import os
13
  import requests
14
  import io
15
  from PIL import Image
 
 
 
16
 
17
# Hugging Face Inference API endpoint used to generate slide illustrations.
API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"
# SECURITY: the access token must never be committed to source control. It is
# read from the HF_API_TOKEN environment variable instead; any token that was
# previously hard-coded here should be treated as leaked and revoked.
headers = {"Authorization": "Bearer " + os.environ.get("HF_API_TOKEN", "")}
@@ -22,6 +19,23 @@ def query(payload):
22
  response = requests.post(API_URL, headers=headers, json=payload)
23
  return response.content
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
26
  paragraphs = []
27
 
@@ -37,26 +51,23 @@ def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
37
  previous_bottom = None
38
 
39
  for block in blocks:
40
- x0, y0, x1, y1 = block[:4] # Coordonnées du bloc de texte
41
- text = block[4] # Texte du bloc
42
 
43
- # Mesurez l'espacement vertical entre les blocs de texte
44
  if previous_bottom is not None:
45
  vertical_spacing = y0 - previous_bottom
46
  else:
47
  vertical_spacing = 0
48
 
49
- # Si l'espacement vertical dépasse le seuil, considérez-le comme un nouveau paragraphe
50
  if vertical_spacing > spacing_threshold:
51
  if current_paragraph:
52
  paragraphs.append(current_paragraph.strip())
53
  current_paragraph = text
54
  else:
55
- current_paragraph += " " + text # Ajoutez le texte au paragraphe actuel
56
 
57
  previous_bottom = y1
58
 
59
- # Ajoutez le dernier paragraphe de la page
60
  if current_paragraph:
61
  paragraphs.append(current_paragraph.strip())
62
 
@@ -68,100 +79,56 @@ def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
68
 
69
  st.title("PDF2SLIDE")
70
 
71
- uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])
72
 
73
  if uploaded_file is not None:
74
  pdf_data = uploaded_file.read()
75
 
76
  paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)
77
- i = 1
78
- prs = Presentation()
79
-
80
- for paragraph in paragraphs:
81
-
82
- summary = summarizer(paragraph, max_length=(len(paragraph)/4), min_length=10, do_sample=False)
83
-
84
- slide_layout = prs.slide_layouts[5] # Utilisez le modèle de diapositive approprié (index 5 pour une diapositive de titre et de contenu)
85
- slide = prs.slides.add_slide(slide_layout)
86
 
87
- title = slide.shapes.title
88
- title.text = f"Paragraphe {i}"
89
- title.alignment = PP_ALIGN.CENTER
90
-
91
- slide_width = prs.slide_width
92
- slide_height = prs.slide_height
93
 
94
- image_width = slide_width * 0.7 # L'image occupe 70% de la largeur de la slide
95
- image_height = slide_height * 0.7 # L'image occupe 70% de la hauteur de la slide
96
- left_img = (slide_width - image_width) / 2 # Centrez horizontalement
97
- top_img = (slide_height - image_height) * 0.6 # Occupe 15% de la hauteur en haut de la slide
98
 
99
- #left = prs.slide_width * 0.1
100
- #top = prs.slide_height * 0.6
101
- #width = prs.slide_width * 0.8
102
- #height = prs.slide_height * 0.3
103
- #txBox = slide.shapes.add_textbox(left, top, width, height)
104
- #tf = txBox.text_frame
105
- #p = tf.add_paragraph()
106
- #p.text = summary[0]['summary_text']
107
-
108
- #st.text(f"Paragraphe {i}: {summary[0]['summary_text']}") # Affiche le résumé du paragraphe
109
-
110
  image_bytes = query({
111
- "inputs": 'A picture about :' + summary[0]['summary_text'] # Utilisez le texte du résumé
112
  })
113
- image = Image.open(io.BytesIO(image_bytes))
114
- #left = top = prs.slide_width * 0.1
115
- pic = slide.shapes.add_picture(io.BytesIO(image_bytes), left_img, top_img, image_width, image_height)
116
-
117
- title_width = slide_width * 0.7 # Le titre occupe 70% de la largeur de la slide
118
- title_height = slide_height * 0.15 # Le titre occupe 15% de la hauteur de la slide
119
- left_title = (slide_width - title_width) / 2 # Centrez horizontalement
120
- top_title = (slide_height - title_height) * 0.05 # Occupe 5% de la hauteur en haut de la slide
121
- title = slide.shapes.add_textbox(left_title, top_title, title_width, title_height)
122
- title_frame = title.text_frame
123
-
124
- #title_p = title_frame.add_paragraph()
125
- #title_p.text = "Paragraphe {i}: "
126
- #title_p.alignment = PP_ALIGN.CENTER # Centrez le texte horizontalement
127
-
128
- text_width = slide_width * 0.7 # Le texte occupe 70% de la largeur de la slide
129
- text_height = slide_height * 0.15 # Le texte occupe 15% de la hauteur de la slide
130
- left_text = (slide_width - text_width) / 2 # Centrez horizontalement
131
- top_text = slide_height * 0.85 # Occupe 85% de la hauteur en bas de la slide
132
- txBox = slide.shapes.add_textbox(left_text, top_text, text_width, text_height)
133
  tf = txBox.text_frame
134
-
135
  p = tf.add_paragraph()
136
- p.text = summary[0]['summary_text']
137
-
138
- # Ajustez la taille de police pour le texte afin qu'il rentre dans le cadre de texte
139
- while p.space_after > Pt(0):
140
- p.font.size -= Pt(1) # Réduisez la taille de police de 1 point
141
-
142
- p.space_before = Pt(12) # Espace avant le paragraphe (12 points)
143
- p.space_after = Pt(12) # Espace après le paragraphe (12 points)
144
- p.alignment = PP_ALIGN.CENTER # Centrez le texte horizontalement
145
-
146
- # Ajustez la largeur du cadre de texte du texte pour éviter le dépassement
147
- text_frame_width = Pt(text_width - 2) # Réduisez la largeur de 0.1 pouce de chaque côté
148
- #txBox.width = text_frame_width
149
- txBox.text_frame.width = text_frame_width
150
-
151
- # st.image(image)
152
-
153
- i = i + 1
154
-
155
- #tempfile_name = tempfile.NamedTemporaryFile(delete=False, suffix=".pptx").name
156
- pptx_stream = io.BytesIO()
157
- prs.save(pptx_stream)
158
- pptx_stream.seek(0)
159
-
160
- st.markdown(
161
- f'<a href="data:application/vnd.openxmlformats-officedocument.presentationml.presentation;base64,{base64.b64encode(pptx_stream.read()).decode()}" download="presentation.pptx">Télécharger la présentation</a>',
162
- unsafe_allow_html=True,
163
- )
164
-
165
-
166
-
167
- #st.download_button("Télécharger la présentation", "output_path", key="download_pptx", mime="application/vnd.openxmlformats-officedocument.presentationml.presentation")
 
1
  from PyPDF2 import PdfReader
2
  import re
 
 
 
 
 
 
3
  import streamlit as st
4
  import fitz
5
  from transformers import pipeline
 
7
  import requests
8
  import io
9
  from PIL import Image
10
+ from pptx import Presentation
11
+ from pptx.util import Inches, Pt
12
+ import tempfile
13
 
14
# Hugging Face Inference API endpoint used to generate slide illustrations.
API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"
# SECURITY: the access token must never be committed to source control. It is
# read from the HF_API_TOKEN environment variable instead; any token that was
# previously hard-coded here should be treated as leaked and revoked.
headers = {"Authorization": "Bearer " + os.environ.get("HF_API_TOKEN", "")}
 
19
  response = requests.post(API_URL, headers=headers, json=payload)
20
  return response.content
21
 
22
def add_line_breaks_to_summary(summary_text, line_length):
    """Wrap *summary_text* greedily so lines stay within *line_length* characters.

    The text is split on whitespace and words are packed onto a line, joined
    by single spaces, until the next word would push the line past
    ``line_length``. Words are never broken: a word longer than
    ``line_length`` is placed on a line of its own.

    Args:
        summary_text: Text to wrap; runs of whitespace are collapsed.
        line_length: Maximum number of characters per line.

    Returns:
        The wrapped text with lines joined by ``"\\n"``; an empty string when
        ``summary_text`` contains no words.
    """
    lines = []
    current_line = ""
    for word in summary_text.split():
        if not current_line:
            # A word starting a line needs no separating space and is always
            # accepted; this avoids emitting a spurious empty line when the
            # word is as long as (or longer than) line_length.
            current_line = word
        elif len(current_line) + 1 + len(word) <= line_length:
            current_line += " " + word
        else:
            lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return "\n".join(lines)
38
+
39
  def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
40
  paragraphs = []
41
 
 
51
  previous_bottom = None
52
 
53
  for block in blocks:
54
+ x0, y0, x1, y1 = block[:4]
55
+ text = block[4]
56
 
 
57
  if previous_bottom is not None:
58
  vertical_spacing = y0 - previous_bottom
59
  else:
60
  vertical_spacing = 0
61
 
 
62
  if vertical_spacing > spacing_threshold:
63
  if current_paragraph:
64
  paragraphs.append(current_paragraph.strip())
65
  current_paragraph = text
66
  else:
67
+ current_paragraph += " " + text
68
 
69
  previous_bottom = y1
70
 
 
71
  if current_paragraph:
72
  paragraphs.append(current_paragraph.strip())
73
 
 
79
 
80
# --- Streamlit page: upload a PDF, build one illustrated slide per paragraph ---
st.title("PDF2SLIDE")

uploaded_file = st.file_uploader("Select a PDF", type=["pdf"])

if uploaded_file is not None:
    pdf_data = uploaded_file.read()
    paragraphs = extract_paragraphs_by_vertical_spacing(pdf_data)

    # Create a PowerPoint presentation
    prs = Presentation()

    for paragraph in paragraphs:
        # Pass an integer length bound (true division produced a float) and
        # never let it fall below min_length.
        max_length = max(len(paragraph) // 2, 10)
        summary = summarizer(paragraph, max_length=max_length, min_length=10, do_sample=False)
        raw_summary = summary[0]['summary_text']
        summary_text = add_line_breaks_to_summary(raw_summary, 80)

        # Generate the illustration for this paragraph
        image_bytes = query({
            "inputs": 'A picture without text about: ' + raw_summary
        })

        # Create a slide
        slide = prs.slides.add_slide(prs.slide_layouts[5])

        # Add the image, centered horizontally, 0.5 inch above the bottom edge.
        # The bytes are handed over through an in-memory stream so that no
        # temporary file is left behind for every slide.
        left = (prs.slide_width - Inches(3)) // 2
        top = prs.slide_height - Inches(3) - Inches(0.5)
        slide.shapes.add_picture(io.BytesIO(image_bytes), left, top, Inches(3), Inches(3))

        # Add the wrapped summary in a text box at the top of the slide
        txBox = slide.shapes.add_textbox(Inches(1), Inches(1), Inches(8), Inches(2))
        tf = txBox.text_frame
        p = tf.add_paragraph()
        p.text = summary_text
        p.space_after = Pt(0)

    # Serialize the presentation in memory instead of leaking a temp file and
    # an unclosed file handle.
    pptx_stream = io.BytesIO()
    prs.save(pptx_stream)

    # Display a download button for the PowerPoint file
    st.download_button(
        label="Download PowerPoint Presentation",
        data=pptx_stream.getvalue(),
        key="download_ppt",
        file_name="PDF2SLIDE_Presentation.pptx",
        mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
    )