newoz committed on
Commit
4166de3
·
1 Parent(s): c3a449c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -63
app.py CHANGED
@@ -1,71 +1,15 @@
1
- from PyPDF2 import PdfReader
2
- import re
3
  import streamlit as st
4
- import fitz
5
- from transformers import pipeline
6
- import os
7
- import requests
8
- import io
9
- from PIL import Image
10
  from pptx import Presentation
11
  from pptx.util import Inches
12
  import tempfile
 
 
13
 
14
- API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"
15
- headers = {"Authorization": "Bearer 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"}
16
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
17
-
18
- def query(payload):
19
- response = requests.post(API_URL, headers=headers, json=payload)
20
- return response.content
21
-
22
- def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
23
- paragraphs = []
24
-
25
- try:
26
- pdf_stream = io.BytesIO(pdf_data)
27
- pdf_document = fitz.open(stream=pdf_stream, filetype="pdf")
28
-
29
- for page_number in range(pdf_document.page_count):
30
- page = pdf_document.load_page(page_number)
31
- blocks = page.get_text("blocks")
32
-
33
- current_paragraph = ""
34
- previous_bottom = None
35
-
36
- for block in blocks:
37
- x0, y0, x1, y1 = block[:4] # Coordonnées du bloc de texte
38
- text = block[4] # Texte du bloc
39
-
40
- # Mesurez l'espacement vertical entre les blocs de texte
41
- if previous_bottom is not None:
42
- vertical_spacing = y0 - previous_bottom
43
- else:
44
- vertical_spacing = 0
45
-
46
- # Si l'espacement vertical dépasse le seuil, considérez-le comme un nouveau paragraphe
47
- if vertical_spacing > spacing_threshold:
48
- if current_paragraph:
49
- paragraphs.append(current_paragraph.strip())
50
- current_paragraph = text
51
- else:
52
- current_paragraph += " " + text # Ajoutez le texte au paragraphe actuel
53
-
54
- previous_bottom = y1
55
-
56
- # Ajoutez le dernier paragraphe de la page
57
- if current_paragraph:
58
- paragraphs.append(current_paragraph.strip())
59
-
60
- pdf_document.close()
61
- except Exception as e:
62
- print(f"Erreur lors de l'extraction du PDF : {str(e)}")
63
-
64
- return paragraphs
65
 
66
  st.title("PDF2SLIDE")
67
 
68
- uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])
69
 
70
  if uploaded_file is not None:
71
  pdf_data = uploaded_file.read()
@@ -100,7 +44,10 @@ if uploaded_file is not None:
100
  image = Image.open(io.BytesIO(image_bytes))
101
  temp_img_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
102
  image.save(temp_img_path)
103
- slide.shapes.add_picture(temp_img_path, Inches(1), Inches(1.5), Inches(5), Inches(3))
 
 
 
104
 
105
  i += 1
106
 
@@ -111,7 +58,7 @@ if uploaded_file is not None:
111
  # Display a download button for the PowerPoint file
112
  st.download_button(
113
  label="Download PowerPoint Presentation",
114
- data=presentation_path,
115
  key="download_ppt",
116
  file_name="PDF2SLIDE_Presentation.pptx",
117
- )
 
 
 
1
  import streamlit as st
 
 
 
 
 
 
2
  from pptx import Presentation
3
  from pptx.util import Inches
4
  import tempfile
5
+ from PIL import Image
6
+ import io
7
 
8
+ # ... (previous code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  st.title("PDF2SLIDE")
11
 
12
+ uploaded_file = st.file_uploader("Select a PDF", type=["pdf"])
13
 
14
  if uploaded_file is not None:
15
  pdf_data = uploaded_file.read()
 
44
  image = Image.open(io.BytesIO(image_bytes))
45
  temp_img_path = tempfile.NamedTemporaryFile(delete=False, suffix=".png").name
46
  image.save(temp_img_path)
47
+
48
+ left = Inches(1)
49
+ top = Inches(2)
50
+ pic = slide.shapes.add_picture(temp_img_path, left, top, width, height)
51
 
52
  i += 1
53
 
 
58
  # Display a download button for the PowerPoint file
59
  st.download_button(
60
  label="Download PowerPoint Presentation",
61
+ data=open(presentation_path, "rb"),
62
  key="download_ppt",
63
  file_name="PDF2SLIDE_Presentation.pptx",
64
+ )