newoz committed on
Commit
67bf68b
·
1 Parent(s): 4166de3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -3
app.py CHANGED
@@ -1,11 +1,67 @@
 
 
1
  import streamlit as st
 
 
 
 
 
 
2
  from pptx import Presentation
3
  from pptx.util import Inches
4
  import tempfile
5
- from PIL import Image
6
- import io
7
 
8
- # ... (previous code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  st.title("PDF2SLIDE")
11
 
 
1
+ from PyPDF2 import PdfReader
2
+ import re
3
  import streamlit as st
4
+ import fitz
5
+ from transformers import pipeline
6
+ import os
7
+ import requests
8
+ import io
9
+ from PIL import Image
10
  from pptx import Presentation
11
  from pptx.util import Inches
12
  import tempfile
 
 
13
 
14
# Hosted inference endpoint that `query` posts its payloads to.
API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5"

# SECURITY: the access token must never be committed to source control.
# It is read from the HF_TOKEN environment variable instead; any token that
# was ever published in this file's history should be revoked and rotated.
_HF_API_TOKEN = os.environ.get("HF_TOKEN", "")

# Request headers sent with every call to API_URL. The Authorization header
# is only included when a token is configured, so a malformed "Bearer "
# value is never sent.
headers = {"Authorization": f"Bearer {_HF_API_TOKEN}"} if _HF_API_TOKEN else {}

# Summarization pipeline, built once at import time and shared by the module.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
17
+
18
def query(payload, timeout=120):
    """POST ``payload`` as JSON to ``API_URL`` and return the raw response body.

    Args:
        payload: JSON-serialisable object sent as the request body.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would freeze the
            app if the endpoint stalls.

    Returns:
        The response body as ``bytes``. The HTTP status is not inspected
        here, so an error reply is returned as-is and must be handled by
        the caller.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            ``timeout`` elapses.
    """
    response = requests.post(
        API_URL,
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.content
21
+
22
def extract_paragraphs_by_vertical_spacing(pdf_data, spacing_threshold=10):
    """Split the text of a PDF into paragraphs using vertical gaps between blocks.

    Each page is read as a sequence of text blocks. Consecutive blocks are
    merged into one paragraph until the gap between the bottom of the
    previous block and the top of the next one exceeds ``spacing_threshold``.
    Paragraphs never span two pages.

    Args:
        pdf_data: Raw bytes of the PDF document.
        spacing_threshold: Minimum vertical gap that starts a new paragraph,
            in the same units as the block coordinates (presumably PDF
            points; confirm against the PyMuPDF documentation).

    Returns:
        A list of non-empty, whitespace-stripped paragraph strings. Extraction
        is best-effort: on any error the message is printed and the
        paragraphs collected so far (possibly none) are returned.
    """
    paragraphs = []
    pdf_document = None

    def flush(paragraph):
        # Strip first so that whitespace-only paragraphs are dropped instead
        # of being stored as empty strings.
        cleaned = paragraph.strip()
        if cleaned:
            paragraphs.append(cleaned)

    try:
        pdf_stream = io.BytesIO(pdf_data)
        pdf_document = fitz.open(stream=pdf_stream, filetype="pdf")

        for page_number in range(pdf_document.page_count):
            page = pdf_document.load_page(page_number)
            blocks = page.get_text("blocks")

            current_paragraph = ""
            previous_bottom = None

            for block in blocks:
                # Block layout: (x0, y0, x1, y1, text, ...). Only the
                # vertical extent and the text are needed here.
                y0, y1 = block[1], block[3]
                text = block[4]

                # Vertical gap to the previous block; the first block of a
                # page has no predecessor, so its gap counts as zero.
                if previous_bottom is not None:
                    vertical_spacing = y0 - previous_bottom
                else:
                    vertical_spacing = 0

                if vertical_spacing > spacing_threshold:
                    # Gap is large enough: close the current paragraph and
                    # start a new one with this block.
                    flush(current_paragraph)
                    current_paragraph = text
                else:
                    # Same paragraph: append this block's text.
                    current_paragraph += " " + text

                previous_bottom = y1

            # Emit the last paragraph of the page.
            flush(current_paragraph)
    except Exception as e:
        # Deliberate best-effort: report the problem and return what we have.
        print(f"Erreur lors de l'extraction du PDF : {str(e)}")
    finally:
        # Always release the document, including when extraction failed
        # part-way through.
        if pdf_document is not None:
            pdf_document.close()

    return paragraphs
65
 
66
  st.title("PDF2SLIDE")
67