Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
|
| 2 |
import re
|
| 3 |
import streamlit as st
|
| 4 |
#import fitz
|
|
@@ -21,16 +21,29 @@ st.title("PDF2SLIDE")
|
|
| 21 |
uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])
|
| 22 |
|
| 23 |
if uploaded_file is not None:
|
| 24 |
-
pdf_reader =
|
| 25 |
|
| 26 |
-
|
| 27 |
|
| 28 |
for page_number in range(len(pdf_reader.pages)):
|
| 29 |
page = pdf_reader.pages[page_number]
|
| 30 |
page_text = page.extract_text()
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
|
|
|
| 1 |
+
from PyPDF2 import PdfReader
|
| 2 |
import re
|
| 3 |
import streamlit as st
|
| 4 |
#import fitz
|
|
|
|
| 21 |
uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])
|
| 22 |
|
| 23 |
if uploaded_file is not None:
|
| 24 |
+
pdf_reader = PdfReader(uploaded_file)
|
| 25 |
|
| 26 |
+
unique_characters = set() # Utiliser un ensemble pour stocker les caractères uniques
|
| 27 |
|
| 28 |
for page_number in range(len(pdf_reader.pages)):
|
| 29 |
page = pdf_reader.pages[page_number]
|
| 30 |
page_text = page.extract_text()
|
| 31 |
+
unique_characters.update(set(page_text)) # Ajouter les caractères de cette page à l'ensemble
|
| 32 |
+
|
| 33 |
+
# Afficher les caractères uniques
|
| 34 |
+
st.write("Caractères uniques présents dans le PDF :")
|
| 35 |
+
st.write(''.join(unique_characters))
|
| 36 |
+
|
| 37 |
+
# pdf_reader = PyPDF2.PdfReader(uploaded_file)
|
| 38 |
+
|
| 39 |
+
# paragraphs = []
|
| 40 |
+
|
| 41 |
+
# for page_number in range(len(pdf_reader.pages)):
|
| 42 |
+
# page = pdf_reader.pages[page_number]
|
| 43 |
+
# page_text = page.extract_text()
|
| 44 |
+
# paragraph_list = re.split(r'\n{2,}', page_text)
|
| 45 |
+
# paragraphs.extend(paragraph_list)
|
| 46 |
+
# for paragraph in paragraphs:
|
| 47 |
+
# print(paragraph)
|
| 48 |
+
# print(424242)
|
| 49 |
|