yannESGI committed on
Commit
df3a05c
·
1 Parent(s): b3633d6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -8
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import PyPDF2
2
  import re
3
  import streamlit as st
4
  #import fitz
@@ -21,16 +21,29 @@ st.title("PDF2SLIDE")
21
  uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])
22
 
23
  if uploaded_file is not None:
24
- pdf_reader = PyPDF2.PdfReader(uploaded_file)
25
 
26
- paragraphs = []
27
 
28
  for page_number in range(len(pdf_reader.pages)):
29
  page = pdf_reader.pages[page_number]
30
  page_text = page.extract_text()
31
- paragraph_list = re.split(r'\n{2,}', page_text)
32
- paragraphs.extend(paragraph_list)
33
- for paragraph in paragraphs:
34
- print(paragraph)
35
- print(424242)
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
 
1
+ from PyPDF2 import PdfReader
2
  import re
3
  import streamlit as st
4
  #import fitz
 
21
  uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])
22
 
23
  if uploaded_file is not None:
24
+ pdf_reader = PdfReader(uploaded_file)
25
 
26
+ unique_characters = set() # Utiliser un ensemble pour stocker les caractères uniques
27
 
28
  for page_number in range(len(pdf_reader.pages)):
29
  page = pdf_reader.pages[page_number]
30
  page_text = page.extract_text()
31
+ unique_characters.update(set(page_text)) # Ajouter les caractères de cette page à l'ensemble
32
+
33
+ # Afficher les caractères uniques
34
+ st.write("Caractères uniques présents dans le PDF :")
35
+ st.write(''.join(unique_characters))
36
+
37
+ # pdf_reader = PyPDF2.PdfReader(uploaded_file)
38
+
39
+ # paragraphs = []
40
+
41
+ # for page_number in range(len(pdf_reader.pages)):
42
+ # page = pdf_reader.pages[page_number]
43
+ # page_text = page.extract_text()
44
+ # paragraph_list = re.split(r'\n{2,}', page_text)
45
+ # paragraphs.extend(paragraph_list)
46
+ # for paragraph in paragraphs:
47
+ # print(paragraph)
48
+ # print(424242)
49