yannESGI committed on
Commit
56bfb5c
·
1 Parent(s): e311af8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -24
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import PyPDF2
2
- import subprocess
3
  import streamlit as st
4
  #import fitz
5
  from transformers import pipeline
@@ -21,29 +21,15 @@ st.title("PDF2SLIDE")
21
  uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])
22
 
23
  if uploaded_file is not None:
24
- # pdf_reader = PyPDF2.PdfReader(uploaded_file)
25
-
26
- # paragraphs = []
27
-
28
- # for page_number in range(len(pdf_reader.pages)):
29
- # page = pdf_reader.pages[page_number]
30
- # page_text = page.extract_text()
31
- # #print(page_text)
32
- # paragraphs.extend(page_text.split('\n\n'))
33
- # for paragraph in paragraphs:
34
- # print(paragraph)
35
- # print(424242)
36
-
37
-
38
- pdf_text = None
39
- print(f"Chemin du fichier PDF : {uploaded_file.name}")
40
- try:
41
- pdf_text = subprocess.check_output(["pdftotext", uploaded_file.name, "-"]).decode("utf-8")
42
- except Exception as e:
43
- st.error(f"Erreur lors de la conversion du PDF en texte : {str(e)}")
44
-
45
- if pdf_text:
46
- paragraphs = pdf_text.split('\n\n')
47
  for paragraph in paragraphs:
48
  print(paragraph)
49
  print(424242)
 
1
  import PyPDF2
2
+ import re
3
  import streamlit as st
4
  #import fitz
5
  from transformers import pipeline
 
21
  uploaded_file = st.file_uploader("Selectionnez un PDF", type=["pdf"])
22
 
23
  if uploaded_file is not None:
24
+ pdf_reader = PyPDF2.PdfFileReader(uploaded_file)
25
+
26
+ paragraphs = []
27
+
28
+ for page_number in range(pdf_reader.numPages):
29
+ page = pdf_reader.getPage(page_number)
30
+ page_text = page.extractText()
31
+ paragraph_list = re.split(r'\n{2,}', page_text)
32
+ paragraphs.extend(paragraph_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  for paragraph in paragraphs:
34
  print(paragraph)
35
  print(424242)