kxx-kkk committed on
Commit
020d5d0
·
verified ·
1 Parent(s): d3bbe1d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -18,19 +18,19 @@ def extract_text(file_path):
18
  for page_number in range(num_pages):
19
  # st.write(f"Page {page_number + 1}")
20
  page = pdf_reader.pages[page_number]
21
- text = page.extract_text()
22
 
23
  images = convert_from_path(file_path) # Convert PDF pages to images
24
  for i, image in enumerate(images):
25
  # st.write(f"Page {i + 1}")
26
- image_text = pytesseract.image_to_string(image)
27
 
28
  st.write("text")
29
  st.write(text)
30
 
31
  st.write("image_text")
32
  st.write(image_text)
33
-
34
  text = text + image_text
35
  st.write("plus")
36
  st.write(text) # Display the extracted text from the image
 
18
  for page_number in range(num_pages):
19
  # st.write(f"Page {page_number + 1}")
20
  page = pdf_reader.pages[page_number]
21
+ text += page.extract_text()
22
 
23
  images = convert_from_path(file_path) # Convert PDF pages to images
24
  for i, image in enumerate(images):
25
  # st.write(f"Page {i + 1}")
26
+ image_text += pytesseract.image_to_string(image)
27
 
28
  st.write("text")
29
  st.write(text)
30
 
31
  st.write("image_text")
32
  st.write(image_text)
33
+
34
  text = text + image_text
35
  st.write("plus")
36
  st.write(text) # Display the extracted text from the image