AzizWazir committed on
Commit
ef255ad
·
verified ·
1 Parent(s): 42d5a84

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -3
app.py CHANGED
@@ -1,16 +1,21 @@
1
  import streamlit as st
2
- import fitz # PyMuPDF
3
  import pytesseract
4
  from pdf2image import convert_from_path
5
  from PIL import Image
6
  import pandas as pd
7
  from docx import Document
8
  import io
 
9
 
10
  # OCR function to convert image-based PDF to text
11
  def extract_text_from_image_pdf(uploaded_file):
12
- # Convert PDF to images
13
- images = convert_from_path(uploaded_file)
 
 
 
 
 
14
  extracted_text = []
15
 
16
  for image in images:
 
1
  import streamlit as st
 
2
  import pytesseract
3
  from pdf2image import convert_from_path
4
  from PIL import Image
5
  import pandas as pd
6
  from docx import Document
7
  import io
8
+ import tempfile
9
 
10
  # OCR function to convert image-based PDF to text
11
  def extract_text_from_image_pdf(uploaded_file):
12
+ # Save the uploaded file to a temporary file
13
+ with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
14
+ tmp_file.write(uploaded_file.read()) # Write the file content to the temporary file
15
+ tmp_file_path = tmp_file.name # Get the temporary file path
16
+
17
+ # Convert PDF to images using pdf2image
18
+ images = convert_from_path(tmp_file_path)
19
  extracted_text = []
20
 
21
  for image in images: