xeroISB committed on
Commit
4babfd4
·
1 Parent(s): c5a2867

byte code

Browse files
Files changed (2) hide show
  1. app.py +21 -11
  2. requirements.txt +1 -1
app.py CHANGED
@@ -2,22 +2,32 @@ import gradio as gr
2
  from PyPDF2 import PdfReader
3
  import io
4
 
5
- # Function to convert PDF to text
6
- def pdf_to_text(pdf_file):
7
- # Read the uploaded file
8
- reader = PdfReader(pdf_file)
9
- text = ""
10
- for page in reader.pages:
11
- text += page.extract_text()
12
- return text
 
13
 
14
- # Gradio interface to upload PDF and show the converted text
 
 
 
 
 
 
 
 
 
15
  iface = gr.Interface(
16
  fn=pdf_to_text, # Function to call for text extraction
17
- inputs=gr.File(label="Upload PDF"), # PDF file input
18
  outputs="text", # Output the extracted text
19
  title="PDF to Text Conversion",
20
- description="Upload a PDF file and extract its text."
21
  )
22
 
23
  if __name__ == "__main__":
 
2
  from PyPDF2 import PdfReader
3
  import io
4
 
5
+ # Function to convert PDF to text (handles both byte data and file uploads)
6
+ def pdf_to_text(file_input):
7
+ # If the input is in byte format (i.e., it comes as raw bytes from a file or Base64 encoding)
8
+ if isinstance(file_input, bytes):
9
+ # Treat it as byte data and convert it to a file-like object
10
+ pdf_file = io.BytesIO(file_input)
11
+ else:
12
+ # If it's a regular PDF file (file upload), open it from the file input
13
+ pdf_file = file_input.name # This will get the file path if it's a regular file upload
14
 
15
+ try:
16
+ reader = PdfReader(pdf_file)
17
+ text = ""
18
+ for page in reader.pages:
19
+ text += page.extract_text()
20
+ return text
21
+ except Exception as e:
22
+ return f"Error while processing the PDF: {str(e)}"
23
+
24
+ # Gradio interface: allow both file uploads and byte data input
25
  iface = gr.Interface(
26
  fn=pdf_to_text, # Function to call for text extraction
27
+ inputs=gr.File(label="Upload PDF or send Byte data"), # File input
28
  outputs="text", # Output the extracted text
29
  title="PDF to Text Conversion",
30
+ description="Upload a PDF file or send byte data (Base64 encoded) to extract its text."
31
  )
32
 
33
  if __name__ == "__main__":
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
 
2
  PyPDF2
3
- python-multipart
 
1
 
2
  PyPDF2
3
+ python-multipart