muradkhan committed on
Commit
ac61621
·
verified ·
1 Parent(s): 45a0b43

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -12
app.py CHANGED
@@ -1,23 +1,25 @@
1
- import PyPDF2
2
  from pprint import pprint
3
- from getpass import getpass
4
- from haystack import Pipeline
5
- from haystack.schema import Document
6
  from haystack.nodes import BM25Retriever
7
  from haystack.document_stores import InMemoryDocumentStore
8
  from haystack.nodes import PromptTemplate, PromptNode
 
9
  import gradio as gr
10
  import os
11
 
12
- HF_TOKEN = getpass("Enter Token")
13
- from huggingface_hub import notebook_login
14
- notebook_login()
 
 
 
 
15
 
16
  # Process and retrieve answers
17
  def process_invoice(file, hf_token, questions):
18
- # Read file content
19
- file_content = file.read()
20
- document = Document(content=file_content)
21
  docs = [document]
22
 
23
  document_store = InMemoryDocumentStore(use_bm25=True)
@@ -60,13 +62,13 @@ def gradio_interface(file, hf_token, questions):
60
  interface = gr.Interface(
61
  fn=gradio_interface,
62
  inputs=[
63
- gr.inputs.File(file_count="single", type="file", label="Upload Invoice (PDF or Image)"),
64
  gr.inputs.Textbox(type="password", label="Enter your Hugging Face Token"),
65
  gr.inputs.Textbox(lines=5, placeholder="Enter your questions separated by commas")
66
  ],
67
  outputs="json",
68
  title="Invoice Data Extraction",
69
- description="Upload an invoice PDF or image, provide your Hugging Face token, and get the extracted data based on your questions."
70
  )
71
 
72
  if __name__ == "__main__":
 
 
1
  from pprint import pprint
2
+ from haystack import Document, Pipeline
 
 
3
  from haystack.nodes import BM25Retriever
4
  from haystack.document_stores import InMemoryDocumentStore
5
  from haystack.nodes import PromptTemplate, PromptNode
6
+ from PyPDF2 import PdfReader
7
  import gradio as gr
8
  import os
9
 
10
+ # Read a PDF file and return the concatenated extracted text of all its pages
11
+ def read_pdf(pdf_path):
12
+ content = ""
13
+ reader = PdfReader(pdf_path)
14
+ for page in reader.pages:
15
+ content += page.extract_text()
16
+ return content
17
 
18
  # Process and retrieve answers
19
  def process_invoice(file, hf_token, questions):
20
+ # Read the PDF content directly
21
+ pdf_content = read_pdf(file.name)
22
+ document = Document(content=pdf_content)
23
  docs = [document]
24
 
25
  document_store = InMemoryDocumentStore(use_bm25=True)
 
62
  interface = gr.Interface(
63
  fn=gradio_interface,
64
  inputs=[
65
+ gr.inputs.File(file_count="single", type="file", label="Upload Invoice (PDF)"),
66
  gr.inputs.Textbox(type="password", label="Enter your Hugging Face Token"),
67
  gr.inputs.Textbox(lines=5, placeholder="Enter your questions separated by commas")
68
  ],
69
  outputs="json",
70
  title="Invoice Data Extraction",
71
+ description="Upload an invoice PDF, provide your Hugging Face token, and get the extracted data based on your questions."
72
  )
73
 
74
  if __name__ == "__main__":