Spaces:

pratikshahp
/

Invoice-Extractor

Sleeping

App Files Community

pratikshahp committed on Jun 11, 2024

Commit

ce975c6

verified ·

1 Parent(s): 253113c

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -58

app.py CHANGED Viewed

@@ -1,63 +1,44 @@
-#Invoice Extractor
-import os
-import streamlit as st
 from PIL import Image
-# Load model directly
-from transformers import AutoProcessor, AutoModelForSeq2SeqLM
-def get_model_response(input_text, image, prompt):
-    # Load the pre-trained model
-    processor = AutoProcessor.from_pretrained("google/pix2struct-base")
-    model = AutoModelForSeq2SeqLM.from_pretrained("google/pix2struct-base")
-    # Tokenize inputs
-    inputs = tokenizer(input_text, image, prompt, return_tensors="pt", max_length=512, truncation=True)
-    # Generate response from the model
-    outputs = model(**inputs)
-    response = tokenizer.decode(outputs.logits.argmax(dim=-1))
-    return response
-#function to provide image
-def input_image_setup(uploaded_file):
-    if uploaded_file is not None:
-        #read te file into byte
-        bytes_data = uploaded_file.getvalue()
-        image_parts=[
-            {
-                "mime_type": uploaded_file.type,
-                "data": bytes_data
-            }
-        ]
-        return image_parts
-    else:
-        raise FileNotFoundError("No file uploaded")
-#Streamlit App
-st.set_page_config(page_title="Invoice Extractor")
-st.header("Gemini Application")
-input=st.text_input("Input Prompt: ",key="input")
-uploaded_file = st.file_uploader("choose an image...", type=["jpg","jpeg","png"])
-image=""
 if uploaded_file is not None:
     image = Image.open(uploaded_file)
-    st.image(image, caption="Uploaded Image.", use_column_width=True)
-submit = st.button("Tell me about the invoice")
-input_prompt="""
-You are an expert in understanding invoices. you will receive input images as invoices and
-you will have to answer questions based on the input image.
-"""
-#if submit button is clicked,
-if submit:
-    image_data=input_image_setup(uploaded_file)
-    response=get_model_response(input_prompt,image_data,input)
-    st.subheader("The response is..")
-    st.write(response)

 from PIL import Image
+import streamlit as st
+from transformers import AutoModelForImageToText, AutoTokenizer, AutoModelForCausalLM, AutoTokenizer
+import torch
+# Load Hugging Face models
+image_to_text_model = AutoModelForImageToText.from_pretrained("microsoft/layoutlm-base-uncased")
+image_to_text_tokenizer = AutoTokenizer.from_pretrained("microsoft/layoutlm-base-uncased")
+text_generation_model = AutoModelForCausalLM.from_pretrained("gpt2")
+text_generation_tokenizer = AutoTokenizer.from_pretrained("gpt2")
+# Function to extract text from image
+def extract_text_from_image(image):
+    inputs = image_to_text_tokenizer(image, return_tensors="pt")
+    with torch.no_grad():
+        outputs = image_to_text_model(**inputs)
+    return image_to_text_tokenizer.batch_decode(outputs.logits, skip_special_tokens=True)[0]
+# Function to generate response
+def generate_response(prompt, text):
+    inputs = text_generation_tokenizer(prompt + text, return_tensors="pt")
+    with torch.no_grad():
+        outputs = text_generation_model.generate(**inputs)
+    return text_generation_tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
+# Streamlit App
+st.set_page_config(page_title="Invoice Extractor")
+st.header("Invoice Extractor")
+prompt = st.text_input("Prompt: ", "Please provide details about this invoice.")
+uploaded_file = st.file_uploader("Upload an invoice image...", type=["jpg", "jpeg", "png"])
 if uploaded_file is not None:
     image = Image.open(uploaded_file)
+    st.image(image, caption="Uploaded Image.", use_column_width=True)
+    if st.button("Extract and Generate"):
+        if image:
+            extracted_text = extract_text_from_image(image)
+            response = generate_response(prompt, extracted_text)
+            st.subheader("Generated Response:")
+            st.write(response)
+        else:
+            st.warning("Please upload an image.")