Spaces:

omdivyatej
/

general_invoice_parser

Runtime error

App Files Files Community

omdivyatej committed on Oct 3, 2023

Commit

c8ec340

1 Parent(s): 6277da1

update app.py

Browse files

Files changed (1) hide show

app.py +46 -19

app.py CHANGED Viewed

@@ -2,7 +2,10 @@
 import gradio as gr
 import pandas as pd  # Import pandas
 from ocr_request import ocr_request
-import io
 def process_file(files):
     response_arr = []
@@ -14,28 +17,52 @@ def process_file(files):
     print("Main file :", response_arr)
     #i= [[{'invoice_number': '349136', 'product_description': '1ST FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '3495565136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}],[{'invoice_number': '349136', 'product_description': ' FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}]]
-    flat_list = []
-    for item in response_arr:
-        invoice_number = item['invoice_number']
-        # Extracting product descriptions
-        products = item.get('predictions', []) or item.get('product_description', [])
-        for product in products:
-            # Rename 'description' key to 'product_description' for uniformity across all products
-            product_description = product.get('product_description', product.get('description'))
-            predicted_material = product['predicted_material']
-            confidence = product['confidence']
-            flat_list.append({
-                'invoice_number': invoice_number,
-                'product_description': product_description,
-                'predicted_material': predicted_material,
-                'confidence': confidence
-            })
-    df = pd.DataFrame(flat_list)
     print("Df final : ", df)
     # Save the dataframe to a CSV in-memory
@@ -54,6 +81,6 @@ interface = gr.Interface(fn=process_file,
                          inputs=gr.inputs.File(label="Upload a File", file_count='multiple'),
                          outputs=["dataframe",gr.outputs.File(label="Download CSV")])  # Specify "dataframe" as output type
-interface.launch()

 import gradio as gr
 import pandas as pd  # Import pandas
 from ocr_request import ocr_request
+import os
+from dotenv import load_dotenv
+import openai
+import json
 def process_file(files):
     response_arr = []
     print("Main file :", response_arr)
     #i= [[{'invoice_number': '349136', 'product_description': '1ST FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '3495565136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}],[{'invoice_number': '349136', 'product_description': ' FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}]]
+    # flat_list = []
+    # for item in response_arr:
+    #     invoice_number = item['invoice_number']
+    #     # Extracting product descriptions
+    #     products = item.get('predictions', []) or item.get('product_description', [])
+    #     for product in products:
+    #         # Rename 'description' key to 'product_description' for uniformity across all products
+    #         product_description = product.get('product_description', product.get('description'))
+    #         predicted_material = product['predicted_material']
+    #         confidence = product['confidence']
+    #         flat_list.append({
+    #             'invoice_number': invoice_number,
+    #             'product_description': product_description,
+    #             'predicted_material': predicted_material,
+    #             'confidence': confidence
+    #         })
+    load_dotenv()
+    # Initialize OpenAI with your API key
+    openai.api_key = os.getenv("OPENAI_API_KEY")
+    prompt =f"""
+    you are an excellent programmer and an anlyst. Given a json array or a json, you need to analyse it and convert into a json format which can be converted in dataframe of pandas easily. You have a singular task :
+    Once you have thought through, produce a json, easily convertible to a dataframe in python, which would contain invoice number, product description, predicted material, confidence. Remember: You just have to share the o/p json, no thought process or anything else.
+    Here is the json array/json : {json.dumps(response_arr)}
+    """
+    messages=[{"role": "user", "content":prompt}]
+    # Ask the "gpt-4" chat model to restructure response_arr into JSON that pandas can load directly
+    response = openai.ChatCompletion.create(
+        model="gpt-4",
+        max_tokens=5000,
+        temperature=0,
+        messages = messages
+    )
+    # Extracting the result
+    result = response.choices[0]["message"]["content"]
+    print("After in min gpt")
+    print(json.loads(result))
+    df = pd.DataFrame(json.loads(result))
+    # df = pd.DataFrame(flat_list)
     print("Df final : ", df)
     # Save the dataframe to a CSV in-memory
                          inputs=gr.inputs.File(label="Upload a File", file_count='multiple'),
                          outputs=["dataframe",gr.outputs.File(label="Download CSV")])  # Specify "dataframe" as output type
+interface.launch(share=True)