Spaces:
Runtime error
Runtime error
Commit ·
c8ec340
1
Parent(s): 6277da1
update app.py
Browse files
app.py
CHANGED
|
@@ -2,7 +2,10 @@
|
|
| 2 |
import gradio as gr
|
| 3 |
import pandas as pd # Import pandas
|
| 4 |
from ocr_request import ocr_request
|
| 5 |
-
import
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def process_file(files):
|
| 8 |
response_arr = []
|
|
@@ -14,28 +17,52 @@ def process_file(files):
|
|
| 14 |
print("Main file :", response_arr)
|
| 15 |
|
| 16 |
#i= [[{'invoice_number': '349136', 'product_description': '1ST FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '3495565136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}],[{'invoice_number': '349136', 'product_description': ' FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}]]
|
| 17 |
-
flat_list = []
|
| 18 |
|
| 19 |
-
for item in response_arr:
|
| 20 |
-
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
print("Df final : ", df)
|
| 41 |
# Save the dataframe to a CSV in-memory
|
|
@@ -54,6 +81,6 @@ interface = gr.Interface(fn=process_file,
|
|
| 54 |
inputs=gr.inputs.File(label="Upload a File", file_count='multiple'),
|
| 55 |
outputs=["dataframe",gr.outputs.File(label="Download CSV")]) # Specify "dataframe" as output type
|
| 56 |
|
| 57 |
-
interface.launch()
|
| 58 |
|
| 59 |
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
import pandas as pd # Import pandas
|
| 4 |
from ocr_request import ocr_request
|
| 5 |
+
import os
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
import openai
|
| 8 |
+
import json
|
| 9 |
|
| 10 |
def process_file(files):
|
| 11 |
response_arr = []
|
|
|
|
| 17 |
print("Main file :", response_arr)
|
| 18 |
|
| 19 |
#i= [[{'invoice_number': '349136', 'product_description': '1ST FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '3495565136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}],[{'invoice_number': '349136', 'product_description': ' FLOOR WALLS', 'predicted_material': 'Framing', 'confidence': 0.8}, {'invoice_number': '349136', 'product_description': "11.875 X 16 ' Pro Lam 2.0 LVL 1.75 ( 7 @ 16 ' , 4 @\n8 ' )", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "COLUMN\n11.875 X 10 ' Pro Lam 2.0 LVL 1.75", 'predicted_material': 'Framing', 'confidence': 0.9}, {'invoice_number': '349136', 'product_description': "Power Column 3 1/2 X 5 1/2 - 08 '", 'predicted_material': 'Framing', 'confidence': 0.9}]]
|
| 20 |
+
# flat_list = []
|
| 21 |
|
| 22 |
+
# for item in response_arr:
|
| 23 |
+
# invoice_number = item['invoice_number']
|
| 24 |
|
| 25 |
+
# # Extracting product descriptions
|
| 26 |
+
# products = item.get('predictions', []) or item.get('product_description', [])
|
| 27 |
|
| 28 |
+
# for product in products:
|
| 29 |
+
# # Rename 'description' key to 'product_description' for uniformity across all products
|
| 30 |
+
# product_description = product.get('product_description', product.get('description'))
|
| 31 |
+
# predicted_material = product['predicted_material']
|
| 32 |
+
# confidence = product['confidence']
|
| 33 |
|
| 34 |
+
# flat_list.append({
|
| 35 |
+
# 'invoice_number': invoice_number,
|
| 36 |
+
# 'product_description': product_description,
|
| 37 |
+
# 'predicted_material': predicted_material,
|
| 38 |
+
# 'confidence': confidence
|
| 39 |
+
# })
|
| 40 |
|
| 41 |
+
load_dotenv()
|
| 42 |
+
# Initialize OpenAI with your API key
|
| 43 |
+
openai.api_key = os.getenv("OPENAI_API_KEY")
|
| 44 |
+
|
| 45 |
+
prompt =f"""
|
| 46 |
+
you are an excellent programmer and an anlyst. Given a json array or a json, you need to analyse it and convert into a json format which can be converted in dataframe of pandas easily. You have a singular task :
|
| 47 |
+
Once you have thought through, produce a json, easily convertible to a dataframe in python, which would contain invoice number, product description, predicted material, confidence. Remember: You just have to share the o/p json, no thought process or anything else.
|
| 48 |
+
|
| 49 |
+
Here is the json array/json : {json.dumps(response_arr)}
|
| 50 |
+
"""
|
| 51 |
+
messages=[{"role": "user", "content":prompt}]
|
| 52 |
+
# Use the OpenAI chat completion API with GPT-4 (change the `model` value below to switch to a different model)
|
| 53 |
+
response = openai.ChatCompletion.create(
|
| 54 |
+
model="gpt-4",
|
| 55 |
+
max_tokens=5000,
|
| 56 |
+
temperature=0,
|
| 57 |
+
messages = messages
|
| 58 |
+
)
|
| 59 |
+
# Extracting the result
|
| 60 |
+
result = response.choices[0]["message"]["content"]
|
| 61 |
+
print("After in min gpt")
|
| 62 |
+
print(json.loads(result))
|
| 63 |
+
|
| 64 |
+
df = pd.DataFrame(json.loads(result))
|
| 65 |
+
# df = pd.DataFrame(flat_list)
|
| 66 |
|
| 67 |
print("Df final : ", df)
|
| 68 |
# Save the dataframe to a CSV in-memory
|
|
|
|
| 81 |
inputs=gr.inputs.File(label="Upload a File", file_count='multiple'),
|
| 82 |
outputs=["dataframe",gr.outputs.File(label="Download CSV")]) # Specify "dataframe" as output type
|
| 83 |
|
| 84 |
+
interface.launch(share=True)
|
| 85 |
|
| 86 |
|