Update app.py
Browse files
app.py
CHANGED
|
@@ -35,6 +35,13 @@ def clean_description(description, item_number=None):
|
|
| 35 |
|
| 36 |
return description.strip()
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# Function to extract PO items with better error handling and checks
|
| 39 |
def parse_po_items_with_filters(text):
|
| 40 |
"""
|
|
@@ -60,7 +67,7 @@ def parse_po_items_with_filters(text):
|
|
| 60 |
# Save the previous item if current_item is not None
|
| 61 |
if current_item is not None:
|
| 62 |
# Clean and add the description to the current item
|
| 63 |
-
current_item["Description"] = clean_description(" ".join(description_accumulator).strip()
|
| 64 |
data.append(current_item)
|
| 65 |
description_accumulator = [] # Reset description accumulator
|
| 66 |
print(f"Item {current_item['Item']} added to data.") # Debugging
|
|
@@ -100,7 +107,7 @@ def parse_po_items_with_filters(text):
|
|
| 100 |
# Finalize the last item
|
| 101 |
if current_item is not None:
|
| 102 |
# Clean and add the description to the current item
|
| 103 |
-
current_item["Description"] = clean_description(" ".join(description_accumulator).strip()
|
| 104 |
data.append(current_item)
|
| 105 |
print(f"Finalized Item {current_item['Item']}") # Debugging
|
| 106 |
|
|
@@ -112,6 +119,7 @@ def parse_po_items_with_filters(text):
|
|
| 112 |
row["Description"]
|
| 113 |
)
|
| 114 |
if item_3_match:
|
|
|
|
| 115 |
data.insert(
|
| 116 |
i + 1,
|
| 117 |
{
|
|
@@ -137,6 +145,20 @@ def parse_po_items_with_filters(text):
|
|
| 137 |
df = pd.DataFrame(data)
|
| 138 |
return df, "Data extracted successfully."
|
| 139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
# Function: Save to Excel
|
| 141 |
def save_to_excel(df, output_path="extracted_po_data.xlsx"):
|
| 142 |
"""
|
|
|
|
| 35 |
|
| 36 |
return description.strip()
|
| 37 |
|
| 38 |
+
# Function to clean item description
def clean_description(description, item_number=None):
    """
    Cleans up the description for an item to ensure it's correctly formatted.

    Args:
        description: The raw description string for a PO item.
        item_number: Optional item number. Accepted only so this definition
            stays call-compatible with the earlier definition of the same
            name in this file, whose signature is
            ``clean_description(description, item_number=None)``; it is not
            used here.

    Returns:
        The description with leading and trailing whitespace removed.
    """
    # NOTE(review): this definition rebinds the name of the earlier
    # clean_description in this file, so that earlier body no longer runs.
    # Confirm that is intended; otherwise remove one of the two definitions.
    return description.strip()
|
| 44 |
+
|
| 45 |
# Function to extract PO items with better error handling and checks
|
| 46 |
def parse_po_items_with_filters(text):
|
| 47 |
"""
|
|
|
|
| 67 |
# Save the previous item if current_item is not None
|
| 68 |
if current_item is not None:
|
| 69 |
# Clean and add the description to the current item
|
| 70 |
+
current_item["Description"] = clean_description(" ".join(description_accumulator).strip())
|
| 71 |
data.append(current_item)
|
| 72 |
description_accumulator = [] # Reset description accumulator
|
| 73 |
print(f"Item {current_item['Item']} added to data.") # Debugging
|
|
|
|
| 107 |
# Finalize the last item
|
| 108 |
if current_item is not None:
|
| 109 |
# Clean and add the description to the current item
|
| 110 |
+
current_item["Description"] = clean_description(" ".join(description_accumulator).strip())
|
| 111 |
data.append(current_item)
|
| 112 |
print(f"Finalized Item {current_item['Item']}") # Debugging
|
| 113 |
|
|
|
|
| 119 |
row["Description"]
|
| 120 |
)
|
| 121 |
if item_3_match:
|
| 122 |
+
# Split item 2 description and assign item 3
|
| 123 |
data.insert(
|
| 124 |
i + 1,
|
| 125 |
{
|
|
|
|
| 145 |
df = pd.DataFrame(data)
|
| 146 |
return df, "Data extracted successfully."
|
| 147 |
|
| 148 |
+
# Example text (as provided). Kept at module level because it is a plain
# constant with no side effects.
text = """
ITEM 1 Stainless Steel RATING AND DIAGRAM PLATE 24 Nos. 3.00 72.00
As per Drg.No. G 000822 RI RDP 50KVA NT001 51 SIZE : 150mm X 160mm X 1.00mm Thick With Serial No:NT00151 97 to 121 Mfd:-2022
ITEM 2 Stainless Steel RATING AND DIAGRAM PLATE 12 Nos. 3.80 45.60
As per Drg.to.G 000816 R2 RDP 600KVA NT00152 SIZE : 150mm X 260mm X 1.00mm Thick With Serial No:NT00I53 38 to 50 Mfd:-2022
"""

# Running the function. Guarded so the sample parse and its debug output only
# happen when this file is executed as a script, not whenever it is imported.
if __name__ == "__main__":
    df, status = parse_po_items_with_filters(text)
    print(status)
    if df is not None:
        print(df)
|
| 161 |
+
|
| 162 |
# Function: Save to Excel
|
| 163 |
def save_to_excel(df, output_path="extracted_po_data.xlsx"):
|
| 164 |
"""
|