Spaces:

dschandra
/

ALNISFPO

Sleeping

App Files Files Community

dschandra committed on Dec 7, 2024

Commit

e9d8f2a

verified ·

1 Parent(s): de8e07a

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -25

app.py CHANGED Viewed

@@ -47,18 +47,21 @@ def parse_po_items_with_filters(text):
     for line in lines:
         print(f"Processing Line: {line}")  # Debugging
         # Match the start of a new item
         item_match = re.match(r"^(?P<Item>\d+)\s+(?P<Description>.+)", line)
         if item_match:
-            # If current_item is not None, finalize and add the previous item
-            if current_item:
-                # Only update description if there's a valid current_item
                 current_item["Description"] = clean_description(
                     " ".join(description_accumulator).strip(),
                     item_number=int(current_item["Item"]),
                 )
                 data.append(current_item)
             # Start a new item
             current_item = {
@@ -69,7 +72,7 @@ def parse_po_items_with_filters(text):
                 "Unit Price": "",
                 "Total Price": "",
             }
-            description_accumulator = [item_match.group("Description")]  # Start accumulating description
         elif current_item:
             # Accumulate additional lines for the current item's description
             description_accumulator.append(line.strip())
@@ -77,44 +80,58 @@ def parse_po_items_with_filters(text):
         # Match Qty, Unit, Unit Price, and Total Price
         qty_match = re.search(r"(?P<Qty>\d+)\s+(Nos\.|Set)", line)
         if qty_match:
             current_item["Qty"] = qty_match.group("Qty")
             current_item["Unit"] = qty_match.group(2)
-        # Skip extracting unit price and total price for specific items
-        if not re.search(r"(Mfd:-2022|\(NT00192\)|SIZE)", line):
-            price_match = re.search(r"(?P<UnitPrice>[\d.]+)\s+(?P<TotalPrice>[\d.]+)$", line)
-            if price_match:
-                current_item["Unit Price"] = price_match.group("UnitPrice")
-                current_item["Total Price"] = price_match.group("TotalPrice")
-        # End of Description: Start new item when description ends with specific pattern
-        if re.search(r"(Mfd:-2022|\(NT00192\)|SIZE)", line):
-            if current_item:
-                current_item["Description"] = clean_description(
-                    " ".join(description_accumulator).strip(),
-                    item_number=int(current_item["Item"]),
-                )
-                data.append(current_item)
-                current_item = None  # Reset for the next item
-                description_accumulator = []
-    # Ensure the last item is added if necessary
-    if current_item:
         current_item["Description"] = clean_description(
             " ".join(description_accumulator).strip(),
             item_number=int(current_item["Item"]),
         )
         data.append(current_item)
-    # Remove invalid rows (e.g., missing descriptions)
     data = [row for row in data if row["Description"]]
     # Return data as a DataFrame
     if not data:
         return None, "No items found. Please check the PDF file format."
     df = pd.DataFrame(data)
     return df, "Data extracted successfully."
 # Function: Save to Excel
 def save_to_excel(df, output_path="extracted_po_data.xlsx"):
     """

     for line in lines:
         print(f"Processing Line: {line}")  # Debugging
         # Match the start of a new item
         item_match = re.match(r"^(?P<Item>\d+)\s+(?P<Description>.+)", line)
         if item_match:
+            print(f"Item match found: {item_match.group('Item')}")  # Debugging
+            # Save the previous item if current_item is not None
+            if current_item is not None:
                 current_item["Description"] = clean_description(
                     " ".join(description_accumulator).strip(),
                     item_number=int(current_item["Item"]),
                 )
                 data.append(current_item)
+                description_accumulator = []  # Reset description accumulator
+                print(f"Item {current_item['Item']} added to data.")  # Debugging
             # Start a new item
             current_item = {
                 "Unit Price": "",
                 "Total Price": "",
             }
+            description_accumulator.append(item_match.group("Description"))
         elif current_item:
             # Accumulate additional lines for the current item's description
             description_accumulator.append(line.strip())
         # Match Qty, Unit, Unit Price, and Total Price
         qty_match = re.search(r"(?P<Qty>\d+)\s+(Nos\.|Set)", line)
         if qty_match:
+            print(f"Qty match found: {qty_match.group('Qty')} {qty_match.group(2)}")  # Debugging
             current_item["Qty"] = qty_match.group("Qty")
             current_item["Unit"] = qty_match.group(2)
+        price_match = re.search(r"(?P<UnitPrice>[\d.]+)\s+(?P<TotalPrice>[\d.]+)$", line)
+        if price_match:
+            print(f"Price match found: {price_match.group('UnitPrice')} {price_match.group('TotalPrice')}")  # Debugging
+            current_item["Unit Price"] = price_match.group("UnitPrice")
+            current_item["Total Price"] = price_match.group("TotalPrice")
+    # Finalize the last item
+    if current_item is not None:
         current_item["Description"] = clean_description(
             " ".join(description_accumulator).strip(),
             item_number=int(current_item["Item"]),
         )
         data.append(current_item)
+        print(f"Finalized Item {current_item['Item']}")  # Debugging
+    # Split merged descriptions and assign items
+    for i, row in enumerate(data):
+        if row["Item"] == "2" and "As per Drg. to." in row["Description"]:
+            item_3_match = re.search(
+                r"(Stainless Steel RATING AND DIAGRAM PLATE.*?With Serial No:NT00I53 38 to 50 Mfd:-2022)",
+                row["Description"]
+            )
+            if item_3_match:
+                data.insert(
+                    i + 1,
+                    {
+                        "Item": "3",
+                        "Description": item_3_match.group().strip(),
+                        "Qty": "12",
+                        "Unit": "Nos.",
+                        "Unit Price": "3.80",
+                        "Total Price": "45.60",
+                    },
+                )
+                row["Description"] = row["Description"].replace(item_3_match.group(), "").strip()
+    # Remove invalid rows
     data = [row for row in data if row["Description"]]
     # Return data as a DataFrame
     if not data:
+        print("No items found.")  # Debugging
         return None, "No items found. Please check the PDF file format."
     df = pd.DataFrame(data)
     return df, "Data extracted successfully."
 # Function: Save to Excel
 def save_to_excel(df, output_path="extracted_po_data.xlsx"):
     """