Update app.py
Browse files
app.py
CHANGED
|
@@ -14,14 +14,6 @@ def extract_text_from_pdf(pdf_file):
|
|
| 14 |
text += page.extract_text()
|
| 15 |
return text
|
| 16 |
|
| 17 |
-
# Function: Clean Description (Basic cleaning logic)
|
| 18 |
-
def clean_description(description, item_number):
|
| 19 |
-
"""
|
| 20 |
-
Cleans up the description for an item to ensure it's correctly formatted.
|
| 21 |
-
"""
|
| 22 |
-
# Placeholder for actual cleaning process (e.g., removing unwanted characters)
|
| 23 |
-
return description.strip()
|
| 24 |
-
|
| 25 |
# Function: Clean Description
|
| 26 |
def clean_description(description, item_number=None):
|
| 27 |
"""
|
|
@@ -43,7 +35,15 @@ def clean_description(description, item_number=None):
|
|
| 43 |
|
| 44 |
return description.strip()
|
| 45 |
|
| 46 |
-
# Function
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
def parse_po_items_with_filters(text):
|
| 48 |
"""
|
| 49 |
Parses purchase order items from the extracted text systematically.
|
|
@@ -91,12 +91,16 @@ def parse_po_items_with_filters(text):
|
|
| 91 |
print(f"Qty match found: {qty_match.group('Qty')} {qty_match.group(2)}") # Debugging
|
| 92 |
current_item["Qty"] = qty_match.group("Qty")
|
| 93 |
current_item["Unit"] = qty_match.group(2)
|
|
|
|
|
|
|
| 94 |
|
| 95 |
price_match = re.search(r"(?P<UnitPrice>[\d.]+)\s+(?P<TotalPrice>[\d.]+)$", line)
|
| 96 |
if price_match:
|
| 97 |
print(f"Price match found: {price_match.group('UnitPrice')} {price_match.group('TotalPrice')}") # Debugging
|
| 98 |
current_item["Unit Price"] = price_match.group("UnitPrice")
|
| 99 |
current_item["Total Price"] = price_match.group("TotalPrice")
|
|
|
|
|
|
|
| 100 |
|
| 101 |
# Finalize the last item
|
| 102 |
if current_item is not None:
|
|
|
|
| 14 |
text += page.extract_text()
|
| 15 |
return text
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
# Function: Clean Description
|
| 18 |
def clean_description(description, item_number=None):
|
| 19 |
"""
|
|
|
|
| 35 |
|
| 36 |
return description.strip()
|
| 37 |
|
| 38 |
+
# Function: Clean Description (Basic cleaning logic)
|
| 39 |
+
def clean_description(description, item_number=None):
    """
    Return the description with leading and trailing whitespace removed.

    Args:
        description: Raw description text for a purchase-order item.
        item_number: Optional item identifier. It is accepted for signature
            compatibility with the earlier ``clean_description`` definition
            in this module and is not used by this basic implementation.

    Returns:
        The stripped description string.
    """
    # NOTE(review): this module defines clean_description twice. Because this
    # definition comes later, it rebinds the name and replaces the earlier
    # one at import time. The default for item_number is kept so callers
    # that pass only the description keep working; consider removing one of
    # the two definitions.
    return description.strip()
|
| 45 |
+
|
| 46 |
+
# Function to extract PO Items with filters and better error handling
|
| 47 |
def parse_po_items_with_filters(text):
|
| 48 |
"""
|
| 49 |
Parses purchase order items from the extracted text systematically.
|
|
|
|
| 91 |
print(f"Qty match found: {qty_match.group('Qty')} {qty_match.group(2)}") # Debugging
|
| 92 |
current_item["Qty"] = qty_match.group("Qty")
|
| 93 |
current_item["Unit"] = qty_match.group(2)
|
| 94 |
+
else:
|
| 95 |
+
print(f"No Qty match found in line: {line}") # Debugging
|
| 96 |
|
| 97 |
price_match = re.search(r"(?P<UnitPrice>[\d.]+)\s+(?P<TotalPrice>[\d.]+)$", line)
|
| 98 |
if price_match:
|
| 99 |
print(f"Price match found: {price_match.group('UnitPrice')} {price_match.group('TotalPrice')}") # Debugging
|
| 100 |
current_item["Unit Price"] = price_match.group("UnitPrice")
|
| 101 |
current_item["Total Price"] = price_match.group("TotalPrice")
|
| 102 |
+
else:
|
| 103 |
+
print(f"No price match found in line: {line}") # Debugging
|
| 104 |
|
| 105 |
# Finalize the last item
|
| 106 |
if current_item is not None:
|