SuriRaja committed on
Commit
a9fdebc
·
verified ·
1 Parent(s): b8410df

Update po_parsers/federal_transformers.py

Browse files
Files changed (1) hide show
  1. po_parsers/federal_transformers.py +8 -4
po_parsers/federal_transformers.py CHANGED
@@ -1,8 +1,9 @@
 
1
  import re
2
- from typing import Dict
3
  import pandas as pd
 
4
 
5
- def parse_federal_transformers(file_text: str) -> Dict:
6
  parsed_data = {
7
  "Purchase Order No": "",
8
  "Date": "",
@@ -15,10 +16,12 @@ def parse_federal_transformers(file_text: str) -> Dict:
15
  }
16
 
17
  try:
 
 
 
18
  # Parse headers
19
  parsed_data["Purchase Order No"] = re.search(r"Purchase Order No\.\s(\d+)", file_text).group(1)
20
  parsed_data["Date"] = re.search(r"Date:\s+(\d{2}-\w{3}-\d{2})", file_text).group(1)
21
-
22
  parsed_data["Invoice Address"] = re.search(r"Invoice Address\s*:\s*(.*?)(?=\sDelivery Address)", file_text, re.DOTALL).group(1).strip()
23
  parsed_data["Delivery Address"] = re.search(r"Delivery Address\s*:\s*(.*?)(?=\sNote)", file_text, re.DOTALL).group(1).strip()
24
 
@@ -38,8 +41,9 @@ def parse_federal_transformers(file_text: str) -> Dict:
38
  items_df = pd.DataFrame(parsed_data["Items"])
39
  parsed_data["Items DataFrame"] = items_df
40
  print("Items successfully parsed into DataFrame.")
41
-
42
  except Exception as e:
43
  print(f"Error parsing Federal Transformers PO: {e}")
 
44
 
45
  return parsed_data
 
1
+ import pdfplumber
2
  import re
 
3
  import pandas as pd
4
+ from typing import Dict
5
 
6
+ def parse_po(file_path: str) -> Dict:
7
  parsed_data = {
8
  "Purchase Order No": "",
9
  "Date": "",
 
16
  }
17
 
18
  try:
19
+ with pdfplumber.open(file_path) as pdf:
20
+ file_text = "\n".join([page.extract_text() for page in pdf.pages if page.extract_text()])
21
+
22
  # Parse headers
23
  parsed_data["Purchase Order No"] = re.search(r"Purchase Order No\.\s(\d+)", file_text).group(1)
24
  parsed_data["Date"] = re.search(r"Date:\s+(\d{2}-\w{3}-\d{2})", file_text).group(1)
 
25
  parsed_data["Invoice Address"] = re.search(r"Invoice Address\s*:\s*(.*?)(?=\sDelivery Address)", file_text, re.DOTALL).group(1).strip()
26
  parsed_data["Delivery Address"] = re.search(r"Delivery Address\s*:\s*(.*?)(?=\sNote)", file_text, re.DOTALL).group(1).strip()
27
 
 
41
  items_df = pd.DataFrame(parsed_data["Items"])
42
  parsed_data["Items DataFrame"] = items_df
43
  print("Items successfully parsed into DataFrame.")
44
+
45
  except Exception as e:
46
  print(f"Error parsing Federal Transformers PO: {e}")
47
+ parsed_data["Error"] = str(e)
48
 
49
  return parsed_data