SuriRaja committed on
Commit
d3afbc7
·
verified ·
1 Parent(s): 7f4998b

Create parse_toshiba.py

Browse files
Files changed (1) hide show
  1. parse_toshiba.py +32 -0
parse_toshiba.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ from typing import Dict
3
+
4
def parse_toshiba(file_text: str) -> Dict:
    """Extract purchase-order fields from the text of a Toshiba PO.

    Every field is parsed independently: a header that is absent from
    ``file_text`` leaves only that field at its default value and does not
    prevent the remaining headers or the line items from being parsed.
    The function never raises; problems are reported on stdout with the
    prefix ``Error parsing Toshiba PO:``.

    Args:
        file_text: Plain text of the purchase order document.

    Returns:
        A dict with the keys ``Purchase Order No``, ``Date``, ``Supplier``,
        ``Invoice Address``, ``Delivery Address``, ``Currency``,
        ``Payment Terms`` and ``Items``. ``Supplier`` and ``Currency`` are
        fixed values; ``Invoice Address``, ``Delivery Address`` and
        ``Payment Terms`` are not extracted by this parser and stay empty.
        ``Items`` is a list of dicts with ``Position`` (str),
        ``Description`` (str), ``Quantity`` (int), ``Unit Price`` (float)
        and ``Total Price`` (float).
    """
    parsed_data = {
        "Purchase Order No": "",
        "Date": "",
        "Supplier": "Toshiba Transmission and Distribution Systems (India) Pvt Ltd",
        "Invoice Address": "",
        "Delivery Address": "",
        "Currency": "INR",
        "Payment Terms": "",
        "Items": [],
    }

    # Non-text input cannot be searched; keep the "never raises" contract
    # and hand back the defaults.
    if not isinstance(file_text, str):
        print(
            "Error parsing Toshiba PO: expected str, "
            f"got {type(file_text).__name__}"
        )
        return parsed_data

    # Header fields: one regex per output key, first capture group is the value.
    header_patterns = {
        "Purchase Order No": r"Purchase Order : (\w+)",
        "Date": r"Order Date : (\d{4}-\d{2}-\d{2})",
    }
    for field, pattern in header_patterns.items():
        found = re.search(pattern, file_text)
        if found is None:
            # re.search returns None on no match; report it and move on so
            # the other fields are still extracted.
            print(f"Error parsing Toshiba PO: {field} not found")
            continue
        parsed_data[field] = found.group(1)

    # Line items: "Pos. <no> <description> <qty> <unit price> INR <total>".
    item_pattern = re.compile(
        r"Pos\.\s+(\d+)\s+([\w\s]+)\s+(\d+)\s+([\d.]+)\s+INR\s+([\d.]+)"
    )
    for match in item_pattern.finditer(file_text):
        try:
            # [\d.]+ also matches strings such as "1.2.3" or a trailing
            # full stop, which float() rejects.
            unit_price = float(match.group(4))
            total_price = float(match.group(5))
        except ValueError as e:
            # Skip only the malformed item instead of dropping all the rest.
            print(f"Error parsing Toshiba PO: {e}")
            continue
        parsed_data["Items"].append({
            "Position": match.group(1),
            "Description": match.group(2).strip(),
            "Quantity": int(match.group(3)),
            "Unit Price": unit_price,
            "Total Price": total_price,
        })

    return parsed_data