Daniel Castrillon committed on
Commit
66a2e50
·
1 Parent(s): 98d21f2

added tracking identification logic

Browse files
Files changed (1) hide show
  1. app.py +49 -17
app.py CHANGED
@@ -20,15 +20,43 @@ def process_estafeta_pdf(file_name, page):
20
  # page.scale_to(100, 200)
21
  save_pdf_file(file_name, page)
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def process_dhl_pdf(file_name, page):
24
  page.cropbox.upper_left = (92,20)
25
  page.cropbox.lower_right = (360,560)
26
  save_pdf_file(file_name, page)
27
 
 
 
 
 
 
 
 
 
 
28
  def process_ups_pdf(file_name, page):
29
  # page.cropbox.upper_left = (0,0)
30
  # page.cropbox.lower_right = (500,400)
31
  page.rotate(90)
 
 
32
  save_pdf_file(file_name, page)
33
 
34
  def process_coppel_pdf(file_name, page):
@@ -36,6 +64,17 @@ def process_coppel_pdf(file_name, page):
36
  page.cropbox.lower_right = (400,520)
37
  save_pdf_file(file_name, page)
38
 
 
 
 
 
 
 
 
 
 
 
 
39
  def process_pdf_file(file):
40
  """
41
  This function processes the PDF file and returns the file name, file path and transport company
@@ -54,34 +93,27 @@ def process_pdf_file(file):
54
  page = pdf.pages[0]
55
  pdf_text = page.extract_text()
56
 
57
- page.rotateClockwise = 90
58
-
59
  transport_company = None
60
  file_name = file.name
61
  file_path = f"files/{file_name}"
62
 
63
  if re.search("estafeta", pdf_text, re.IGNORECASE):
64
  transport_company = "estafeta"
 
65
  process_estafeta_pdf(file_name, page)
66
- elif re.search("ups", pdf_text, re.IGNORECASE):
67
- transport_company = "ups"
68
- process_ups_pdf(file_name, page)
69
  elif re.search("dhl", pdf_text, re.IGNORECASE):
70
  transport_company = "dhl"
 
71
  process_dhl_pdf(file_name, page)
72
  elif re.search("coppel", pdf_text, re.IGNORECASE):
73
  transport_company = "coppel"
 
74
  process_coppel_pdf(file_name, page)
75
-
76
-
77
- if transport_company is None:
78
  transport_company = "ups"
79
  process_ups_pdf(file_name, page)
80
- # print(pdf_text)
81
- # raise Exception("No se pudo identificar la compañia de transporte")
82
 
83
  pdf_stream.close()
84
-
85
  return file_name, file_path, transport_company
86
 
87
  async def process_chat():
@@ -99,17 +131,17 @@ async def process_chat():
99
  ).send()
100
 
101
  try:
102
- elements = []
103
- content = ""
104
  files_path = []
105
  for file in files:
106
  file_name, file_path, transport_company = process_pdf_file(file)
107
- elements.append(cl.File(name=file_name, display="inline", path=file_path))
108
  files_path.append(file_path)
109
- content += f"Guía de {transport_company.upper()}\n"
110
 
111
- msg = cl.Message(content=content, elements=elements)
112
- await msg.send()
 
 
 
113
  for file_path in files_path:
114
  os.remove(file_path)
115
  except Exception as e:
 
20
  # page.scale_to(100, 200)
21
  save_pdf_file(file_name, page)
22
 
23
def process_estafeta_text(pdf_text):
    """Build the output file name and path from an Estafeta label's text.

    Searches *pdf_text* for ``CONFIRMACION <digits>-<digits><word chars>``,
    keeps the captured code up to and including its last letter, and removes
    the hyphens.

    Args:
        pdf_text: Text extracted from the label page.

    Returns:
        A ``(file_name, file_path)`` tuple, where ``file_name`` is the
        cleaned code plus ``.pdf`` and ``file_path`` is ``files/<file_name>``.
        When the pattern is not found the code is empty, so the result is
        ``(".pdf", "files/.pdf")``.
    """
    match = re.search(r'CONFIRMACION (\d+-\d+\w+)', pdf_text)
    code = match.group(1) if match else ""

    # Trim whatever follows the last letter. When the code has no letter at
    # all, keep it whole: slicing with a -1 "not found" sentinel would
    # silently drop the final character of the tracking code.
    end = len(code)
    for offset, char in enumerate(reversed(code)):
        if char.isalpha():
            end = len(code) - offset
            break

    file_name = code[:end].replace("-", "") + ".pdf"
    file_path = f"files/{file_name}"
    return file_name, file_path
38
+
39
+
40
  def process_dhl_pdf(file_name, page):
41
  page.cropbox.upper_left = (92,20)
42
  page.cropbox.lower_right = (360,560)
43
  save_pdf_file(file_name, page)
44
 
45
def process_dhl_text(pdf_text):
    """Build the output file name and path from a DHL label's text.

    Collects every ``WAYBILL <digit groups>`` occurrence in *pdf_text* and
    uses the last one, with the spaces between digit groups removed.

    Args:
        pdf_text: Text extracted from the label page.

    Returns:
        A ``(file_name, file_path)`` tuple, where ``file_name`` is the
        waybill number plus ``.pdf`` and ``file_path`` is
        ``files/<file_name>``.

    Raises:
        ValueError: If no waybill number is found in *pdf_text*.
    """
    matches = re.findall(r'WAYBILL (\d+(?: \d+)*)', pdf_text)
    if not matches:
        # Fail with a clear error instead of hitting an unbound local below.
        raise ValueError("WAYBILL number not found in the text.")

    # The last occurrence wins when the label repeats the waybill.
    waybill = matches[-1].replace(" ", "")

    file_name = waybill + ".pdf"
    file_path = f"files/{file_name}"
    return file_name, file_path
53
+
54
  def process_ups_pdf(file_name, page):
55
  # page.cropbox.upper_left = (0,0)
56
  # page.cropbox.lower_right = (500,400)
57
  page.rotate(90)
58
+ pdf_text = page.extract_text()
59
+ print(pdf_text)
60
  save_pdf_file(file_name, page)
61
 
62
  def process_coppel_pdf(file_name, page):
 
64
  page.cropbox.lower_right = (400,520)
65
  save_pdf_file(file_name, page)
66
 
67
def process_coppel_text(pdf_text):
    """Build the output file name and path from a Coppel label's text.

    Searches *pdf_text* for ``TN: <word chars>`` and uses the captured
    tracking number as the file name.

    Args:
        pdf_text: Text extracted from the label page.

    Returns:
        A ``(file_name, file_path)`` tuple, where ``file_name`` is the
        tracking number plus ``.pdf`` and ``file_path`` is
        ``files/<file_name>``.

    Raises:
        ValueError: If the ``TN:`` pattern is not found in *pdf_text*.
    """
    match = re.search(r'TN: (\w+)', pdf_text)
    if match is None:
        # Stop here: there is no tracking number to name the file with, and
        # continuing would fail on an unbound local.
        raise ValueError("Pattern not found in the text.")

    file_name = match.group(1) + ".pdf"
    file_path = f"files/{file_name}"
    return file_name, file_path
77
+
78
  def process_pdf_file(file):
79
  """
80
  This function processes the PDF file and returns the file name, file path and transport company
 
93
  page = pdf.pages[0]
94
  pdf_text = page.extract_text()
95
 
 
 
96
  transport_company = None
97
  file_name = file.name
98
  file_path = f"files/{file_name}"
99
 
100
  if re.search("estafeta", pdf_text, re.IGNORECASE):
101
  transport_company = "estafeta"
102
+ file_name, file_path = process_estafeta_text(pdf_text)
103
  process_estafeta_pdf(file_name, page)
 
 
 
104
  elif re.search("dhl", pdf_text, re.IGNORECASE):
105
  transport_company = "dhl"
106
+ file_name, file_path = process_dhl_text(pdf_text)
107
  process_dhl_pdf(file_name, page)
108
  elif re.search("coppel", pdf_text, re.IGNORECASE):
109
  transport_company = "coppel"
110
+ file_name, file_path = process_coppel_text(pdf_text)
111
  process_coppel_pdf(file_name, page)
112
+ else:
 
 
113
  transport_company = "ups"
114
  process_ups_pdf(file_name, page)
 
 
115
 
116
  pdf_stream.close()
 
117
  return file_name, file_path, transport_company
118
 
119
  async def process_chat():
 
131
  ).send()
132
 
133
  try:
 
 
134
  files_path = []
135
  for file in files:
136
  file_name, file_path, transport_company = process_pdf_file(file)
137
+ elements = [cl.File(name=file_name, display="inline", path=file_path)]
138
  files_path.append(file_path)
 
139
 
140
+ file_name = file_name.replace(".pdf", "")
141
+ content = f"Guía de {transport_company.upper()}: **{file_name}**"
142
+ msg = cl.Message(content=content, elements=elements)
143
+ await msg.send()
144
+
145
  for file_path in files_path:
146
  os.remove(file_path)
147
  except Exception as e: