nicks committed on
Commit
75f0880
·
1 Parent(s): c0a2d02

[Added] new attributes to mapping and handled contact information differently

Browse files
Files changed (2) hide show
  1. app.py +154 -162
  2. field_mapping.json +12 -3
app.py CHANGED
@@ -12,26 +12,26 @@ def create_output_attribute(attribute_name, attribute_value, attribute_language)
12
  "probability": 1.0,
13
  "value": attribute_value,
14
  "model": "NER",
15
- "language": attribute_language,
16
  }
17
  return out_dict
18
 
19
 
20
- def create_allergen_attribute(allergen_type_code, allergen_containment_level="None"):
21
  out_dict = {
22
  "coordinates": "None",
23
  "probability": 1.0,
24
  "model": "rule-based",
25
  "entity": "Allergen",
26
  "allergenTypeCode": allergen_type_code,
27
- "levelOfContainmentCode": allergen_containment_level,
28
  }
29
  return out_dict
30
 
31
 
32
  def extract_attribute_value_from_df(df, attribute_id):
33
  try:
34
- return df[df["attribute_id"] == attribute_id].iloc[0]["attribute_value"]
35
  except IndexError:
36
  return None
37
 
@@ -39,7 +39,7 @@ def extract_attribute_value_from_df(df, attribute_id):
39
  def extract_value_and_unit(value_in):
40
  if value_in is None:
41
  return value_in, value_in
42
- split = value_in.split(" ")
43
  if len(split) == 2:
44
  return split[0], split[1]
45
  else:
@@ -51,32 +51,18 @@ def process_normal_attributes(pxm_output, certifai_mapping, desired_language):
51
  output_list = []
52
  gtin = None
53
  hierarchy_attributes = []
54
- for attribute in pxm_output["data"]:
55
- attribute_id = str(attribute["attribute_id"])
56
- attribute_language = attribute["locale"]["value"]
57
- attribute_value = str(attribute["value"])
58
- if attribute["path"] is not None and attribute_id not in ["4896", "4897", "4898"]:
59
- hierarchy_attributes.append(
60
- {
61
- "attribute_id": attribute_id,
62
- "attribute_value": attribute_value,
63
- "attribute_language": attribute_language,
64
- "path": attribute["path"],
65
- }
66
- )
67
- if (
68
- attribute_id in certifai_mapping.keys()
69
- and attribute_language in desired_language
70
- and (attribute['path'] is None or attribute_id in ["4896", "4897", "4898"])
71
- ):
72
  attribute_name = certifai_mapping[attribute_id]
73
- output_list.append(
74
- create_output_attribute(
75
- attribute_name, attribute_value, attribute_language
76
- )
77
- )
78
  # Filter out the gtin
79
- if attribute_id == "3603":
80
  gtin = attribute_value
81
 
82
  return output_list, hierarchy_attributes, gtin
@@ -85,224 +71,221 @@ def process_normal_attributes(pxm_output, certifai_mapping, desired_language):
85
  def extract_path_hierarchy(hierarchy_attributes_df):
86
  # Preprocess the path column from the attributes df to extract the path hierarchy
87
  path_list = []
88
- for element in hierarchy_attributes_df["path"].str.split("."):
89
  if len(element) == 1:
90
  path_list.append([element[0], None, None])
91
  elif len(element) == 2:
92
  path_list.append([element[0], element[1], None])
93
  elif len(element) == 3:
94
  path_list.append(element)
95
- hierarchy_attributes_df[["path.0", "path.1", "path.2"]] = path_list
96
  return hierarchy_attributes_df
97
 
98
 
99
  def process_allergens(hierarchy_attributes_df):
100
  # Allergens
101
- allergens_df = hierarchy_attributes_df[
102
- (hierarchy_attributes_df["attribute_id"].isin(["5184", "5191"]))
103
- ]
104
  allergens_list = []
105
  if len(allergens_df) > 0:
106
- allergens_df = allergens_df[
107
- allergens_df["attribute_language"]
108
- == allergens_df.iloc[0]["attribute_language"]
109
- ]
110
- for _, group in allergens_df.groupby("path"):
111
- allergen_type_code = group[group["attribute_id"] == "5184"].iloc[0][
112
- "attribute_value"
113
- ]
114
- allergen_containment_level_code = group[
115
- group["attribute_id"] == "5191"
116
- ].iloc[0]["attribute_value"]
117
- allergens_list.append(
118
- create_allergen_attribute(
119
- allergen_type_code, allergen_containment_level_code
120
- )
121
- )
122
-
123
- allergen_attribute = {
124
- "entity": "allergens",
125
- "values": allergens_list,
126
- "model": "rule-based",
127
- }
128
 
129
  return allergen_attribute
130
 
131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  def process_communication_channels(hierarchy_attributes_df):
133
  # Communication Channels
134
  communication_channels_df = hierarchy_attributes_df[
135
- (hierarchy_attributes_df["attribute_id"].isin(["4900", "4901"]))
136
- ]
137
  communication_channels_df = communication_channels_df[
138
- communication_channels_df["attribute_language"]
139
- == communication_channels_df.iloc[0]["attribute_language"]
140
- ]
141
  communication_channels_list = []
142
- for _, group in communication_channels_df.groupby("path"):
143
- communication_channel_dict = {
144
- "coordinates": "None",
145
- "probability": 1.0,
146
- "model": "rule-based",
147
- "entity": "CommunicationChannel",
148
- "communicationChannelCode": extract_attribute_value_from_df(group, "4900"),
149
- "communicationValue": extract_attribute_value_from_df(group, "4901"),
150
- }
151
- communication_channels_list.append(communication_channel_dict)
152
- communication_channel_attribute = {
153
- "entity": "communicationChannels",
154
- "values": communication_channels_list,
155
- }
156
- return communication_channel_attribute
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
 
159
  def process_preparation_instructions(hierarchy_attributes_df):
160
  # Preparation instructions
161
  preparation_instructions_df = hierarchy_attributes_df[
162
- (hierarchy_attributes_df["attribute_id"].isin(["5206", "5207"]))
163
- ]
164
  preparation_instructions_df = preparation_instructions_df[
165
- preparation_instructions_df["attribute_language"]
166
- == preparation_instructions_df.iloc[0]["attribute_language"]
167
- ]
168
  preparation_instructions_list = []
169
- for _, group in preparation_instructions_df.groupby("path"):
170
  preparation_instructions_dict = {
171
  "coordinates": "None",
172
  "probability": 1.0,
173
  "model": "rule-based",
174
  "entity": "PreparationInstruction",
175
- "preparationTypeCode": extract_attribute_value_from_df(group, "5206"),
176
- "preparationInstructions": extract_attribute_value_from_df(group, "5207"),
177
  }
178
  preparation_instructions_list.append(preparation_instructions_dict)
179
- preparation_instructions_attribute = {
180
- "entity": "preparationInstructions",
181
- "values": preparation_instructions_list,
182
- }
183
  return preparation_instructions_attribute
184
 
185
 
186
  def process_diet_information(hierarchy_attributes_df):
187
  # Diet Information
188
  diet_information_df = hierarchy_attributes_df[
189
- (hierarchy_attributes_df["attribute_id"].isin(["5203", "5204"]))
190
- ]
191
  diet_information_df = diet_information_df[
192
- diet_information_df["attribute_language"]
193
- == diet_information_df.iloc[0]["attribute_language"]
194
- ]
195
  diet_information_list = []
196
- for _, group in diet_information_df.groupby("path"):
197
  diet_information_dict = {
198
  "coordinates": "None",
199
  "probability": 1.0,
200
  "model": "rule-based",
201
  "entity": "DietInformation",
202
- "dietTypeCode": extract_attribute_value_from_df(group, "5203"),
203
- "isDietTypeMarkedOnPackage": extract_attribute_value_from_df(group, "5204"),
204
  }
205
  diet_information_list.append(diet_information_dict)
206
- diet_information_attribute = {
207
- "entity": "dietInformation",
208
- "values": diet_information_list,
209
- }
210
  return diet_information_attribute
211
 
212
 
213
  def process_claim_element_information(hierarchy_attributes_df):
214
  # Claim element information
215
  claim_element_information_df = hierarchy_attributes_df[
216
- (hierarchy_attributes_df["attribute_id"].isin(["5199", "5200", "5201"]))
217
- ]
218
  claim_element_information_df = claim_element_information_df[
219
- claim_element_information_df["attribute_language"]
220
- == claim_element_information_df.iloc[0]["attribute_language"]
221
- ]
222
  claim_element_information_list = []
223
- for _, group in claim_element_information_df.groupby("path"):
224
  claim_element_information_dict = {
225
  "coordinates": "None",
226
  "probability": 1.0,
227
  "model": "rule-based",
228
  "entity": "ClaimElementInformation",
229
- "claimElementCode": extract_attribute_value_from_df(group, "5199"),
230
- "claimTypeCode": extract_attribute_value_from_df(group, "5200"),
231
- "claimMarkedOnPackage": extract_attribute_value_from_df(group, "5201"),
232
  }
233
  claim_element_information_list.append(claim_element_information_dict)
234
- claim_element_information_attribute = {
235
- "entity": "claimElementInformation",
236
- "values": claim_element_information_list,
237
- }
238
  return claim_element_information_attribute
239
 
240
 
241
  def process_nutrient_table(hierarchy_attributes_df):
242
  # Nutrient table
243
  nutrient_table_df = hierarchy_attributes_df[
244
- (
245
- hierarchy_attributes_df["attribute_id"].isin(
246
- ["5211", "5212", "5219", "5215", "5216", "5217"]
247
- )
248
- )
249
- ]
250
  nutrient_table_list = []
251
  if len(nutrient_table_df) > 0:
252
- # preferred_language = "en-GB"
253
- nutrient_table_df = nutrient_table_df[
254
- nutrient_table_df["attribute_language"]
255
- == nutrient_table_df.iloc[0]["attribute_language"]
256
- ]
257
- for _, group in nutrient_table_df.groupby("path.0"):
258
- content = extract_attribute_value_from_df(group, "5211")
259
  nutrient_value, nutrient_unit = extract_value_and_unit(content)
260
 
261
  nutrient_basis_quantity_dict = {
262
  "nutrientBasisQuantityValue": nutrient_value,
263
  "nutrientBasisQuantityMeasurementUnitCode": nutrient_unit,
264
- "preparationStateCode": extract_attribute_value_from_df(group, "5212"),
265
- }
266
  nutrient_values_list = []
267
- for _, sub_group in group.sort_values(by="path.1").groupby("path.1"):
268
- content = extract_attribute_value_from_df(sub_group, "5219")
269
  nutrient_value, nutrient_unit = extract_value_and_unit(content)
270
 
271
  nutrient_values_dict = {
272
  "coordinates": "",
273
  "probability": 1.0,
274
- "nutrientTypeCode": extract_attribute_value_from_df(
275
- sub_group, "5215"
276
- ),
277
  "quantityContained": {
278
  "measurementUnitCode": nutrient_unit,
279
  "value": nutrient_value,
280
- "precisionCode": extract_attribute_value_from_df(
281
- sub_group, "5216"
282
- ),
283
  },
284
  "dailyValueIntakePercent": {
285
- "value": extract_attribute_value_from_df(sub_group, "5217"),
286
- "precisionCode": "APPROXIMATELY",
287
- },
288
  }
289
  nutrient_values_list.append(nutrient_values_dict)
290
- nutrient_basis_quantity_dict["values"] = nutrient_values_list
291
  nutrient_table_list.append(nutrient_basis_quantity_dict)
292
 
293
- nutrient_attribute = {
294
- "coordinates": "None",
295
- "entity": "nutrients",
296
- "probability": 1.0,
297
- "value": nutrient_table_list,
298
- "model": "table-rule-based",
299
- }
300
  return nutrient_attribute
301
 
302
 
303
  def pad_gtin(gtin, desired_length=14):
304
- for _ in range(desired_length - len(gtin)):
305
- gtin = "0" + gtin
306
  return gtin
307
 
308
 
@@ -331,38 +314,47 @@ def process_file(filename, pxm_output):
331
  # Process the allergens
332
  allergen_attribute = process_allergens(hierarchy_attributes_df)
333
  output_list.append(allergen_attribute)
 
 
 
 
 
 
 
334
  # Process the communication channels
335
  try:
336
- communication_channel_attribute = process_communication_channels(
337
- hierarchy_attributes_df
338
- )
339
- output_list.append(communication_channel_attribute)
 
 
 
 
340
  except Exception as e:
341
  print(e)
 
342
  # Process preparation instructions
343
  try:
344
- preparation_instructions_attribute = process_preparation_instructions(
345
- hierarchy_attributes_df
346
- )
347
  output_list.append(preparation_instructions_attribute)
348
  except Exception as e:
349
  print(e)
 
350
  # Process diet information
351
  try:
352
- diet_information_attribute = process_diet_information(
353
- hierarchy_attributes_df
354
- )
355
  output_list.append(diet_information_attribute)
356
  except Exception as e:
357
  print(e)
 
358
  # Process claim element information
359
  try:
360
- claim_element_information_attribute = process_claim_element_information(
361
- hierarchy_attributes_df
362
- )
363
  output_list.append(claim_element_information_attribute)
364
  except Exception as e:
365
  print(e)
 
366
  # Process the nutrient table
367
  nutrient_table_attribute = process_nutrient_table(hierarchy_attributes_df)
368
  output_list.append(nutrient_table_attribute)
 
12
  "probability": 1.0,
13
  "value": attribute_value,
14
  "model": "NER",
15
+ "language": attribute_language
16
  }
17
  return out_dict
18
 
19
 
20
+ def create_allergen_attribute(allergen_type_code, allergen_containment_level='None'):
21
  out_dict = {
22
  "coordinates": "None",
23
  "probability": 1.0,
24
  "model": "rule-based",
25
  "entity": "Allergen",
26
  "allergenTypeCode": allergen_type_code,
27
+ "levelOfContainmentCode": allergen_containment_level
28
  }
29
  return out_dict
30
 
31
 
32
  def extract_attribute_value_from_df(df, attribute_id):
33
  try:
34
+ return df[df['attribute_id'] == attribute_id].iloc[0]['attribute_value']
35
  except IndexError:
36
  return None
37
 
 
39
  def extract_value_and_unit(value_in):
40
  if value_in is None:
41
  return value_in, value_in
42
+ split = value_in.split(' ')
43
  if len(split) == 2:
44
  return split[0], split[1]
45
  else:
 
51
  output_list = []
52
  gtin = None
53
  hierarchy_attributes = []
54
+ for attribute in pxm_output['data']:
55
+ attribute_id = str(attribute['attribute_id'])
56
+ attribute_language = attribute['locale']['value']
57
+ attribute_value = str(attribute['value'])
58
+ if attribute['path'] is not None:
59
+ hierarchy_attributes.append({'attribute_id': attribute_id, 'attribute_value': attribute_value,
60
+ 'attribute_language': attribute_language, 'path': attribute['path']})
61
+ if attribute_id in certifai_mapping.keys() and attribute_language in desired_language and attribute['path'] is None:
 
 
 
 
 
 
 
 
 
 
62
  attribute_name = certifai_mapping[attribute_id]
63
+ output_list.append(create_output_attribute(attribute_name, attribute_value, attribute_language))
 
 
 
 
64
  # Filter out the gtin
65
+ if attribute_id == '3603':
66
  gtin = attribute_value
67
 
68
  return output_list, hierarchy_attributes, gtin
 
71
  def extract_path_hierarchy(hierarchy_attributes_df):
72
  # Preprocess the path column from the attributes df to extract the path hierarchy
73
  path_list = []
74
+ for element in hierarchy_attributes_df['path'].str.split('.'):
75
  if len(element) == 1:
76
  path_list.append([element[0], None, None])
77
  elif len(element) == 2:
78
  path_list.append([element[0], element[1], None])
79
  elif len(element) == 3:
80
  path_list.append(element)
81
+ hierarchy_attributes_df[['path.0', 'path.1', 'path.2']] = path_list
82
  return hierarchy_attributes_df
83
 
84
 
85
  def process_allergens(hierarchy_attributes_df):
86
  # Allergens
87
+ allergens_df = hierarchy_attributes_df[(hierarchy_attributes_df['attribute_id'].isin(['5184', '5191']))]
 
 
88
  allergens_list = []
89
  if len(allergens_df) > 0:
90
+ allergens_df = allergens_df[allergens_df['attribute_language'] == allergens_df.iloc[0]['attribute_language']]
91
+ for path, group in allergens_df.groupby('path'):
92
+ allergen_type_code = group[group['attribute_id'] == '5184'].iloc[0]['attribute_value']
93
+ allergen_containment_level_code = group[group['attribute_id'] == '5191'].iloc[0]['attribute_value']
94
+ allergens_list.append(create_allergen_attribute(allergen_type_code, allergen_containment_level_code))
95
+
96
+ allergen_attribute = {'entity': 'allergens', 'values': allergens_list, 'model': 'rule-based'}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
  return allergen_attribute
99
 
100
 
101
+ # def process_communication_channels(hierarchy_attributes_df):
102
+ # # Communication Channels
103
+ # communication_channels_df = hierarchy_attributes_df[
104
+ # (hierarchy_attributes_df['attribute_id'].isin(['2400', '2401']))]
105
+ # communication_channels_df = communication_channels_df[
106
+ # communication_channels_df['attribute_language'] == communication_channels_df.iloc[0]['attribute_language']]
107
+ # communication_channels_list = []
108
+ # for path_1, group in communication_channels_df.groupby('path'):
109
+ # communication_channel_dict = {
110
+ # "coordinates": "None",
111
+ # "probability": 1.0,
112
+ # "model": "rule-based",
113
+ # "entity": "CommunicationChannel",
114
+ # "communicationChannelCode": extract_attribute_value_from_df(group, '2400'),
115
+ # "communicationValue": extract_attribute_value_from_df(group, '2401')
116
+ # }
117
+ # communication_channels_list.append(communication_channel_dict)
118
+ # communication_channel_attribute = {'entity': 'communicationChannels', 'values': communication_channels_list}
119
+ # return communication_channel_attribute
120
+
121
+
122
  def process_communication_channels(hierarchy_attributes_df):
123
  # Communication Channels
124
  communication_channels_df = hierarchy_attributes_df[
125
+ (hierarchy_attributes_df['attribute_id'].isin(['4896', '4897', "4898", "4900", "4901"]))]
 
126
  communication_channels_df = communication_channels_df[
127
+ communication_channels_df['attribute_language'] == communication_channels_df.iloc[0]['attribute_language']]
128
+
 
129
  communication_channels_list = []
130
+ for path_1, group in communication_channels_df.groupby('path'):
131
+ communication_channel_code = extract_attribute_value_from_df(group, '4900')
132
+ communication_value = extract_attribute_value_from_df(group, '4901')
133
+ if communication_channel_code and communication_value:
134
+ communication_channel_dict = {
135
+ "path": group["path.0"].iloc[0],
136
+ "coordinates": "None",
137
+ "probability": 1.0,
138
+ "model": "rule-based",
139
+ "entity": "CommunicationChannel",
140
+ "communicationChannelCode": communication_channel_code,
141
+ "communicationValue": communication_value,
142
+ }
143
+ communication_channels_list.append(communication_channel_dict)
144
+ return communication_channels_list
145
+
146
+
147
+ def process_contact_information(hierarchy_attributes_df, communication_channels_list):
148
+ # Contact information
149
+ contact_information_df = hierarchy_attributes_df[
150
+ (hierarchy_attributes_df['attribute_id'].isin(['4896', '4897', "4898"]))]
151
+ contact_information_df = contact_information_df[
152
+ contact_information_df['attribute_language'] == contact_information_df.iloc[0]['attribute_language']]
153
+
154
+ contact_information_list = []
155
+ for i, (path_1, group) in enumerate(contact_information_df.groupby('path')):
156
+ contact_name = extract_attribute_value_from_df(group, '4896')
157
+ contact_address = extract_attribute_value_from_df(group, '4897')
158
+ contact_type_code = extract_attribute_value_from_df(group, '4898')
159
+ if contact_name and contact_address and contact_type_code:
160
+ communication_channels = [dic for dic in communication_channels_list if dic["path"] == str(i)]
161
+ contact_information_dict = {
162
+ "contactInformation": str(i),
163
+ "contactName": contact_name,
164
+ "contact_address": contact_address,
165
+ "contactTypeCode": contact_type_code,
166
+ "communicationChannels": communication_channels,
167
+ }
168
+ contact_information_list.append(contact_information_dict)
169
+
170
+ contact_information_out = {'entity': 'contact_information', 'values': contact_information_list}
171
+
172
+ return contact_information_out
173
 
174
 
175
  def process_preparation_instructions(hierarchy_attributes_df):
176
  # Preparation instructions
177
  preparation_instructions_df = hierarchy_attributes_df[
178
+ (hierarchy_attributes_df['attribute_id'].isin(['5206', '5207']))]
 
179
  preparation_instructions_df = preparation_instructions_df[
180
+ preparation_instructions_df['attribute_language'] == preparation_instructions_df.iloc[0]['attribute_language']]
 
 
181
  preparation_instructions_list = []
182
+ for path_1, group in preparation_instructions_df.groupby('path'):
183
  preparation_instructions_dict = {
184
  "coordinates": "None",
185
  "probability": 1.0,
186
  "model": "rule-based",
187
  "entity": "PreparationInstruction",
188
+ "preparationTypeCode": extract_attribute_value_from_df(group, '5206'),
189
+ "preparationInstructions": extract_attribute_value_from_df(group, '5207')
190
  }
191
  preparation_instructions_list.append(preparation_instructions_dict)
192
+ preparation_instructions_attribute = {'entity': 'preparationInstructions', 'values': preparation_instructions_list}
 
 
 
193
  return preparation_instructions_attribute
194
 
195
 
196
  def process_diet_information(hierarchy_attributes_df):
197
  # Diet Information
198
  diet_information_df = hierarchy_attributes_df[
199
+ (hierarchy_attributes_df['attribute_id'].isin(['5203', '5204']))]
 
200
  diet_information_df = diet_information_df[
201
+ diet_information_df['attribute_language'] == diet_information_df.iloc[0]['attribute_language']]
 
 
202
  diet_information_list = []
203
+ for path_1, group in diet_information_df.groupby('path'):
204
  diet_information_dict = {
205
  "coordinates": "None",
206
  "probability": 1.0,
207
  "model": "rule-based",
208
  "entity": "DietInformation",
209
+ "dietTypeCode": extract_attribute_value_from_df(group, '5203'),
210
+ "isDietTypeMarkedOnPackage": extract_attribute_value_from_df(group, '5204')
211
  }
212
  diet_information_list.append(diet_information_dict)
213
+ diet_information_attribute = {'entity': 'dietInformation', 'values': diet_information_list}
 
 
 
214
  return diet_information_attribute
215
 
216
 
217
  def process_claim_element_information(hierarchy_attributes_df):
218
  # Claim element information
219
  claim_element_information_df = hierarchy_attributes_df[
220
+ (hierarchy_attributes_df['attribute_id'].isin(['5199', '5200', '5201']))]
 
221
  claim_element_information_df = claim_element_information_df[
222
+ claim_element_information_df['attribute_language'] == claim_element_information_df.iloc[0]['attribute_language']]
 
 
223
  claim_element_information_list = []
224
+ for path_1, group in claim_element_information_df.groupby('path'):
225
  claim_element_information_dict = {
226
  "coordinates": "None",
227
  "probability": 1.0,
228
  "model": "rule-based",
229
  "entity": "ClaimElementInformation",
230
+ "claimElementCode": extract_attribute_value_from_df(group, '5199'),
231
+ "claimTypeCode": extract_attribute_value_from_df(group, '5200'),
232
+ "claimMarkedOnPackage": extract_attribute_value_from_df(group, '5201'),
233
  }
234
  claim_element_information_list.append(claim_element_information_dict)
235
+ claim_element_information_attribute = {'entity': 'claimElementInformation', 'values': claim_element_information_list}
 
 
 
236
  return claim_element_information_attribute
237
 
238
 
239
  def process_nutrient_table(hierarchy_attributes_df):
240
  # Nutrient table
241
  nutrient_table_df = hierarchy_attributes_df[
242
+ (hierarchy_attributes_df['attribute_id'].isin(['5211', '5212', '5219', '5215', '5216', '5217']))]
 
 
 
 
 
243
  nutrient_table_list = []
244
  if len(nutrient_table_df) > 0:
245
+ preferred_language = 'en-GB'
246
+ nutrient_table_df = nutrient_table_df[nutrient_table_df['attribute_language'] == nutrient_table_df.iloc[0]['attribute_language']]
247
+ for path_0, group in nutrient_table_df.groupby('path.0'):
248
+ content = extract_attribute_value_from_df(group, '5211')
 
 
 
249
  nutrient_value, nutrient_unit = extract_value_and_unit(content)
250
 
251
  nutrient_basis_quantity_dict = {
252
  "nutrientBasisQuantityValue": nutrient_value,
253
  "nutrientBasisQuantityMeasurementUnitCode": nutrient_unit,
254
+ "preparationStateCode": extract_attribute_value_from_df(group, '5212')}
 
255
  nutrient_values_list = []
256
+ for path_1, sub_group in group.sort_values(by='path.1').groupby('path.1'):
257
+ content = extract_attribute_value_from_df(sub_group, '5219')
258
  nutrient_value, nutrient_unit = extract_value_and_unit(content)
259
 
260
  nutrient_values_dict = {
261
  "coordinates": "",
262
  "probability": 1.0,
263
+ "nutrientTypeCode": extract_attribute_value_from_df(sub_group, '5215'),
 
 
264
  "quantityContained": {
265
  "measurementUnitCode": nutrient_unit,
266
  "value": nutrient_value,
267
+ "precisionCode": extract_attribute_value_from_df(sub_group, '5216')
 
 
268
  },
269
  "dailyValueIntakePercent": {
270
+ 'value': extract_attribute_value_from_df(sub_group, '5217'),
271
+ "precisionCode": "APPROXIMATELY"
272
+ }
273
  }
274
  nutrient_values_list.append(nutrient_values_dict)
275
+ nutrient_basis_quantity_dict['values'] = nutrient_values_list
276
  nutrient_table_list.append(nutrient_basis_quantity_dict)
277
 
278
+ nutrient_attribute = {"coordinates": "None",
279
+ "entity": "nutrients",
280
+ "probability": 1.0,
281
+ "value": nutrient_table_list,
282
+ "model": "table-rule-based"}
 
 
283
  return nutrient_attribute
284
 
285
 
286
  def pad_gtin(gtin, desired_length=14):
287
+ for i in range(desired_length - len(gtin)):
288
+ gtin = '0' + gtin
289
  return gtin
290
 
291
 
 
314
  # Process the allergens
315
  allergen_attribute = process_allergens(hierarchy_attributes_df)
316
  output_list.append(allergen_attribute)
317
+ # # Process the communication channels
318
+ # try:
319
+ # communication_channel_attribute = process_communication_channels(hierarchy_attributes_df)
320
+ # output_list.append(communication_channel_attribute)
321
+ # except:
322
+ # pass
323
+
324
  # Process the communication channels
325
  try:
326
+ communication_channels_list = process_communication_channels(hierarchy_attributes_df)
327
+ except Exception as e:
328
+ print(e)
329
+ communication_channels_list = []
330
+ # Process contact information
331
+ try:
332
+ contact_information_attribute = process_contact_information(hierarchy_attributes_df, communication_channels_list)
333
+ output_list.append(contact_information_attribute)
334
  except Exception as e:
335
  print(e)
336
+ pass
337
  # Process preparation instructions
338
  try:
339
+ preparation_instructions_attribute = process_preparation_instructions(hierarchy_attributes_df)
 
 
340
  output_list.append(preparation_instructions_attribute)
341
  except Exception as e:
342
  print(e)
343
+ pass
344
  # Process diet information
345
  try:
346
+ diet_information_attribute = process_diet_information(hierarchy_attributes_df)
 
 
347
  output_list.append(diet_information_attribute)
348
  except Exception as e:
349
  print(e)
350
+ pass
351
  # Process claim element information
352
  try:
353
+ claim_element_information_attribute = process_claim_element_information(hierarchy_attributes_df)
 
 
354
  output_list.append(claim_element_information_attribute)
355
  except Exception as e:
356
  print(e)
357
+ pass
358
  # Process the nutrient table
359
  nutrient_table_attribute = process_nutrient_table(hierarchy_attributes_df)
360
  output_list.append(nutrient_table_attribute)
field_mapping.json CHANGED
@@ -2,10 +2,12 @@
2
  "2002": "Title",
3
  "2003": "brand",
4
  "2086": "variantDescription",
 
 
 
 
 
5
  "2396": "regulatedProductName",
6
- "2397": "contactName",
7
- "2398": "contactAddress",
8
- "2399": "contactTypeCode",
9
  "2400": "communicationChannelCode",
10
  "2401": "communicationValue",
11
  "2403": "netContentStatement",
@@ -21,8 +23,10 @@
21
  "2434": "healthClaimDescription",
22
  "2437": "packagingMarkedLabelAccreditationCode",
23
  "2453": "functionalName",
 
24
  "2792": "tradeItemUnitDescriptorCode",
25
  "2807": "targetSector",
 
26
  "2828": "isPriceOnPack",
27
  "2858": "targetMarketCountryCode",
28
  "2860": "brandName",
@@ -34,6 +38,8 @@
34
  "3601": "additionalLegalProductInformation",
35
  "3603": "gtin",
36
  "3605": "gpcCategoryCode",
 
 
37
  "3631": "nutritionalProgramCode",
38
  "3646": "productMarketingMessage",
39
  "3886": "recommendedDosage",
@@ -41,6 +47,9 @@
41
  "4485": "allergenRelevantDataProvided",
42
  "4785": "allergens",
43
  "4856": "numberOfServingsPerPackageMeasurementPrecisionCode",
 
 
 
44
  "5199": "claimTypeCode",
45
  "5200": "claimElementCode",
46
  "5201": "claimMarkedOnPackage",
 
2
  "2002": "Title",
3
  "2003": "brand",
4
  "2086": "variantDescription",
5
+ "2088": "tradeItemMarketingMessage",
6
+ "2186": "tradeItemMarketingMessageBulletPoint4",
7
+ "2188": "tradeItemMarketingMessageBulletPoint3",
8
+ "2189": "tradeItemMarketingMessageBulletPoint2",
9
+ "2190": "tradeItemMarketingMessageBulletPoint1",
10
  "2396": "regulatedProductName",
 
 
 
11
  "2400": "communicationChannelCode",
12
  "2401": "communicationValue",
13
  "2403": "netContentStatement",
 
23
  "2434": "healthClaimDescription",
24
  "2437": "packagingMarkedLabelAccreditationCode",
25
  "2453": "functionalName",
26
+ "2490": "packagingTypeCode",
27
  "2792": "tradeItemUnitDescriptorCode",
28
  "2807": "targetSector",
29
+ "2825": "dutyFeeTaxCategoryCode",
30
  "2828": "isPriceOnPack",
31
  "2858": "targetMarketCountryCode",
32
  "2860": "brandName",
 
38
  "3601": "additionalLegalProductInformation",
39
  "3603": "gtin",
40
  "3605": "gpcCategoryCode",
41
+ "3608": "countryCode",
42
+ "3625": "packagingMarkedLabelAccreditationCode",
43
  "3631": "nutritionalProgramCode",
44
  "3646": "productMarketingMessage",
45
  "3886": "recommendedDosage",
 
47
  "4485": "allergenRelevantDataProvided",
48
  "4785": "allergens",
49
  "4856": "numberOfServingsPerPackageMeasurementPrecisionCode",
50
+ "4896": "contactName",
51
+ "4897": "contactAddress",
52
+ "4898": "contactTypeCode",
53
  "5199": "claimTypeCode",
54
  "5200": "claimElementCode",
55
  "5201": "claimMarkedOnPackage",