TymaaHammouda committed on
Commit
a19fbdf
·
1 Parent(s): 340b3eb

Remove print statements

Browse files
Files changed (1) hide show
  1. app.py +0 -76
app.py CHANGED
@@ -16,28 +16,13 @@ from XML_to_HTML import NER_XML_to_HTML
16
  from NER_Distiller import distill_entities
17
 
18
  app = FastAPI()
19
- print("Version 2...")
20
-
21
- # Download files from model repo
22
- # tag_vocab_path = hf_hub_download(
23
- # repo_id="SinaLab/Nested",
24
- # filename="tag_vocab.pkl"
25
- # )
26
-
27
 
28
  pretrained_path = "aubmindlab/bert-base-arabertv2" # must match training
29
  tokenizer = AutoTokenizer.from_pretrained(pretrained_path)
30
  encoder = AutoModel.from_pretrained(pretrained_path).eval()
31
 
32
- # checkpoint_path = hf_hub_download(
33
- # repo_id="SinaLab/Nested",
34
- # filename="checkpoints/checkpoint_2.pt"
35
- # )
36
-
37
-
38
 
39
  checkpoint_path = snapshot_download(repo_id="SinaLab/Nested", allow_patterns="checkpoints/")
40
- print("checkpoint_path : ", checkpoint_path)
41
 
42
  args_path = hf_hub_download(
43
  repo_id="SinaLab/Nested",
@@ -47,8 +32,6 @@ args_path = hf_hub_download(
47
  with open(args_path, 'r') as f:
48
  args_data = json.load(f)
49
 
50
- print("data : ", args_data)
51
-
52
  # Load model
53
  with open("Nested/utils/tag_vocab.pkl", "rb") as f:
54
  label_vocab = pickle.load(f)
@@ -156,36 +139,26 @@ def extract(sentence):
156
 
157
 
158
  def NER(sentence, mode):
159
- print("within NER, and mode is: ", mode)
160
  output_list = []
161
  xml = ""
162
  if mode.strip() == "1":
163
  output_list = jsons_to_list_of_lists(extract(sentence))
164
  return output_list
165
  elif mode.strip() == "2":
166
- print("outputlist : ", output_list)
167
  if output_list != []:
168
  xml = IBO_to_XML(output_list)
169
- print("xml is: ", xml)
170
  return xml
171
  else:
172
  output_list = jsons_to_list_of_lists(extract(sentence))
173
- print("output_list .....: ", output_list)
174
  xml = IBO_to_XML(output_list)
175
- print("else xml is: ", xml)
176
  return xml
177
 
178
  elif mode.strip() == "3":
179
- print("mode is 3")
180
  if xml != "":
181
- #print("in if")
182
  html = NER_XML_to_HTML(xml)
183
  return html
184
  else:
185
- print("in else : ")
186
- print("extract : ", extract(sentence))
187
  output_list = jsons_to_list_of_lists(extract(sentence))
188
- #print("output list : ", output_list)
189
  xml = IBO_to_XML(output_list)
190
  html = NER_XML_to_HTML(xml)
191
  return html
@@ -228,57 +201,8 @@ def predict(request: NERRequest):
228
  "statusCode": 0,
229
  }
230
 
231
- print("content: ", content)
232
  return JSONResponse(
233
  content=content,
234
  media_type="application/json",
235
  status_code=200,
236
  )
237
-
238
-
239
- # sentence = "ذهب احمد إلى جامعة"
240
- # Load tagger
241
- # tagger, tag_vocab, train_config = load_checkpoint(checkpoint_path)
242
-
243
- # Convert text to a tagger dataset and index the tokens in args.text
244
- # dataset, token_vocab = text2segments(sentence)
245
-
246
- # vocabs = namedtuple("Vocab", ["tags", "tokens"])
247
- # vocab = vocabs(tokens=token_vocab, tags=tag_vocab)
248
-
249
- # From the datasets generate the dataloaders
250
- # dataloader = get_dataloaders(
251
- # (dataset,),
252
- # vocab,
253
- # args_data,
254
- # batch_size=32,
255
- # shuffle=(False,),
256
- # )[0]
257
-
258
- # Perform inference on the text and get back the tagged segments
259
- # segments = tagger.infer(dataloader)
260
- # segments_lists = []
261
-
262
-
263
- ## Print results
264
- ## for segment in segments:
265
- ## s = [
266
- ## f"{token.text} ({'|'.join([t['tag'] for t in token.pred_tag])})"
267
- ## for token in segment
268
- ## ]
269
- ## print(" ".join(s))
270
-
271
-
272
- # for segment in segments:
273
- # for token in segment:
274
- # segments_list = {}
275
- # segments_list["token"] = token.text
276
- # list_of_tags = [t['tag'] for t in token.pred_tag]
277
- # list_of_tags = [i for i in list_of_tags if i not in('O',' ','')]
278
- # if list_of_tags == []:
279
- # segments_list["tags"] = ' '.join(['O'])
280
- # else:
281
- # segments_list["tags"] = ' '.join(list_of_tags)
282
- # segments_lists.append(segments_list)
283
-
284
- # print(segments_lists)
 
16
  from NER_Distiller import distill_entities
17
 
18
  app = FastAPI()
 
 
 
 
 
 
 
 
19
 
20
  pretrained_path = "aubmindlab/bert-base-arabertv2" # must match training
21
  tokenizer = AutoTokenizer.from_pretrained(pretrained_path)
22
  encoder = AutoModel.from_pretrained(pretrained_path).eval()
23
 
 
 
 
 
 
 
24
 
25
  checkpoint_path = snapshot_download(repo_id="SinaLab/Nested", allow_patterns="checkpoints/")
 
26
 
27
  args_path = hf_hub_download(
28
  repo_id="SinaLab/Nested",
 
32
  with open(args_path, 'r') as f:
33
  args_data = json.load(f)
34
 
 
 
35
  # Load model
36
  with open("Nested/utils/tag_vocab.pkl", "rb") as f:
37
  label_vocab = pickle.load(f)
 
139
 
140
 
141
  def NER(sentence, mode):
 
142
  output_list = []
143
  xml = ""
144
  if mode.strip() == "1":
145
  output_list = jsons_to_list_of_lists(extract(sentence))
146
  return output_list
147
  elif mode.strip() == "2":
 
148
  if output_list != []:
149
  xml = IBO_to_XML(output_list)
 
150
  return xml
151
  else:
152
  output_list = jsons_to_list_of_lists(extract(sentence))
 
153
  xml = IBO_to_XML(output_list)
 
154
  return xml
155
 
156
  elif mode.strip() == "3":
 
157
  if xml != "":
 
158
  html = NER_XML_to_HTML(xml)
159
  return html
160
  else:
 
 
161
  output_list = jsons_to_list_of_lists(extract(sentence))
 
162
  xml = IBO_to_XML(output_list)
163
  html = NER_XML_to_HTML(xml)
164
  return html
 
201
  "statusCode": 0,
202
  }
203
 
 
204
  return JSONResponse(
205
  content=content,
206
  media_type="application/json",
207
  status_code=200,
208
  )