hjianganthony committed on
Commit
329b6b4
·
1 Parent(s): 0db34df

Update src/utils.py

Browse files
Files changed (1) hide show
  1. src/utils.py +7 -6
src/utils.py CHANGED
@@ -11,15 +11,16 @@ import numpy as np
11
  import spacy
12
  from spacy.tokens import DocBin
13
  from spacy.training import offsets_to_biluo_tags
14
- import en_fetch_ner_spacy_tsf
15
- nlp = en_fetch_ner_spacy_tsf.load()
16
  # clear_output()
17
 
18
- import nltk
19
  # nltk.download('stopwords')
20
 
21
- from nltk.corpus import stopwords
22
  # stop_words = set(stopwords.words('english'))
 
23
  # additional_stop_words = {'pack'}
24
  # stop_words.update(additional_stop_words)
25
  # clear_output()
@@ -194,7 +195,7 @@ def offer_finder_by_category(search_input: str, search_category_tuple: Tuple, ca
194
  jaccard_sim_score = get_jaccard_sim(search_input, potential_offers)
195
  output = extract_similar_offers(jaccard_sim_score, threshold)
196
  elif score not in ['cosine', 'jaccard']:
197
- raise ValueError(f'Please enter a valid score: cosine or jaccard; Not {score}')
198
  else: # this means something else is wrong
199
  raise UnknownError(f'Something must be broken. Please try again.')
200
  return output
@@ -251,7 +252,7 @@ def offer_finder_by_entity(search_input: str, entities: Tuple, offers_data: pd.D
251
  return None
252
 
253
 
254
- def search_offers(search_input: str, offers: pd.DataFrame, category_dict: Dict, brand_belong_category_dict: Dict, score: str, score_threshold: float = 0.0):
255
  """Main function. Takes in a search_input and decides whether it can find entities or not. Then executes the appropriate functions
256
  Inputs:
257
  - search_input: a string that a user enters
 
11
  import spacy
12
  from spacy.tokens import DocBin
13
  from spacy.training import offsets_to_biluo_tags
14
+ # import en_fetch_ner_spacy_tsf
15
+ # nlp = en_fetch_ner_spacy_tsf.load()
16
  # clear_output()
17
 
18
+ # import nltk
19
  # nltk.download('stopwords')
20
 
21
+ # from nltk.corpus import stopwords
22
  # stop_words = set(stopwords.words('english'))
23
+ stop_words = ["at", "in", "get"]
24
  # additional_stop_words = {'pack'}
25
  # stop_words.update(additional_stop_words)
26
  # clear_output()
 
195
  jaccard_sim_score = get_jaccard_sim(search_input, potential_offers)
196
  output = extract_similar_offers(jaccard_sim_score, threshold)
197
  elif score not in ['cosine', 'jaccard']:
198
+ raise ParamsInputError(f'Please enter a valid score: cosine or jaccard; Not {score}')
199
  else: # this means something else is wrong
200
  raise UnknownError(f'Something must be broken. Please try again.')
201
  return output
 
252
  return None
253
 
254
 
255
+ def search_offers(search_input: str, offers: pd.DataFrame, category_dict: Dict, brand_belong_category_dict: Dict, score: str = "jaccard", score_threshold: float = 0.0):
256
  """Main function. Takes in a search_input and decides whether it can find entities or not. Then executes the appropriate functions
257
  Inputs:
258
  - search_input: a string that a user enters