Spaces:
Sleeping
Sleeping
Commit ·
329b6b4
1
Parent(s): 0db34df
Update src/utils.py
Browse files- src/utils.py +7 -6
src/utils.py
CHANGED
|
@@ -11,15 +11,16 @@ import numpy as np
|
|
| 11 |
import spacy
|
| 12 |
from spacy.tokens import DocBin
|
| 13 |
from spacy.training import offsets_to_biluo_tags
|
| 14 |
-
import en_fetch_ner_spacy_tsf
|
| 15 |
-
nlp = en_fetch_ner_spacy_tsf.load()
|
| 16 |
# clear_output()
|
| 17 |
|
| 18 |
-
import nltk
|
| 19 |
# nltk.download('stopwords')
|
| 20 |
|
| 21 |
-
from nltk.corpus import stopwords
|
| 22 |
# stop_words = set(stopwords.words('english'))
|
|
|
|
| 23 |
# additional_stop_words = {'pack'}
|
| 24 |
# stop_words.update(additional_stop_words)
|
| 25 |
# clear_output()
|
|
@@ -194,7 +195,7 @@ def offer_finder_by_category(search_input: str, search_category_tuple: Tuple, ca
|
|
| 194 |
jaccard_sim_score = get_jaccard_sim(search_input, potential_offers)
|
| 195 |
output = extract_similar_offers(jaccard_sim_score, threshold)
|
| 196 |
elif score not in ['cosine', 'jaccard']:
|
| 197 |
-
raise
|
| 198 |
else: # this means something else is wrong
|
| 199 |
raise UnknownError(f'Something must be broken. Please try again.')
|
| 200 |
return output
|
|
@@ -251,7 +252,7 @@ def offer_finder_by_entity(search_input: str, entities: Tuple, offers_data: pd.D
|
|
| 251 |
return None
|
| 252 |
|
| 253 |
|
| 254 |
-
def search_offers(search_input: str, offers: pd.DataFrame, category_dict: Dict, brand_belong_category_dict: Dict, score: str, score_threshold: float = 0.0):
|
| 255 |
"""Main function. Takes in a search_input and decides whether it can find entities or not. Then executes the appropriate functions
|
| 256 |
Inputs:
|
| 257 |
- search_input: a string that a user enters
|
|
|
|
| 11 |
import spacy
|
| 12 |
from spacy.tokens import DocBin
|
| 13 |
from spacy.training import offsets_to_biluo_tags
|
| 14 |
+
# import en_fetch_ner_spacy_tsf
|
| 15 |
+
# nlp = en_fetch_ner_spacy_tsf.load()
|
| 16 |
# clear_output()
|
| 17 |
|
| 18 |
+
# import nltk
|
| 19 |
# nltk.download('stopwords')
|
| 20 |
|
| 21 |
+
# from nltk.corpus import stopwords
|
| 22 |
# stop_words = set(stopwords.words('english'))
|
| 23 |
+
stop_words = ["at", "in", "get"]
|
| 24 |
# additional_stop_words = {'pack'}
|
| 25 |
# stop_words.update(additional_stop_words)
|
| 26 |
# clear_output()
|
|
|
|
| 195 |
jaccard_sim_score = get_jaccard_sim(search_input, potential_offers)
|
| 196 |
output = extract_similar_offers(jaccard_sim_score, threshold)
|
| 197 |
elif score not in ['cosine', 'jaccard']:
|
| 198 |
+
raise ParamsInputError(f'Please enter a valid score: cosine or jaccard; Not {score}')
|
| 199 |
else: # this means something else is wrong
|
| 200 |
raise UnknownError(f'Something must be broken. Please try again.')
|
| 201 |
return output
|
|
|
|
| 252 |
return None
|
| 253 |
|
| 254 |
|
| 255 |
+
def search_offers(search_input: str, offers: pd.DataFrame, category_dict: Dict, brand_belong_category_dict: Dict, score: str = "jaccard", score_threshold: float = 0.0):
|
| 256 |
"""Main function. Takes in a search_input and decides whether it can find entities or not. Then executes the appropriate functions
|
| 257 |
Inputs:
|
| 258 |
- search_input: a string that a user enters
|