Spaces:
Sleeping
Sleeping
Commit ·
43e574d
1
Parent(s): 4813cc6
Update src/utils.py
Browse files- src/utils.py +25 -2
src/utils.py
CHANGED
|
@@ -25,6 +25,27 @@ stop_words.update(additional_stop_words)
|
|
| 25 |
# clear_output()
|
| 26 |
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
def single_text_cleaner(text: str, remove_stopwords: bool=False, upper_case: bool = False, remove_punctuation: bool=True) -> str:
|
| 29 |
"""Clean one single text input. By default it will convert text to lower case"""
|
| 30 |
if upper_case:
|
|
@@ -251,7 +272,9 @@ def offer_finder_by_entity(search_input: str, entities: Tuple, offers_data: pd.D
|
|
| 251 |
return None
|
| 252 |
|
| 253 |
|
| 254 |
-
def search_offers(search_input: str, offers: pd.DataFrame
|
|
|
|
|
|
|
| 255 |
"""Main function. Takes in a search_input and decides whether it can find entities or not. Then executes the appropriate functions
|
| 256 |
Inputs:
|
| 257 |
- search_input: a string that a user enters
|
|
@@ -282,4 +305,4 @@ def search_offers(search_input: str, offers: pd.DataFrame, category_dict: Dict,
|
|
| 282 |
return search_results
|
| 283 |
|
| 284 |
if __name__ == "__main__":
|
| 285 |
-
|
|
|
|
| 25 |
# clear_output()
|
| 26 |
|
| 27 |
|
| 28 |
+
# load operation data
|
| 29 |
+
path1 = "data/brand_belong_category_dict.json"
|
| 30 |
+
path2 = "data/product_upper_category_dict.json"
|
| 31 |
+
path3 = "data/offered_brands.pkl"
|
| 32 |
+
path4 = "data/offer_retailer.csv"
|
| 33 |
+
|
| 34 |
+
with open(path1, 'r') as f:
|
| 35 |
+
brand_belong_category_dict = json.load(f)
|
| 36 |
+
|
| 37 |
+
with open(path2, 'rb') as f:
|
| 38 |
+
category_dict = json.load(f)
|
| 39 |
+
|
| 40 |
+
with open(path3, 'rb') as f:
|
| 41 |
+
offered_brands = pickle.load(f)
|
| 42 |
+
|
| 43 |
+
df_offers_brand_retailer = pd.read_csv(path4)
|
| 44 |
+
|
| 45 |
+
example_search = "Simply Spiked Lemonade 12 pack at Walmart"
|
| 46 |
+
|
| 47 |
+
# helper functions
|
| 48 |
+
|
| 49 |
def single_text_cleaner(text: str, remove_stopwords: bool=False, upper_case: bool = False, remove_punctuation: bool=True) -> str:
|
| 50 |
"""Clean one single text input. By default it will convert text to lower case"""
|
| 51 |
if upper_case:
|
|
|
|
| 272 |
return None
|
| 273 |
|
| 274 |
|
| 275 |
+
def search_offers(search_input: str=example_search, offers: pd.DataFrame=df_offers_brand_retailer,
|
| 276 |
+
category_dict: Dict=category_dict, brand_belong_category_dict: Dict=brand_belong_category_dict,
|
| 277 |
+
score: str="jaccard", score_threshold: float = 0.0):
|
| 278 |
"""Main function. Takes in a search_input and decides whether it can find entities or not. Then executes the appropriate functions
|
| 279 |
Inputs:
|
| 280 |
- search_input: a string that a user enters
|
|
|
|
| 305 |
return search_results
|
| 306 |
|
| 307 |
if __name__ == "__main__":
|
| 308 |
+
search_offers()
|