ldhldh committed on
Commit
edec0a4
·
verified ·
1 Parent(s): b5b4a53

Update util/preprocessing.py

Browse files
Files changed (1) hide show
  1. util/preprocessing.py +27 -27
util/preprocessing.py CHANGED
@@ -1,28 +1,28 @@
1
- import difflib
2
- import pandas as pd
3
-
4
- def word_to_market_name(word):
5
- markets_df = pd.read_csv('data\market_name_utf8.csv')
6
- markets_names = markets_df['시장명']
7
-
8
- scores = dict()
9
-
10
- for m in markets_names:
11
- sm = difflib.SequenceMatcher(None, word, m)
12
- scores[m] = sm.ratio()
13
-
14
- sorted_scores = sorted(scores.items(), key=lambda item: item[1], reverse=True)
15
- top_3_markets = [market[0] for market in sorted_scores[:3]]
16
-
17
- return top_3_markets
18
-
19
-
20
-
21
- def check_word(word):
22
- markets_df = pd.read_csv('data\market_name_utf8.csv')
23
- markets_names = markets_df['시장명']
24
-
25
- for word in markets_names:
26
- return True
27
-
28
  return False
 
1
+ import difflib
2
+ import pandas as pd
3
+
4
def word_to_market_name(word):
    """Return up to three market names most similar to ``word``.

    Market names are read from the ``시장명`` column of
    ``data/market_name_utf8.csv`` (resolved against the current working
    directory) on every call. Similarity is
    ``difflib.SequenceMatcher(None, word, name).ratio()``.

    Args:
        word: Text to match against the known market names.

    Returns:
        A list of at most three distinct market names, best match first.
        Names with equal scores keep the order in which they first appear
        in the CSV.
    """
    markets_df = pd.read_csv('data/market_name_utf8.csv')
    # Blank cells load as NaN (a float), which SequenceMatcher cannot
    # iterate over; drop them before scoring.
    markets_names = markets_df['시장명'].dropna()

    # Keyed by name so a market listed twice is only ranked once.
    scores = {
        name: difflib.SequenceMatcher(None, word, name).ratio()
        for name in markets_names
    }

    # sorted() is stable, so ties stay in first-appearance order.
    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return [name for name, _ in ranked[:3]]
18
+
19
+
20
+
21
def check_word(word):
    """Report whether ``word`` is exactly one of the known market names.

    Market names are read from the ``시장명`` column of
    ``data/market_name_utf8.csv`` (resolved against the current working
    directory) on every call.

    Args:
        word: Candidate market name.

    Returns:
        ``True`` if ``word`` equals a value in the ``시장명`` column,
        ``False`` otherwise (including when the file has no rows).
    """
    markets_df = pd.read_csv('data/market_name_utf8.csv')
    markets_names = markets_df['시장명']

    # Compare against the column's values explicitly: `word in series`
    # would test the index labels, not the names themselves.
    return bool(markets_names.eq(word).any())