Spaces:
Runtime error
Runtime error
Upload utils.py
Browse files
utils.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from collections import Counter

import pymorphy2
|
| 2 |
+
# Module-wide analyzer instance, created once at import time. check() binds
# this object as the default value of its ``morph`` parameter.
morph = pymorphy2.MorphAnalyzer()
|
| 3 |
+
|
| 4 |
+
def counter(s: str) -> dict:
    """Count how many times each character occurs in ``s``.

    Args:
        s: The string to tally. It is iterated element by element, so any
            iterable of hashable items works as well.

    Returns:
        A plain ``dict`` mapping each distinct character to its number of
        occurrences, in first-seen order. Empty when ``s`` is empty.
    """
    # Counter does the tallying; converting back keeps the return value a
    # plain dict, so a missing key is still reported as missing rather than
    # as an implicit zero.
    return dict(Counter(s))
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def sweet_check(s1: str, s2: str) -> bool:
    """Report whether ``s2`` has a significant token that ``s1`` lacks.

    Both strings are lower-cased and split on whitespace. Tokens that are
    stop words (a fixed list of Russian discount/price words and short
    prepositions and conjunctions) or a single stand-alone punctuation
    character are discarded. No morphological normalisation is applied, so
    tokens are compared exactly as written.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        ``True`` if, after filtering, ``s2`` contains at least one token that
        is absent from ``s1``. ``False`` if every token of ``s2`` also occurs
        in ``s1``, which includes the case where both token sets are equal.
    """
    # Raw string: the literal contains a backslash followed by "|", which is
    # an invalid escape sequence in an ordinary string literal.
    stop_punct = frozenset(r',./!@#$%^&*()_+=-<>?\|{}[]`~/')
    stop_words = frozenset((
        "скидка", "скидкой", "скидки", "скидке", "недорого", "дешево",
        "в", "на", "для", "о", "у", "и", "с", "из",
    ))
    stop = stop_words | stop_punct

    # split() with no argument never produces empty tokens, so repeated,
    # leading or trailing spaces cannot introduce a phantom "" token that
    # would make two otherwise identical strings look different.
    tokens_s1 = set(s1.lower().split()) - stop
    tokens_s2 = set(s2.lower().split()) - stop

    # Equal sets and "s2 is a subset of s1" both leave nothing here; every
    # other relationship leaves at least one token.
    return bool(tokens_s2 - tokens_s1)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def check(s1: str, s2: str, debag=False, morph=morph) -> bool:
    """Compare two strings by their lemmatised, stop-word-filtered tokens.

    Both strings are lower-cased and split on whitespace. Each token is
    replaced by ``morph.parse(token)[0].normal_form``, and tokens found in
    the stop list (a fixed list of Russian discount/price words, short
    prepositions and conjunctions, and single punctuation characters) are
    dropped. The two resulting token sets are then compared:

    * equal sets -> ``False``;
    * every token of ``s1`` is also in ``s2`` (``s2`` has extras) -> ``True``;
    * every token of ``s2`` is also in ``s1`` (``s1`` has extras) -> ``False``;
    * otherwise the tokens unique to each side are joined with spaces and
      compared character by character (see the scoring comments below).

    NOTE(review): what ``True`` / ``False`` mean to callers is not stated
    anywhere in this file - confirm against the call sites.

    Args:
        s1: First string.
        s2: Second string.
        debag: When truthy, print both lemma lists and the final score.
            (The spelling is kept because it is part of the signature.)
        morph: Analyzer object exposing ``parse(token)`` whose first result
            has a ``normal_form`` attribute. Defaults to the module-level
            ``morph`` instance.

    Returns:
        ``True`` or ``False`` as described above.
    """
    # Raw string: the literal contains a backslash followed by "|", which is
    # an invalid escape sequence in an ordinary string literal.
    stop_punct = frozenset(r',./!@#$%^&*()_+=-<>?\|{}[]`~/')
    stop_words = frozenset((
        "скидка", "скидкой", "скидки", "скидке", "недорого", "дешево",
        "в", "на", "для", "о", "у", "и", "с", "из",
    ))
    stop = stop_words | stop_punct

    # split() with no argument never produces empty tokens, so repeated,
    # leading or trailing spaces do not reach the analyzer as "".
    lemmas_s1 = [morph.parse(tok)[0].normal_form for tok in s1.lower().split()]
    lemmas_s2 = [morph.parse(tok)[0].normal_form for tok in s2.lower().split()]

    set_s1 = set(lemmas_s1) - stop
    set_s2 = set(lemmas_s2) - stop
    if set_s1 == set_s2:
        return False

    diff_s1 = ' '.join(set_s1 - set_s2)
    diff_s2 = ' '.join(set_s2 - set_s1)
    if debag:
        print(lemmas_s1)
        print(lemmas_s2)

    if not diff_s1:
        return True
    if not diff_s2:
        return False

    # Pick the shorter and the longer leftover string explicitly. A lookup
    # table keyed by length would collapse to a single entry when both
    # lengths are equal and silently drop one of the two strings.
    if len(diff_s1) <= len(diff_s2):
        min_s, max_s = diff_s1, diff_s2
    else:
        min_s, max_s = diff_s2, diff_s1

    # Score: +1 for each character of the shorter string that can still be
    # paired with an unused occurrence in the longer one, -1 otherwise. The
    # total depends only on character counts, not on token order.
    available = counter(max_s)
    c = 0
    for ch in min_s:
        if available.get(ch, 0) > 0:
            c += 1
            available[ch] -= 1
        else:
            c -= 1

    # Equal-length leftovers are penalised, which keeps their ratio below 1.
    if len(diff_s1) == len(diff_s2):
        c -= 1

    # min_s is non-empty here (both empty cases returned above).
    ratio = c / len(min_s)
    if debag:
        print(ratio)
    # The ratio reaches 1.0 only when every character of the shorter string
    # was matched and the two lengths differ.
    return ratio < 1.0
|