Spaces:
Sleeping
Update data_utils.py
Browse files
- data_utils.py +3 -3
data_utils.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import re
|
| 2 |
import math
|
| 3 |
import pandas as pd
|
| 4 |
-
from typing import List
|
| 5 |
|
| 6 |
STOPWORDS = set("""
|
| 7 |
a an and the or for nor but so yet of to in on with at by from as is are was were be being been
|
|
@@ -50,7 +50,7 @@ def simple_rake(text, min_len=2, max_len=3, top_k=12):
|
|
| 50 |
for w in words:
|
| 51 |
if w in STOPWORDS:
|
| 52 |
if cur:
|
| 53 |
-
phrases.append(" ".join(cur)); cur
|
| 54 |
else:
|
| 55 |
cur.append(w)
|
| 56 |
if cur:
|
|
@@ -62,7 +62,7 @@ def simple_rake(text, min_len=2, max_len=3, top_k=12):
|
|
| 62 |
freq[t] = freq.get(t,0)+1
|
| 63 |
degree[t] = degree.get(t,0)+(len(toks)-1)
|
| 64 |
for ph in phrases:
|
| 65 |
-
scores[ph] = sum((degree.get(t,0)+1)/
|
| 66 |
ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
| 67 |
return [p for p,_ in ranked if min_len <= len(p.split()) <= max_len][:top_k]
|
| 68 |
|
|
|
|
| 1 |
import re
|
| 2 |
import math
|
| 3 |
import pandas as pd
|
| 4 |
+
from typing import List
|
| 5 |
|
| 6 |
STOPWORDS = set("""
|
| 7 |
a an and the or for nor but so yet of to in on with at by from as is are was were be being been
|
|
|
|
| 50 |
for w in words:
|
| 51 |
if w in STOPWORDS:
|
| 52 |
if cur:
|
| 53 |
+
phrases.append(" ".join(cur)); cur=[]
|
| 54 |
else:
|
| 55 |
cur.append(w)
|
| 56 |
if cur:
|
|
|
|
| 62 |
freq[t] = freq.get(t,0)+1
|
| 63 |
degree[t] = degree.get(t,0)+(len(toks)-1)
|
| 64 |
for ph in phrases:
|
| 65 |
+
scores[ph] = sum((degree.get(t,0)+1)/(freq.get(t,1)) for t in ph.split())
|
| 66 |
ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
|
| 67 |
return [p for p,_ in ranked if min_len <= len(p.split()) <= max_len][:top_k]
|
| 68 |
|