Alpha108 committed on
Commit
7e08480
·
verified ·
1 Parent(s): d5d0f39

Update data_utils.py

Browse files
Files changed (1) hide show
  1. data_utils.py +3 -3
data_utils.py CHANGED
@@ -1,7 +1,7 @@
1
  import re
2
  import math
3
  import pandas as pd
4
- from typing import List, Tuple
5
 
6
  STOPWORDS = set("""
7
  a an and the or for nor but so yet of to in on with at by from as is are was were be being been
@@ -50,7 +50,7 @@ def simple_rake(text, min_len=2, max_len=3, top_k=12):
50
  for w in words:
51
  if w in STOPWORDS:
52
  if cur:
53
- phrases.append(" ".join(cur)); cur = []
54
  else:
55
  cur.append(w)
56
  if cur:
@@ -62,7 +62,7 @@ def simple_rake(text, min_len=2, max_len=3, top_k=12):
62
  freq[t] = freq.get(t,0)+1
63
  degree[t] = degree.get(t,0)+(len(toks)-1)
64
  for ph in phrases:
65
- scores[ph] = sum((degree.get(t,0)+1)/ (freq.get(t,1)) for t in ph.split())
66
  ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
67
  return [p for p,_ in ranked if min_len <= len(p.split()) <= max_len][:top_k]
68
 
 
1
  import re
2
  import math
3
  import pandas as pd
4
+ from typing import List
5
 
6
  STOPWORDS = set("""
7
  a an and the or for nor but so yet of to in on with at by from as is are was were be being been
 
50
  for w in words:
51
  if w in STOPWORDS:
52
  if cur:
53
+ phrases.append(" ".join(cur)); cur=[]
54
  else:
55
  cur.append(w)
56
  if cur:
 
62
  freq[t] = freq.get(t,0)+1
63
  degree[t] = degree.get(t,0)+(len(toks)-1)
64
  for ph in phrases:
65
+ scores[ph] = sum((degree.get(t,0)+1)/(freq.get(t,1)) for t in ph.split())
66
  ranked = sorted(scores.items(), key=lambda x: x[1], reverse=True)
67
  return [p for p,_ in ranked if min_len <= len(p.split()) <= max_len][:top_k]
68