Upload score_fincat.py
Browse files- score_fincat.py +33 -0
score_fincat.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import nltk
|
| 3 |
+
from fincat_utils import extract_context_words
|
| 4 |
+
from fincat_utils import bert_embedding_extract
|
| 5 |
+
import pickle
|
| 6 |
+
# Load the pickled classifier once at import time; score_fincat() below calls
# its predict() method. The context manager closes the file handle as soon as
# unpickling finishes instead of leaving it to the garbage collector.
# NOTE(review): pickle executes arbitrary code while loading -- only ship a
# model file that comes from a trusted source.
with open("lr_clf_FiNCAT.pickle", 'rb') as model_file:
    lr_clf = pickle.load(model_file)

# Fetch the NLTK 'punkt' resource at import time.
# NOTE(review): presumably required by the fincat_utils helpers -- confirm.
nltk.download('punkt')
|
| 8 |
+
|
| 9 |
+
def score_fincat(txt):
    """Label every whitespace-separated token of *txt*.

    Tokens that contain at least one digit are passed, together with their
    character offsets in the padded text, to ``extract_context_words`` and
    ``bert_embedding_extract``; the resulting features are classified with
    the module-level ``lr_clf``. Tokens without a digit receive a blank
    label.

    Args:
        txt: The input text.

    Returns:
        A list of ``(text, label)`` tuples in reading order. For a token
        containing a digit the label is ``' In-claim'`` or
        ``'Out-of-Claim'`` and one trailing punctuation character (if any)
        is removed from the token; for every other token the label is
        ``' '``. If ``bert_embedding_extract`` returns a value whose first
        element equals the string ``'None'``, the whole padded text is
        appended with a blank label and the list built so far is returned
        immediately.
    """
    trailing_punctuation = ('.', ',', ';', ":", "-", "!", "?", ")", '"', "'")
    highlight = []
    # The text is padded with one space on each side; all offsets handed to
    # extract_context_words refer to this padded string.
    txt = " " + txt + " "
    # Position just past the previously visited token. Searching from here
    # makes repeated tokens resolve to their own occurrence rather than the
    # first one, and works for tokens delimited by tabs or newlines as well.
    cursor = 0
    for token in txt.split():
        token_start = txt.find(token, cursor)
        cursor = token_start + len(token)

        if not any(char.isdigit() for char in token):
            highlight.append((token, ' '))
            continue

        word = token
        if word[-1] in trailing_punctuation:
            # Classify the bare number; the punctuation mark is not part of it.
            word = word[:-1]
        st = token_start
        ed = st + len(word)
        x = {'paragraph': txt, 'offset_start': st, 'offset_end': ed}
        context_text = extract_context_words(x)
        features = bert_embedding_extract(context_text, word)
        if features[0] == 'None':
            # The helper reported that no features are available: emit the
            # whole padded text unlabelled and stop.
            highlight.append((txt, ' '))
            return highlight
        # NOTE(review): the feature vector is assumed to hold 768 values, as
        # implied by the reshape below -- confirm against fincat_utils.
        prediction = lr_clf.predict(features.reshape(1, 768))
        highlight.append((word, ' In-claim' if prediction == 1 else 'Out-of-Claim'))
    return highlight
|