Liyan06
committed on
Commit
·
2d158d3
1
Parent(s):
93e9112
add entity highlight
Browse files- handler.py +22 -3
handler.py
CHANGED
|
@@ -3,6 +3,16 @@ from web_retrieval import *
|
|
| 3 |
from nltk.tokenize import sent_tokenize
|
| 4 |
import evaluate
|
| 5 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
def sort_chunks_single_doc_claim(used_chunk, support_prob_per_chunk):
|
| 8 |
'''
|
|
@@ -19,7 +29,13 @@ def sort_chunks_single_doc_claim(used_chunk, support_prob_per_chunk):
|
|
| 19 |
ranked_docs, scores = zip(*ranked_doc_score)
|
| 20 |
|
| 21 |
return ranked_docs, scores
|
| 22 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
class EndpointHandler():
|
| 25 |
def __init__(self, path="./"):
|
|
@@ -30,6 +46,7 @@ class EndpointHandler():
|
|
| 30 |
def __call__(self, data):
|
| 31 |
|
| 32 |
claim = data['inputs']['claims'][0]
|
|
|
|
| 33 |
|
| 34 |
# Using user-provided document to do fact-checking
|
| 35 |
if len(data['inputs']['docs']) == 1 and data['inputs']['docs'][0] != '':
|
|
@@ -48,7 +65,8 @@ class EndpointHandler():
|
|
| 48 |
outputs = {
|
| 49 |
'ranked_docs': ranked_docs,
|
| 50 |
'scores': scores,
|
| 51 |
-
'span_to_highlight': span_to_highlight
|
|
|
|
| 52 |
}
|
| 53 |
|
| 54 |
else:
|
|
@@ -69,7 +87,8 @@ class EndpointHandler():
|
|
| 69 |
'ranked_docs': ranked_docs,
|
| 70 |
'scores': scores,
|
| 71 |
'ranked_urls': ranked_urls,
|
| 72 |
-
'span_to_highlight': span_to_highlight
|
|
|
|
| 73 |
}
|
| 74 |
|
| 75 |
return outputs
|
|
|
|
| 3 |
from nltk.tokenize import sent_tokenize
|
| 4 |
import evaluate
|
| 5 |
|
| 6 |
import spacy
from spacy.cli import download

# Load the large English spaCy pipeline once at import time; download it
# on first use if it is not installed.
try:
    nlp = spacy.load("en_core_web_lg")
except OSError:
    # spacy.load raises OSError when the model package is missing.
    # Catching OSError specifically (instead of a bare `except:`) avoids
    # swallowing KeyboardInterrupt/SystemExit and unrelated errors.
    download("en_core_web_lg")
    nlp = spacy.load("en_core_web_lg")
| 16 |
|
| 17 |
def sort_chunks_single_doc_claim(used_chunk, support_prob_per_chunk):
|
| 18 |
'''
|
|
|
|
| 29 |
ranked_docs, scores = zip(*ranked_doc_score)
|
| 30 |
|
| 31 |
return ranked_docs, scores
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def extract_entities(text):
    """Return the unique named-entity surface strings found in *text*.

    Runs the module-level spaCy pipeline ``nlp`` over *text* and collects
    ``ent.text`` for each recognized entity. Duplicates are removed while
    preserving first-occurrence order — the original set-based dedup
    (`list({...})`) returned entities in nondeterministic order, which made
    downstream output (e.g. highlight order) unstable across runs.

    :param text: raw text to analyze (str).
    :return: list of unique entity strings in order of first appearance.
    """
    doc = nlp(text)  # renamed local: don't shadow the `text` parameter
    return list(dict.fromkeys(ent.text for ent in doc.ents))
|
| 38 |
+
|
| 39 |
|
| 40 |
class EndpointHandler():
|
| 41 |
def __init__(self, path="./"):
|
|
|
|
| 46 |
def __call__(self, data):
|
| 47 |
|
| 48 |
claim = data['inputs']['claims'][0]
|
| 49 |
+
ents = extract_entities(claim)
|
| 50 |
|
| 51 |
# Using user-provided document to do fact-checking
|
| 52 |
if len(data['inputs']['docs']) == 1 and data['inputs']['docs'][0] != '':
|
|
|
|
| 65 |
outputs = {
|
| 66 |
'ranked_docs': ranked_docs,
|
| 67 |
'scores': scores,
|
| 68 |
+
'span_to_highlight': span_to_highlight,
|
| 69 |
+
'entities': ents
|
| 70 |
}
|
| 71 |
|
| 72 |
else:
|
|
|
|
| 87 |
'ranked_docs': ranked_docs,
|
| 88 |
'scores': scores,
|
| 89 |
'ranked_urls': ranked_urls,
|
| 90 |
+
'span_to_highlight': span_to_highlight,
|
| 91 |
+
'entities': ents
|
| 92 |
}
|
| 93 |
|
| 94 |
return outputs
|