# MCQ-multipartite / keyExtractor / multiPartiteGraph.py
# Provenance: commit c77bb1f ("fix: log") by mikymatt
import nltk
# Side effect at import time: fetch the NLTK stopword corpus if missing
# (network access on first run; no-op once cached locally).
nltk.download('stopwords')
from nltk.corpus import stopwords
import string
import pke  # third-party: python keyphrase extraction (MultipartiteRank)
import traceback
from flashtext import KeywordProcessor  # third-party: fast keyword matching
class KeyExtractor:
    """Extract ranked keyphrases from text with pke's MultipartiteRank.

    Used to pick answer candidates for MCQ generation: keyphrases are
    extracted from the original text and then filtered down to those
    that survive into the summarized text.
    """

    def get_nouns_multipartite(self, content):
        """Return up to 15 noun/proper-noun keyphrases from *content*.

        Parameters
        ----------
        content : str
            Raw text to extract keyphrases from.

        Returns
        -------
        list[str]
            Keyphrases ordered best-first; empty list if extraction fails
            for any reason (the error is printed, not raised).
        """
        out = []
        try:
            extractor = pke.unsupervised.MultipartiteRank()
            extractor.load_document(input=content, language='en')
            # Restrict candidate keyphrases to nouns and proper nouns.
            pos = {'PROPN', 'NOUN'}
            # NOTE(review): the original built a punctuation + stopword
            # stoplist here but never passed it anywhere — newer pke
            # versions dropped the `stoplist=` parameter from
            # candidate_selection (filtering moved to extractor.stoplist).
            # Removed as dead code; behavior is unchanged.
            extractor.candidate_selection(pos=pos)
            # Build the multipartite graph and rank candidates via random
            # walk. `alpha` controls the weight-adjustment mechanism;
            # `threshold`/`method` are the TopicRank-style clustering
            # parameters (see pke's TopicRank docs).
            extractor.candidate_weighting(alpha=1.1,
                                          threshold=0.75,
                                          method='average')
            # get_n_best yields (phrase, score) pairs, best first.
            out = [phrase for phrase, _score in extractor.get_n_best(n=15)]
        except Exception:
            # pke can fail in several ways (missing spaCy model, empty
            # document, ...). Log the traceback and fall back to "no
            # keyphrases" rather than crashing the caller. Was a bare
            # `except:`, which also swallowed KeyboardInterrupt/SystemExit.
            out = []
            traceback.print_exc()
        return out

    def get_keywords(self, originaltext, summarytext):
        """Return up to 4 keyphrases of *originaltext* present in *summarytext*.

        Parameters
        ----------
        originaltext : str
            Full source text; keyphrases are ranked on this.
        summarytext : str
            Summarized text; only keyphrases that also occur here survive.

        Returns
        -------
        list[str]
            At most 4 keyphrases, in the original ranking order.
        """
        keywords = self.get_nouns_multipartite(originaltext)
        print("keywords unsummarized: ", keywords)
        # flashtext gives linear-time multi-keyword search over the summary.
        keyword_processor = KeywordProcessor()
        for keyword in keywords:
            keyword_processor.add_keyword(keyword)
        keywords_found = list(set(keyword_processor.extract_keywords(summarytext)))
        print("keywords_found in summarized: ", keywords_found)
        # Keep ranking order from `keywords` while filtering to those that
        # appear in the summary; de-duplicate as we go.
        important_keywords = []
        for keyword in keywords:
            if keyword in keywords_found and keyword not in important_keywords:
                important_keywords.append(keyword)
        return important_keywords[:4]
# Module-level singleton; shared by the API handlers further down in this file.
Key = KeyExtractor()
# def run(text, summarized_text):
# result = []
# imp_keywords = Key.get_keywords(text,summarized_text)
# for answer in imp_keywords:
# result.append({
# "answer": answer.capitalize()
# })
# return result
# from pydantic import BaseModel
# from fastapi import FastAPI
# app = FastAPI()
# class Data(BaseModel):
# text: str
# summarized_text: str
# @app.post("/")
# async def read_main(data: Data):
# return run(data.text, data.summarized_text)