import string
import traceback

import nltk
from nltk.corpus import stopwords
import pke
from flashtext import KeywordProcessor

# Ensure the NLTK stopword corpus is present before stopwords.words() is called.
nltk.download('stopwords')


class KeyExtractor:
    """Extract candidate keywords from a text and keep only those that also
    appear in its summary — i.e. the keywords that "survive" summarization."""

    def get_nouns_multipartite(self, content):
        """Return up to 15 noun/proper-noun keyphrases from *content*,
        ranked by pke's MultipartiteRank.

        Best-effort: any extraction failure is logged via traceback and
        an empty list is returned instead of raising.
        """
        out = []
        try:
            extractor = pke.unsupervised.MultipartiteRank()
            extractor.load_document(input=content, language='en')
            # Only nouns and proper nouns may become keyphrase candidates.
            pos = {'PROPN', 'NOUN'}
            # Stoplist: punctuation, PTB bracket tokens, English stopwords.
            # NOTE(review): built but never passed anywhere — the
            # candidate_selection(stoplist=...) call was commented out
            # (newer pke versions take the stoplist via load_document()).
            # Kept for reference; confirm against the installed pke version.
            stoplist = list(string.punctuation)
            stoplist += ['-lrb-', '-rrb-', '-lcb-', '-rcb-', '-lsb-', '-rsb-']
            stoplist += stopwords.words('english')
            extractor.candidate_selection(pos=pos)
            # Build the Multipartite graph and rank candidates via random walk;
            # alpha controls the weight adjustment mechanism, see TopicRank for
            # the threshold/method parameters.
            extractor.candidate_weighting(alpha=1.1,
                                          threshold=0.75,
                                          method='average')
            keyphrases = extractor.get_n_best(n=15)
            out = [phrase for phrase, _score in keyphrases]
        except Exception:
            # Fix: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt. Preserve the best-effort contract: log the
            # traceback and return an empty list.
            out = []
            traceback.print_exc()
        return out

    def get_keywords(self, originaltext, summarytext):
        """Return at most 4 keywords extracted from *originaltext* that also
        occur (as whole phrases) in *summarytext*.

        Order follows the MultipartiteRank ranking of the original text, so
        the highest-ranked surviving keywords come first.
        """
        keywords = self.get_nouns_multipartite(originaltext)
        print ("keywords unsummarized: ",keywords)

        # flashtext does exact whole-phrase matching inside the summary.
        keyword_processor = KeywordProcessor()
        for keyword in keywords:
            keyword_processor.add_keyword(keyword)
        keywords_found = keyword_processor.extract_keywords(summarytext)
        keywords_found = list(set(keywords_found))
        print ("keywords_found in summarized: ",keywords_found)

        # Keep original ranking order; de-duplicate while filtering.
        important_keywords = []
        for keyword in keywords:
            if keyword in keywords_found and keyword not in important_keywords:
                important_keywords.append(keyword)

        return important_keywords[:4]


Key = KeyExtractor()

# --- Commented-out FastAPI wrapper, kept from the original source ---
# def run(text, summarized_text):
#     result = []
#     imp_keywords = Key.get_keywords(text, summarized_text)
#     for answer in imp_keywords:
#         result.append({
#             "answer": answer.capitalize()
#         })
#     return result
#
# from pydantic import BaseModel
# from fastapi import FastAPI
#
# app = FastAPI()
#
# class Data(BaseModel):
#     text: str
#     summarized_text: str
#
# @app.post("/")
# async def read_main(data: Data):
#     return run(data.text, data.summarized_text)