File size: 2,612 Bytes
cc47e7d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c77bb1f
 
 
 
 
 
 
 
cc47e7d
c77bb1f
 
cc47e7d
c77bb1f
 
 
 
cc47e7d
c77bb1f
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
import string
import pke
import traceback
from flashtext import KeywordProcessor

class KeyExtractor:
    """Extract ranked noun keyphrases from a text and filter them by a summary.

    Pipeline: MultipartiteRank (pke) proposes candidate keyphrases from the
    original text; flashtext then keeps only those that literally occur in the
    summarized text.
    """

    def get_nouns_multipartite(self, content, n=15):
        """Return up to ``n`` noun/proper-noun keyphrases ranked by MultipartiteRank.

        Parameters
        ----------
        content : str
            Raw text to extract keyphrases from.
        n : int, optional
            Maximum number of keyphrases to return (default 15, as before).

        Returns
        -------
        list[str]
            Best keyphrases in rank order; empty list on any extraction error
            (the traceback is printed, matching the original best-effort style).
        """
        try:
            extractor = pke.unsupervised.MultipartiteRank()
            extractor.load_document(input=content, language='en')
            # Only proper nouns and nouns are kept as candidates.
            # NOTE(review): a punctuation/stopword stoplist used to be built here
            # but was never passed to candidate_selection (that call was commented
            # out), so the dead construction has been removed.
            pos = {'PROPN', 'NOUN'}
            extractor.candidate_selection(pos=pos)
            # Build the Multipartite graph and rank candidates via random walk;
            # alpha controls the weight-adjustment mechanism, threshold/method
            # are the TopicRank-style clustering parameters.
            extractor.candidate_weighting(alpha=1.1,
                                          threshold=0.75,
                                          method='average')
            # get_n_best returns (phrase, score) pairs; keep only the phrases.
            return [phrase for phrase, _score in extractor.get_n_best(n=n)]
        except Exception:
            # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
            # still propagate; extraction failure degrades to "no keyphrases".
            traceback.print_exc()
            return []

    def get_keywords(self, originaltext, summarytext, max_keywords=4):
        """Return keyphrases of ``originaltext`` that also appear in ``summarytext``.

        Parameters
        ----------
        originaltext : str
            Full text whose keyphrases are extracted.
        summarytext : str
            Summary used to filter the keyphrases.
        max_keywords : int, optional
            Cap on returned keywords (default 4, as before).

        Returns
        -------
        list[str]
            At most ``max_keywords`` keyphrases, preserving extraction rank order.
        """
        keywords = self.get_nouns_multipartite(originaltext)
        print("keywords unsummarized: ", keywords)

        keyword_processor = KeywordProcessor()
        for keyword in keywords:
            keyword_processor.add_keyword(keyword)

        # Set for O(1) membership tests below (the original used a list).
        keywords_found = set(keyword_processor.extract_keywords(summarytext))
        print("keywords_found in summarized: ", list(keywords_found))

        # Keep only keyphrases present in the summary, in original rank order;
        # iterating `keywords` (already unique per rank) preserves ordering.
        important_keywords = [kw for kw in keywords if kw in keywords_found]
        return important_keywords[:max_keywords]

Key = KeyExtractor()

# def run(text, summarized_text):
#   result = []
#   imp_keywords = Key.get_keywords(text,summarized_text)
#   for answer in imp_keywords:
#     result.append({
#         "answer": answer.capitalize()
#     })
#   return result

# from pydantic import BaseModel
# from fastapi import FastAPI

# app = FastAPI()
# class Data(BaseModel):
#     text: str
#     summarized_text: str

# @app.post("/")
# async def read_main(data: Data):
#     return run(data.text, data.summarized_text)