File size: 3,845 Bytes
4767334
6c07e47
eede2f2
4767334
 
 
 
 
 
6b28c2c
 
 
 
4767334
 
 
8b36b1f
b32f4b5
10ff21c
e7f2fe8
 
d2942e0
e7f2fe8
 
530b722
 
 
 
0897d96
530b722
 
eede2f2
530b722
 
 
 
 
 
 
 
 
6b28c2c
 
 
 
 
 
caa5338
10ff21c
 
 
caa5338
 
 
351697b
4767334
b32f4b5
4767334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10ff21c
4767334
 
 
10ff21c
4767334
 
 
 
 
b32f4b5
4767334
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# UI framework and regex support.
import gradio
import re
# Helpers imported from other modules (presumably project-local; their
# implementations are not visible in this file).
# NOTE(review): `classify` is imported but not referenced in the visible code.
from preprocess_function import lam_list
from kbert_topics import extract_topics
from mapping import get_mapping
from topics_extraction import classify
from PyDictionary import PyDictionary
# Shared dictionary client: get_output() keeps only keywords for which
# dictionary.meaning(...) returns something truthy.
dictionary=PyDictionary()

from nltk.stem import PorterStemmer
# Shared stemmer: get_output() compares tags against sector/industry words by stem.
stemmer = PorterStemmer()



def _tags_outside_sectors(sector_names, tags_list):
    """Return the processed tags that do not match any sector/industry word.

    Args:
        sector_names: list of mapped sector/industry names (strings).
        tags_list: list of tags returned by ``get_mapping``.

    Returns:
        A de-duplicated list, in first-seen order, of the ``lam_list`` forms of
        the tags that are neither equal to a sector word nor share a stem with
        one.
    """
    # Break every name into its words ("a_b c" -> "a", "b", "c") ...
    sector_words = [word for name in sector_names for word in re.split(r'[ _]', name)]
    # ... and also keep each whole name in underscore-joined form.
    sector_words.extend(name.strip().replace(' ', '_') for name in sector_names)
    sector_words = [word.strip() for word in sector_words if word != '']

    # lam_list returns two lists (presumably verb- and noun-lemmatized forms
    # -- TODO confirm against preprocess_function); both are treated alike.
    sector_forms_v, sector_forms_n = lam_list(sector_words)
    sector_terms = set(sector_forms_v + sector_forms_n)

    tag_forms_v, tag_forms_n = lam_list(tags_list)
    # zip() against tags_list keeps the original pairing semantics: any form
    # beyond len(tags_list) is ignored.  dict.fromkeys de-duplicates while
    # keeping a deterministic first-seen order (a plain set would not).
    candidates = dict.fromkeys(
        form
        for forms in (tag_forms_v, tag_forms_n)
        for form, _ in zip(forms, tags_list)
        if form not in sector_terms
    )

    # Second pass: drop tags that only differ from a sector word by inflection.
    sector_stems = {stemmer.stem(term) for term in sector_terms}
    return [form for form in candidates if stemmer.stem(form) not in sector_stems]


def get_output(event_info):
    """Map an event description to sectors/industries, tags and keywords.

    Args:
        event_info: free-text event description; must contain at least 20
            whitespace-separated words.

    Returns:
        A 5-tuple of display values:
        ``(sector + score, industry + score, all tags, keywords, other tags)``,
        each rendered with ``str()``.  When the description is missing or
        shorter than 20 words, the first element is an explanatory message and
        the remaining four are ``None``.
    """
    # split() with no argument splits on any whitespace run, so repeated spaces
    # do not inflate the count and newline/tab separated words are counted.
    if not event_info or len(event_info.split()) < 20:
        return 'Event discription should have >= 20 words', None, None, None, None

    topic_name, distance_name, topic_name_dep, distance_name_dep, tags_list = get_mapping(event_info)

    # A missing distance is treated as "no mapping found": nothing to compare
    # the tags against, so no "other" tags are reported.
    if distance_name is not None:
        others = _tags_outside_sectors(topic_name + [topic_name_dep], tags_list)
    else:
        others = []

    # Only the first element returned by extract_topics is used here.
    work_list, _ = extract_topics(event_info)
    # Keep only keywords for which the dictionary lookup returns a meaning.
    work_list = [word for word in work_list if dictionary.meaning(word)]

    return (
        str(topic_name) + ' ' + str(distance_name),
        str(topic_name_dep) + ' ' + str(distance_name_dep),
        str(tags_list),
        str(work_list),
        str(others),
    )

  
# Demo page: a description box with a submit button, plus five text boxes that
# display the five values returned by get_output().
with gradio.Blocks(theme = 'gradio/monochrome', title = 'Keyword clustering Demo') as keyword_cluster_demo:

    # Static header / usage hints.
    gradio.Markdown(
        "<p align='center' style='font-size: 20px;'> Interface for checking results of keyword generation and sector/other industries mapping</p>"
    )
    gradio.HTML(
        """<center> Enter event discription <br>Note: Minimum 20 words</center>"""
    )
    gradio.HTML(
    """<center><div style="background-color: grey; padding: 5px; color: white; display: inline-block;">Time taken: 2 min </div></center>"""
    )
    with gradio.Row(scale = 2):
        with gradio.Column(scale=1):
            event_info = gradio.Textbox(label='enter event discription')
            button = gradio.Button("Submit")

        # Plain gradio.Textbox components are used as outputs: the legacy
        # gradio.outputs namespace is deprecated and no longer exists in
        # Gradio 4+, while gradio.Textbox works on both old and new versions.
        output_sec = gradio.Textbox(label="Mapped sector and score")
        output_dep = gradio.Textbox(label="Mapped industry and score")
        output_tags = gradio.Textbox(label="Identified tags (All)")

        with gradio.Row():
            output_keybert = gradio.Textbox(label="Identified keywords (1 word)")
            output_other_tags = gradio.Textbox(label="Identified other tags (not in sector /industry) ")

    # The output order must match the order of the tuple returned by get_output().
    button.click(get_output,
                 [event_info],
                 outputs=[output_sec, output_dep, output_tags, output_keybert, output_other_tags])

# Alternative launch for debugging / public sharing:
#keyword_cluster_demo.queue().launch(share=True,debug =True, show_error =True)
keyword_cluster_demo.queue().launch()