File size: 3,253 Bytes
743fd42
 
 
2225e5e
 
743fd42
 
2d0d0c7
 
 
 
2225e5e
743fd42
 
2d0d0c7
 
 
 
 
743fd42
 
 
2225e5e
2d0d0c7
 
 
 
743fd42
 
2d0d0c7
 
 
 
743fd42
2d0d0c7
 
2225e5e
743fd42
 
2d0d0c7
 
 
 
 
743fd42
 
 
3f135be
743fd42
 
2d0d0c7
 
 
 
 
 
 
 
 
 
8341ea7
743fd42
2d0d0c7
743fd42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d0d0c7
7e1c3f7
 
 
2d0d0c7
 
 
743fd42
 
 
 
2225e5e
 
 
 
743fd42
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import pandas as pd
import gradio as gr
from abc import ABC
from modules.module_word2Context import Word2Context
from typing import List, Tuple

class Connector(ABC):
    """Shared text-parsing and error-formatting helpers for UI connectors."""

    def parse_word(
        self,
        word: str
    ) -> str:
        """Normalize a single word: lowercase it and trim surrounding whitespace."""
        return word.strip().lower()

    def parse_words(
        self,
        array_in_string: str
    ) -> List[str]:
        """Split a comma-separated string into a list of normalized words.

        Blank tokens (empty or whitespace-only segments) are dropped;
        an empty/blank input yields an empty list.
        """
        raw = array_in_string.strip()
        if not raw:
            return []

        return [
            self.parse_word(token)
            for token in raw.split(',')
            if token.strip()
        ]

    def process_error(
        self,
        err: str
    ) -> str:
        """Wrap a non-empty error message in centered HTML; pass through empty input."""
        return f"<center><h3>{err}</h3></center>" if err else err

class Word2ContextExplorerConnector(Connector):
    """Gradio connector for the word-in-context explorer.

    Bridges UI callbacks to a `Word2Context` instance and shapes the
    results into the tuples expected by the Gradio output components.
    """

    def __init__(
        self,
        **kwargs
    ) -> None:
        """Create the underlying `Word2Context` explorer.

        Keyword Args:
            vocabulary: Vocabulary class instance (required).
            context: Context dataset HF name or local path (required).

        Raises:
            KeyError: if either required keyword argument is missing.
        """
        vocabulary = kwargs.get('vocabulary', None)
        context = kwargs.get('context', None)

        if vocabulary is None or context is None:
            # Name the missing arguments instead of raising a bare KeyError,
            # so the failure is diagnosable at the call site.
            raise KeyError(
                "Word2ContextExplorerConnector requires 'vocabulary' and 'context' kwargs"
            )

        self.word2context_explorer = Word2Context(
            context,    # Context dataset HF name | path
            vocabulary  # Vocabulary class instance
        )

    def get_word_info(
        self,
        word: str
    ) -> Tuple:
        """Gather summary info for `word`.

        Returns a 6-tuple: (error HTML, contexts DataFrame, subsets info
        string, distribution plot, word-cloud plot, CheckboxGroup update).
        On validation error only the error slot is populated; the other
        slots keep their empty defaults.
        """
        word = self.parse_word(word)

        # Defaults returned unchanged on the error path.
        contexts = pd.DataFrame([], columns=[''])
        subsets_info = ""
        distribution_plot = None
        word_cloud_plot = None
        subsets_choice = gr.CheckboxGroup.update(choices=[])

        err = self.word2context_explorer.errorChecking(word)
        if err:
            return (self.process_error(err), contexts, subsets_info,
                    distribution_plot, word_cloud_plot, subsets_choice)

        subsets_info, subsets_origin_info = self.word2context_explorer.getSubsetsInfo(word)

        # Subset keys carry extra text after the first space; keep only the
        # leading token as the checkbox label.
        clean_keys = [key.split(" ")[0].strip() for key in subsets_origin_info]
        subsets_choice = gr.CheckboxGroup.update(choices=clean_keys)

        distribution_plot = self.word2context_explorer.genDistributionPlot(word)
        word_cloud_plot = self.word2context_explorer.genWordCloudPlot(word)

        # err is "" here, so process_error passes it through unchanged.
        return (self.process_error(err), contexts, subsets_info,
                distribution_plot, word_cloud_plot, subsets_choice)

    def get_word_context(
        self,
        word: str,
        n_context: int,
        subset_choice: List[str]
    ) -> Tuple:
        """Fetch up to `n_context` example contexts for `word`.

        Args:
            word: query word (normalized via `parse_word`).
            n_context: maximum number of contexts to retrieve.
            subset_choice: subset names selected in the UI; must be non-empty.

        Returns a 2-tuple: (error HTML, contexts DataFrame). The DataFrame
        gains a 'buscar' column with a web-search link per context.
        """
        word = self.parse_word(word)
        contexts = pd.DataFrame([], columns=[''])

        err = self.word2context_explorer.errorChecking(word)
        if err:
            return self.process_error(err), contexts

        # Guard clause: at least one subset must be selected.
        if not subset_choice:
            err = self.process_error(
                "Error: Palabra no ingresada y/o conjunto/s de interés no seleccionado/s!"
            )
            return err, contexts

        ds = self.word2context_explorer.findSplits(word, subset_choice)

        list_of_contexts = self.word2context_explorer.getContexts(word, n_context, ds)

        contexts = pd.DataFrame(list_of_contexts, columns=['#', 'contexto', 'conjunto'])
        contexts["buscar"] = contexts.contexto.apply(
            lambda text: self.word2context_explorer.genWebLink(text)
        )

        # err is "" on this path; process_error passes it through unchanged.
        return self.process_error(err), contexts