Spaces:
Runtime error
Runtime error
File size: 3,253 Bytes
743fd42 2225e5e 743fd42 2d0d0c7 2225e5e 743fd42 2d0d0c7 743fd42 2225e5e 2d0d0c7 743fd42 2d0d0c7 743fd42 2d0d0c7 2225e5e 743fd42 2d0d0c7 743fd42 3f135be 743fd42 2d0d0c7 8341ea7 743fd42 2d0d0c7 743fd42 2d0d0c7 7e1c3f7 2d0d0c7 743fd42 2225e5e 743fd42 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
import pandas as pd
import gradio as gr
from abc import ABC
from modules.module_word2Context import Word2Context
from typing import List, Tuple
class Connector(ABC):
    """Base class offering input-normalisation and error-formatting helpers."""

    def parse_word(self, word: str) -> str:
        """Return ``word`` lower-cased with surrounding whitespace removed."""
        lowered = word.lower()
        return lowered.strip()

    def parse_words(self, array_in_string: str) -> List[str]:
        """Split a comma-separated string into a list of normalised words.

        Each non-blank item is passed through :meth:`parse_word`; blank items
        (empty or whitespace-only) are dropped. A blank input yields ``[]``.
        """
        parsed: List[str] = []
        for chunk in array_in_string.strip().split(','):
            # Skip empty slots such as those produced by ",," or a trailing comma.
            if chunk.strip():
                parsed.append(self.parse_word(chunk))
        return parsed

    def process_error(self, err: str) -> str:
        """Wrap a non-empty error message in centered ``<h3>`` HTML markup.

        A falsy ``err`` is returned unchanged so callers can always forward
        the result, whether or not an error occurred.
        """
        if not err:
            return err
        return "".join(("<center><h3>", err, "</h3></center>"))
class Word2ContextExplorerConnector(Connector):
    """Adapter between the Gradio interface and a ``Word2Context`` explorer.

    Each public method normalises the incoming word, delegates the actual
    work to ``self.word2context_explorer`` and returns a tuple whose first
    element is an HTML-formatted error message (empty when no error occurred).
    """

    def __init__(
        self,
        **kwargs
    ) -> None:
        """Build the underlying ``Word2Context`` explorer.

        Keyword Args:
            vocabulary: Vocabulary class instance.
            context: Context dataset HF name | path.

        Raises:
            KeyError: If ``vocabulary`` or ``context`` is missing or ``None``.
        """
        vocabulary = kwargs.get('vocabulary')
        context = kwargs.get('context')

        missing = [
            name
            for name, value in (('vocabulary', vocabulary), ('context', context))
            if value is None
        ]
        if missing:
            # Same exception type as before, now naming what is missing.
            raise KeyError(
                "Missing required keyword argument(s): " + ", ".join(missing)
            )

        self.word2context_explorer = Word2Context(
            context,    # Context dataset HF name | path
            vocabulary  # Vocabulary class instance
        )

    def get_word_info(
        self,
        word: str
    ) -> Tuple:
        """Collect the subset information and plots available for ``word``.

        Args:
            word: Word typed by the user; it is lower-cased and stripped
                before being used.

        Returns:
            A 6-tuple ``(error_html, contexts, subsets_info,
            distribution_plot, word_cloud_plot, subsets_choice)``.
            ``contexts`` is always an empty placeholder ``DataFrame``.
            When the explorer's ``errorChecking`` reports an error, the
            remaining elements keep their empty defaults (``""``, ``None``,
            ``None`` and an update with no choices).
        """
        word = self.parse_word(word)

        # Empty placeholders returned as-is when the word fails validation.
        contexts = pd.DataFrame([], columns=[''])
        subsets_info = ""
        distribution_plot = None
        word_cloud_plot = None
        # gr.update() is the generic update helper; the per-component
        # ``CheckboxGroup.update`` classmethod no longer exists in Gradio 4.
        subsets_choice = gr.update(choices=[])

        err = self.word2context_explorer.errorChecking(word)
        if err:
            return (
                self.process_error(err),
                contexts,
                subsets_info,
                distribution_plot,
                word_cloud_plot,
                subsets_choice,
            )

        subsets_info, subsets_origin_info = self.word2context_explorer.getSubsetsInfo(word)

        # Keep only the first space-delimited token of each key as the choice label.
        clean_keys = [key.split(" ")[0].strip() for key in subsets_origin_info]
        subsets_choice = gr.update(choices=clean_keys)

        distribution_plot = self.word2context_explorer.genDistributionPlot(word)
        word_cloud_plot = self.word2context_explorer.genWordCloudPlot(word)

        return (
            self.process_error(err),
            contexts,
            subsets_info,
            distribution_plot,
            word_cloud_plot,
            subsets_choice,
        )

    def get_word_context(
        self,
        word: str,
        n_context: int,
        subset_choice: List[str]
    ) -> Tuple:
        """Fetch contexts of ``word`` from the selected subsets.

        Args:
            word: Word typed by the user; it is lower-cased and stripped
                before being used.
            n_context: Forwarded unchanged to the explorer's ``getContexts``
                (presumably the number of contexts to retrieve).
            subset_choice: Subsets selected by the user. An empty (or
                ``None``) selection produces an error message instead of a
                lookup.

        Returns:
            A 2-tuple ``(error_html, contexts)``. On success ``contexts`` is
            a ``DataFrame`` with columns ``#``, ``contexto``, ``conjunto``
            and ``buscar``; on error it is an empty placeholder ``DataFrame``.
        """
        word = self.parse_word(word)
        contexts = pd.DataFrame([], columns=[''])

        err = self.word2context_explorer.errorChecking(word)
        if err:
            return self.process_error(err), contexts

        # Truthiness check also covers ``None``, which ``len()`` would reject.
        if not subset_choice:
            err = self.process_error("Error: Palabra no ingresada y/o conjunto/s de interés no seleccionado/s!")
            return err, contexts

        ds = self.word2context_explorer.findSplits(word, subset_choice)
        list_of_contexts = self.word2context_explorer.getContexts(word, n_context, ds)

        contexts = pd.DataFrame(list_of_contexts, columns=['#', 'contexto', 'conjunto'])
        # Extra column built by applying the explorer's genWebLink to each context.
        contexts["buscar"] = contexts["contexto"].apply(self.word2context_explorer.genWebLink)

        return self.process_error(err), contexts