Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| import re | |
| from rapidfuzz import fuzz | |
class Predictor:
    """Fuzzy keyword matcher over a collection of ``id,description`` records.

    ``id_list`` and ``text_list`` are parallel lists: ``text_list[i]`` holds
    the normalized (lower-cased, de-duplicated, space-joined) description
    words registered for ``id_list[i]``.
    """

    def __init__(self):
        self.id_list = []
        self.text_list = []

    def _split_text(self, text: str) -> list[str]:
        """Return the distinct lower-cased words of *text* in first-seen order.

        Words are separated by any run of Unicode whitespace (space, tab,
        full-width space, ...); empty tokens are never produced.
        """
        words = [word.lower() for word in text.split()]
        # dict.fromkeys drops duplicates while keeping insertion order, so the
        # result is deterministic (a set would reorder words from run to run).
        return list(dict.fromkeys(words))

    def _normalize_text(self, text: str) -> str:
        """Return the distinct words of *text* joined by single spaces."""
        return ' '.join(self._split_text(text))

    def update_text_list(self, text: str):
        """Register records parsed from *text*, one ``id,description`` per line.

        The first comma-separated field is the id; all remaining fields are
        joined with spaces to form the description. Lines without a comma
        (including blank lines) are skipped. When an id occurs more than once,
        the new description words are merged into the existing entry.
        """
        for line in text.replace('\r', '').split('\n'):
            fields = line.split(',')
            if len(fields) < 2:
                # Skip only this malformed/blank line; later lines must still
                # be processed.
                continue
            record_id = fields[0].strip()
            description = ' '.join(fields[1:])
            try:
                index = self.id_list.index(record_id)
            except ValueError:
                # First time this id is seen: start a new entry.
                self.id_list.append(record_id)
                self.text_list.append(self._normalize_text(description))
            else:
                # Known id: merge the new words into the stored description.
                merged = f'{self.text_list[index]} {description}'
                self.text_list[index] = self._normalize_text(merged)

    def _calc_score(self, text: str, keyword: str) -> float:
        """Score how well *text* matches the words in *keyword*.

        For every keyword the best ``fuzz.ratio`` against any word of *text*
        is taken, and those best scores are summed. A text without words
        contributes 0 for each keyword instead of raising.
        """
        keywords = self._split_text(keyword)
        wordlist = self._split_text(text)
        return sum(
            # default=0 covers records whose description is empty.
            max((fuzz.ratio(word, key) for word in wordlist), default=0)
            for key in keywords
        )

    def predict(self, keyword: str) -> str:
        """Return the id of the record that best matches *keyword*.

        Returns an empty string (after printing ``no data``) when no records
        have been registered. On ties the earliest registered record wins.
        """
        if not self.text_list:
            print('no data')
            return ''
        scores = [self._calc_score(text, keyword) for text in self.text_list]
        # np.argmax returns the first maximal position; int() gives a plain
        # Python index for the list lookups below.
        index = int(np.argmax(scores))
        result_id = self.id_list[index]
        result_desc = self.text_list[index]
        print(f'{result_id} {result_desc}')
        return result_id
def process_text(input_text: str, input_keyword: str) -> str:
    """Build a fresh Predictor from *input_text* and look up *input_keyword*.

    Returns the predicted id, or None (after printing a short notice) when
    either argument is None, empty, or only whitespace. The text is checked
    before the keyword.
    """
    # Guard: record text must contain something other than whitespace.
    if not (input_text and input_text.strip()):
        print('no input_text')
        return None

    # Guard: same requirement for the search keywords.
    if not (input_keyword and input_keyword.strip()):
        print('no input_keyword')
        return None

    predictor = Predictor()
    predictor.update_text_list(input_text)
    return predictor.predict(input_keyword)
# Gradio UI: a multi-line box for the "id,description" records plus a keyword
# box as inputs, and a single text box showing the predicted id.
app = gr.Interface(
    fn=process_text,
    inputs=[
        gr.Textbox(
            label='text (comma separated text for id and description)',
            lines=10,
        ),
        gr.Textbox(label='search keywords'),
    ],
    outputs=[
        gr.Textbox(label='predicted id'),
    ],
    title='Fuzzy Search',
    allow_flagging='never',
    concurrency_limit=20,
)

# Start the web server as soon as this file is executed.
app.launch()