"""Gradio demo: fuzzy-match search keywords against "id,description" records.

Each input line has the form ``id,description``.  For a query, every keyword
is compared with every word of every description using ``rapidfuzz.fuzz.ratio``
and the id of the best-scoring record is returned.
"""

import re
from typing import Optional

import gradio as gr
import numpy as np
from rapidfuzz import fuzz

# Word separator: any run of Unicode whitespace (ASCII space, tab, and
# full-width / no-break spaces alike).
_WHITESPACE_RE = re.compile(r'\s+')


class Predictor:
    """Holds id/description records and finds the best match for a query."""

    def __init__(self):
        # Parallel lists: text_list[i] is the normalized description of
        # id_list[i].
        self.id_list = []
        self.text_list = []

    def _split_text(self, text: str) -> list[str]:
        """Return the unique lower-cased words of *text*.

        Words are separated by whitespace.  Duplicates are dropped while
        keeping first-occurrence order, so the result is deterministic
        across runs (a plain ``set`` would not be).
        """
        words = (word.lower() for word in _WHITESPACE_RE.split(text) if word)
        return list(dict.fromkeys(words))

    def _normalize_text(self, text: str) -> str:
        """Return *text* as its unique lower-cased words joined by spaces."""
        return ' '.join(self._split_text(text))

    def update_text_list(self, text: str):
        """Parse ``id,description`` lines from *text* and store them.

        Commas inside the description act as word separators.  When an id
        occurs more than once, the descriptions are merged.  Lines with
        fewer than two comma-separated fields (blank lines included) are
        skipped; they do not stop processing of the remaining lines.
        """
        for line in text.replace('\r', '').split('\n'):
            fields = line.split(',')
            if len(fields) < 2:
                # Skip only this line; the following lines are still parsed.
                continue
            entry_id = fields[0].strip()
            description = ' '.join(fields[1:])
            try:
                index = self.id_list.index(entry_id)
            except ValueError:
                self.id_list.append(entry_id)
                self.text_list.append(self._normalize_text(description))
            else:
                merged = f'{self.text_list[index]} {description}'
                self.text_list[index] = self._normalize_text(merged)

    def _calc_score(self, text: str, keyword: str) -> float:
        """Score how well *text* matches the words of *keyword*.

        For every keyword the best ``fuzz.ratio`` against any word of
        *text* is taken, and those best ratios are summed.  A *text*
        without any words scores 0.0.
        """
        keywords = self._split_text(keyword)
        wordlist = self._split_text(text)
        if not wordlist:
            # max() over an empty sequence would raise ValueError.
            return 0.0
        return float(sum(
            max(fuzz.ratio(word, key) for word in wordlist)
            for key in keywords
        ))

    def predict(self, keyword: str) -> str:
        """Return the id of the record that best matches *keyword*.

        Returns an empty string when no records are stored.  On a tie the
        earliest record wins.
        """
        if not self.text_list:
            print('no data')
            return ''
        scores = [self._calc_score(text, keyword) for text in self.text_list]
        # np.argmax yields a NumPy integer; convert it for list indexing.
        index = int(np.argmax(scores))
        result_id = self.id_list[index]
        result_desc = self.text_list[index]
        print(f'{result_id} {result_desc}')
        return result_id


def process_text(input_text: str, input_keyword: str) -> Optional[str]:
    """Gradio callback: build a Predictor from *input_text* and query it.

    Returns the predicted id, or None when either input is missing or
    blank.
    """
    if input_text is None or not input_text.strip():
        print('no input_text')
        return None
    if input_keyword is None or not input_keyword.strip():
        print('no input_keyword')
        return None
    predictor = Predictor()
    predictor.update_text_list(input_text)
    return predictor.predict(input_keyword)


app = gr.Interface(
    title='Fuzzy Search',
    fn=process_text,
    inputs=[
        gr.Textbox(
            label='text (comma separated text for id and description)',
            lines=10,
        ),
        gr.Textbox(label='search keywords'),
    ],
    outputs=[
        gr.Textbox(label='predicted id'),
    ],
    allow_flagging='never',
    concurrency_limit=20,
)

app.launch()