Spaces:
Runtime error
Runtime error
| from pymilvus import connections, utility, DataType, FieldSchema, CollectionSchema, Collection | |
| from sentence_transformers import SentenceTransformer | |
| from pyvi import ViTokenizer | |
| import string | |
| import json | |
def load_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return the result."""
    with open(path, 'r', encoding='utf-8') as handle:
        return json.loads(handle.read())
def convert_query(query):
    """Lower-case ``query`` and return ``ViTokenizer.tokenize`` applied to it."""
    lowered = query.lower()
    return ViTokenizer.tokenize(lowered)
def load_stopword(path):
    """Return every line of the UTF-8 file at ``path`` as a list.

    Each line is stripped of surrounding whitespace; blank lines are kept
    as empty strings, and the file order is preserved.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        return [raw_line.strip() for raw_line in handle]
def remove_stop_words(path, split_prompts):
    """Filter stop words out of a sequence of tokens.

    Args:
        path: Location of the stop-word file; handed to ``load_stopword``.
        split_prompts: Iterable of string tokens to filter.

    Returns:
        A list of the tokens that are not stop words, in their original
        order, each stripped of surrounding whitespace.
    """
    # Build the lookup set once so each membership test is O(1) instead of
    # scanning the whole stop-word list for every token.
    stop_words = set(load_stopword(path))
    # The membership test is done on the raw token (before stripping),
    # exactly as before; only the kept token is stripped.
    return [token.strip() for token in split_prompts if token not in stop_words]
def clean_query(path, query):
    """Turn a raw query into a de-duplicated list of meaningful tokens.

    The query is lower-cased, tokenized with ``ViTokenizer.tokenize`` and
    split on single spaces. Punctuation-only tokens are discarded, the
    underscores in the remaining tokens are replaced by spaces, stop words
    are removed via ``remove_stop_words`` and duplicates are dropped while
    keeping first-occurrence order.

    Args:
        path: Location of the stop-word file; forwarded to
            ``remove_stop_words``.
        query: Raw query string.

    Returns:
        A list of unique cleaned tokens in order of first appearance.
    """
    tokens = ViTokenizer.tokenize(query.lower()).split(' ')
    punctuation_chars = set(string.punctuation)
    words = [
        # NOTE(review): '_' is presumably the tokenizer's marker for joined
        # multi-syllable words -- confirm against the pyvi documentation.
        token.replace('_', ' ')
        for token in tokens
        # Drop a token when EVERY character is punctuation. The previous
        # substring test (`token in string.punctuation`) let sequences such
        # as "..." or "?!" through. all() is also True for the empty string,
        # so empty tokens produced by repeated spaces are still discarded.
        if not all(char in punctuation_chars for char in token)
    ]
    kept = remove_stop_words(path, words)
    # dict.fromkeys removes duplicates while preserving insertion order.
    return list(dict.fromkeys(kept))
def load_model(model_name):
    """Construct and return a ``SentenceTransformer`` for ``model_name``."""
    return SentenceTransformer(model_name)
def connect_vector_db(alias='default', host='localhost', port='19530'):
    """Open a connection through ``pymilvus.connections``.

    The defaults reproduce the previously hard-coded target, so existing
    zero-argument calls behave exactly as before.

    Args:
        alias: Connection alias passed as the first argument to
            ``connections.connect``.
        host: Server host name or address.
        port: Server port.
    """
    connections.connect(alias, host=host, port=port)
    print("Connect finished!")
def load_collection(collection_name):
    """Create a ``Collection`` for ``collection_name``, call ``load()`` on it,
    print a confirmation message and return the collection object."""
    loaded = Collection(collection_name)
    loaded.load()
    print(f"{collection_name} load complete!")
    return loaded