| |
|
| | import os |
| | import numpy as np |
| | import faiss |
| | import torch |
| | from sentence_transformers import SentenceTransformer |
| | from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
| |
|
def load_sentence_transformer(path_or_name):
    """Load a SentenceTransformer embedding model.

    Args:
        path_or_name: Either a local filesystem path to a saved model or a
            model identifier to be resolved by ``SentenceTransformer`` itself
            (e.g. a hub model name).

    Returns:
        The loaded ``SentenceTransformer`` instance.
    """
    # NOTE: the original code branched on os.path.exists(path_or_name) but
    # executed the identical call in both branches; SentenceTransformer
    # already handles both local paths and remote model names, so a single
    # call suffices.
    return SentenceTransformer(path_or_name)
| |
|
def load_summarizer_model(path_or_name):
    """Load a seq2seq summarization model and its tokenizer.

    The model is moved to the first available CUDA device when one exists,
    otherwise it stays on CPU.

    Args:
        path_or_name: Local path or model identifier accepted by
            ``transformers`` ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` pair.
    """
    tok = AutoTokenizer.from_pretrained(path_or_name)
    summarizer = AutoModelForSeq2SeqLM.from_pretrained(path_or_name)
    # Prefer GPU when available; torch.device handles both cases uniformly.
    target = "cuda" if torch.cuda.is_available() else "cpu"
    summarizer.to(torch.device(target))
    return summarizer, tok
| |
|
def load_tokenizer(path_or_name):
    """Load a tokenizer via ``AutoTokenizer.from_pretrained``.

    Args:
        path_or_name: Local path or model identifier.

    Returns:
        The loaded tokenizer instance.
    """
    tokenizer = AutoTokenizer.from_pretrained(path_or_name)
    return tokenizer
| |
|
def load_faiss_index(path):
    """Load a FAISS index and its sidecar metadata, if present.

    Metadata is looked up next to the index file: first ``meta.jsonl``
    (one JSON object per non-empty line), then ``meta.npy`` (a pickled
    NumPy object array) as a fallback. If neither exists, metadata is
    an empty list.

    Args:
        path: Filesystem path to the FAISS index file.

    Returns:
        A tuple ``(index, meta)`` where ``index`` is the FAISS index and
        ``meta`` is a list of per-vector metadata records (possibly empty).

    Raises:
        FileNotFoundError: If the index file itself does not exist.
    """
    import json

    if not os.path.exists(path):
        raise FileNotFoundError(f"FAISS index not found: {path}")
    index = faiss.read_index(path)

    base_dir = os.path.dirname(path)
    meta = []
    meta_path = os.path.join(base_dir, "meta.jsonl")
    if os.path.exists(meta_path):
        # Fix: JSONL files should be decoded as UTF-8 regardless of the
        # platform's default locale encoding.
        with open(meta_path, encoding="utf-8") as f:
            for line in f:
                if line.strip():
                    meta.append(json.loads(line))
    else:
        np_meta = os.path.join(base_dir, "meta.npy")
        if os.path.exists(np_meta):
            # allow_pickle is required because the array holds Python
            # objects; only load metadata files you trust.
            meta = np.load(np_meta, allow_pickle=True).tolist()
    return index, meta
| |
|