import os
import pickle

import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sentence_transformers import SentenceTransformer
class SavantRRFEngine:
    def __init__(self):
        self.assets_path = os.path.join(os.path.dirname(__file__), "assets")
        print("🔹 Loading Savant-RRF memory...")
        self.load_memory()
        print("🔹 Loading Savant-RRF model...")
        self.load_model()
    def load_memory(self):
        index_path = os.path.join(self.assets_path, "memory.index")
        memory_path = os.path.join(self.assets_path, "persistent_memory.pkl")
        if not os.path.exists(index_path):
            raise FileNotFoundError(f"❌ Missing FAISS index: {index_path}")
        if not os.path.exists(memory_path):
            raise FileNotFoundError(f"❌ Missing memory data: {memory_path}")
        self.index = faiss.read_index(index_path)
        with open(memory_path, "rb") as f:
            self.memory = pickle.load(f)
        print(f"✅ Memory bank loaded: {len(self.memory)} entries")
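    # Hedged sketch of how the index/pickle pair could be built; the actual
    # build script is not shown, so the entry layout below is an assumption
    # inferred from recall_memory, which reads the text from memory[i][0]:
    #
    #   encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    #   entries = [("some remembered fact", "optional metadata"), ...]
    #   embs = encoder.encode([e[0] for e in entries]).astype("float32")
    #   index = faiss.IndexFlatL2(embs.shape[1])
    #   index.add(embs)
    #   faiss.write_index(index, "assets/memory.index")
    #   with open("assets/persistent_memory.pkl", "wb") as f:
    #       pickle.dump(entries, f)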
    def load_model(self):
        try:
            # Primary: load from the Hugging Face Hub, passing HF_TOKEN when
            # set (needed if the repo is private or gated).
            hf_token = os.getenv("HF_TOKEN")
            self.tokenizer = AutoTokenizer.from_pretrained(
                "antonypamo/ProSavantRRF", token=hf_token
            )
            self.model = AutoModelForCausalLM.from_pretrained(
                "antonypamo/ProSavantRRF", token=hf_token
            )
            print("✅ Model loaded from HF Hub.")
        except Exception as e:
            print(f"⚠️ HF load failed: {e}\n🔁 Falling back to local model...")
            self.tokenizer = AutoTokenizer.from_pretrained(self.assets_path)
            self.model = AutoModelForCausalLM.from_pretrained(self.assets_path)
            print("✅ Local model loaded.")
        self.pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)
    def recall_memory(self, query, top_k=5):
        # NOTE: the encoder is re-instantiated on every call, as in the
        # original; caching it on self would avoid repeated model loads.
        encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
        q_emb = encoder.encode([query])
        D, I = self.index.search(np.array(q_emb).astype("float32"), top_k)
        return [self.memory[i][0] for i in I[0]]
    def infer(self, prompt):
        retrieved = self.recall_memory(prompt, top_k=3)
        context = "\n".join(retrieved)
        full_prompt = f"Context:\n{context}\n\nUser: {prompt}\nSavant-RRF:"
        result = self.pipe(full_prompt, max_new_tokens=150, do_sample=True, temperature=0.7)
        return result[0]["generated_text"]
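

# Minimal usage sketch. Assumes assets/memory.index and
# assets/persistent_memory.pkl exist next to this file, and that HF_TOKEN is
# exported if the Hub repo requires authentication. The prompt is illustrative.
if __name__ == "__main__":
    engine = SavantRRFEngine()
    print(engine.infer("What does Savant-RRF remember about FAISS?"))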