import os
import pickle

import faiss
import numpy as np
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sentence_transformers import SentenceTransformer
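

# Hedged sketch (not the project's actual ingestion code): how a compatible
# memory bank could be built. load_memory()/recall_memory() assume a FAISS
# index whose row i lines up with self.memory[i], each entry being a
# (text, ...) tuple read via entry[0].
def build_memory_bank(texts, out_dir):
    encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
    embeddings = encoder.encode(texts)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(np.array(embeddings).astype("float32"))
    faiss.write_index(index, os.path.join(out_dir, "memory.index"))
    # Store (text, metadata) tuples so recall_memory() can read entry[0].
    with open(os.path.join(out_dir, "persistent_memory.pkl"), "wb") as f:
        pickle.dump([(t, {}) for t in texts], f)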


class SavantRRFEngine:
    def __init__(self):
        self.assets_path = os.path.join(os.path.dirname(__file__), "assets")
        print("🔹 Loading Savant-RRF memory...")
        self.load_memory()
        print("🔹 Loading Savant-RRF model...")
        self.load_model()
        # Instantiate the sentence encoder once here, instead of inside
        # recall_memory(), so each query does not reload the encoder model.
        self.encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    def load_memory(self):
        index_path = os.path.join(self.assets_path, "memory.index")
        memory_path = os.path.join(self.assets_path, "persistent_memory.pkl")

        if not os.path.exists(index_path):
            raise FileNotFoundError(f"❌ Missing FAISS index: {index_path}")
        if not os.path.exists(memory_path):
            raise FileNotFoundError(f"❌ Missing memory data: {memory_path}")

        self.index = faiss.read_index(index_path)
        with open(memory_path, "rb") as f:
            self.memory = pickle.load(f)
        print(f"✅ Memory bank loaded: {len(self.memory)} entries")

    def load_model(self):
        # HF_TOKEN is only needed if the Hub repo is private or gated;
        # token=None falls back to anonymous access.
        hf_token = os.getenv("HF_TOKEN")
        try:
            # Primary: load from the Hugging Face Hub.
            self.tokenizer = AutoTokenizer.from_pretrained("antonypamo/ProSavantRRF", token=hf_token)
            self.model = AutoModelForCausalLM.from_pretrained("antonypamo/ProSavantRRF", token=hf_token)
            print("✅ Model loaded from HF Hub.")
        except Exception as e:
            print(f"⚠️ HF load failed: {e}\n🔁 Falling back to local model...")
            self.tokenizer = AutoTokenizer.from_pretrained(self.assets_path)
            self.model = AutoModelForCausalLM.from_pretrained(self.assets_path)
            print("✅ Local model loaded.")

        self.pipe = pipeline("text-generation", model=self.model, tokenizer=self.tokenizer)

    def recall_memory(self, query, top_k=5):
        q_emb = self.encoder.encode([query])
        D, I = self.index.search(np.array(q_emb).astype("float32"), top_k)
        # FAISS pads unfillable slots with -1 (e.g. when top_k exceeds the
        # index size), so skip those positions.
        return [self.memory[i][0] for i in I[0] if i != -1]

    def infer(self, prompt):
        retrieved = self.recall_memory(prompt, top_k=3)
        context = "\n".join(retrieved)
        full_prompt = f"Context:\n{context}\n\nUser: {prompt}\nSavant-RRF:"
        # return_full_text=False strips the echoed prompt so only the
        # model's continuation is returned.
        result = self.pipe(full_prompt, max_new_tokens=150, do_sample=True,
                           temperature=0.7, return_full_text=False)
        return result[0]["generated_text"]
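

# Minimal usage sketch (assumes assets/ holds memory.index and
# persistent_memory.pkl, and that the Hub repo or a local model copy is
# reachable); run this file directly to smoke-test the engine.
if __name__ == "__main__":
    engine = SavantRRFEngine()
    print(engine.infer("What is Savant-RRF?"))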