Spaces:
Paused
Paused
Delete nexframe_brain.py
Browse files- nexframe_brain.py +0 -57
nexframe_brain.py
DELETED
|
@@ -1,57 +0,0 @@
|
|
| 1 |
-
# =========================================================
|
| 2 |
-
# NexFrame Brain — simplified version for Hugging Face
|
| 3 |
-
# =========================================================
|
| 4 |
-
import os, re, json, hashlib
|
| 5 |
-
from pathlib import Path
|
| 6 |
-
from datetime import datetime, timezone
|
| 7 |
-
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 8 |
-
from sklearn.neighbors import NearestNeighbors
|
| 9 |
-
|
| 10 |
-
def utc_now():
    """Return the current UTC time as an ISO 8601 formatted string."""
    current = datetime.now(tz=timezone.utc)
    return current.isoformat()
|
| 11 |
-
def sha512_text(t):
    """Return the hexadecimal SHA-512 digest of the string *t* (UTF-8 encoded)."""
    hasher = hashlib.sha512()
    hasher.update(t.encode("utf-8"))
    return hasher.hexdigest()
|
| 12 |
-
def clean_ws(s):
    """Collapse each run of whitespace in *s* to one space and trim both ends."""
    collapsed = re.sub(r"\s+", " ", s)
    return collapsed.strip()
|
| 13 |
-
|
| 14 |
-
class NexFrameBrain:
    """Small TF-IDF retriever over the text files found under a base folder.

    On construction the instance reads every supported file below ``base``
    into ``self.corpus`` and fits a TF-IDF vectorizer plus a cosine
    nearest-neighbour index over those texts. ``ask`` returns excerpts of
    the closest documents for a query string.

    Attributes:
        base: Folder that is scanned recursively for files.
        index: Empty dict created by the constructor; not used by this class.
        corpus: List of ``{"file": <file name>, "text": <cleaned text>}``.
        vectorizer: Fitted ``TfidfVectorizer`` or ``None`` when nothing
            could be indexed.
        nn: Fitted ``NearestNeighbors`` or ``None`` when nothing could be
            indexed.
    """

    # Lower-case file suffixes that are read into the corpus.
    SUPPORTED_SUFFIXES = (".txt", ".json", ".md", ".py")
    # Upper bound on the number of documents returned per query.
    MAX_NEIGHBORS = 5
    # Number of leading characters of each document shown in an answer.
    EXCERPT_CHARS = 400

    def __init__(self, base="./core"):
        """Load the files under *base* and build the retriever.

        Args:
            base: Path of the folder to scan (defaults to ``"./core"``).
        """
        self.base = base
        self.index = {}
        self.corpus = []
        self._load_files()
        self._build_retriever()

    def _load_files(self):
        """Read every supported file below ``self.base`` into ``self.corpus``.

        A missing base folder is reported and leaves the corpus empty.
        Files that cannot be read are reported and skipped so that one bad
        file does not abort the whole load.
        """
        base = Path(self.base)
        if not base.exists():
            print("⚠️ core folder missing")
            return
        # Sorted so that corpus order (and therefore tie-breaking between
        # equally distant documents) does not depend on filesystem order.
        for path in sorted(base.rglob("*")):
            if path.suffix.lower() not in self.SUPPORTED_SUFFIXES:
                continue
            # A directory may carry a supported suffix (e.g. "notes.txt/").
            if not path.is_file():
                continue
            try:
                # read_text closes the handle; undecodable bytes are dropped.
                text = path.read_text(encoding="utf-8", errors="ignore")
            except (OSError, UnicodeError) as exc:
                print("skip", path, exc)
                continue
            self.corpus.append({"file": path.name, "text": clean_ws(text)})
        print(f"🧠 Loaded {len(self.corpus)} core files")

    def _build_retriever(self):
        """Fit the TF-IDF vectorizer and nearest-neighbour index on the corpus.

        Leaves ``self.vectorizer`` and ``self.nn`` as ``None`` when the
        corpus is empty or contains no indexable terms.
        """
        self.vectorizer = None
        self.nn = None
        texts = [doc["text"] for doc in self.corpus]
        if not texts:
            return
        vectorizer = TfidfVectorizer(stop_words="english", max_features=10000)
        try:
            matrix = vectorizer.fit_transform(texts)
        except ValueError as exc:
            # scikit-learn raises ValueError for an empty vocabulary (all
            # documents empty or stop-words only); treat that as "no data".
            print("⚠️ no indexable text in core files:", exc)
            return
        # kneighbors() fails when asked for more neighbours than there are
        # fitted samples, so never request more than the corpus holds.
        n_neighbors = min(self.MAX_NEIGHBORS, len(texts))
        self.nn = NearestNeighbors(n_neighbors=n_neighbors, metric="cosine").fit(matrix)
        self.vectorizer = vectorizer

    def ask(self, query):
        """Return excerpts of the documents closest to *query*.

        Args:
            query: Free-text question.

        Returns:
            ``"No data yet."`` when no retriever could be built,
            ``"No match found."`` when the query is empty or shares no term
            with any document, otherwise one ``"(<file>) <excerpt>..."``
            entry per matching document, separated by blank lines.
        """
        if self.vectorizer is None or self.nn is None:
            return "No data yet."
        if not query or not query.strip():
            return "No match found."
        query_vec = self.vectorizer.transform([query])
        dists, idxs = self.nn.kneighbors(query_vec)
        parts = []
        for dist, doc_idx in zip(dists[0], idxs[0]):
            # TF-IDF weights are non-negative, so a cosine distance of 1.0
            # means the document shares no term with the query at all.
            if dist >= 1.0:
                continue
            doc = self.corpus[doc_idx]
            parts.append(f"({doc['file']}) {doc['text'][:self.EXCERPT_CHARS]}...")
        return "\n\n".join(parts) or "No match found."
|
| 56 |
-
|
| 57 |
-
# Module-level instance created at import time with the default "./core" base;
# the constructor loads the core files and builds the retriever immediately.
brain = NexFrameBrain()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|