Spaces:

raviix46
/

Mind-Mesh

Sleeping

App Files Files Community

raviix46 committed on Oct 26, 2025

Commit

5785ed4

verified ·

1 Parent(s): 05d3b70

Create logic.py

Browse files

Files changed (1) hide show

src/logic.py +115 -0

src/logic.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import os, glob, json, faiss, numpy as np
+from sentence_transformers import SentenceTransformer
+from transformers import pipeline
+from groq import Groq
+from src.config import *
# Create the index directory up front; a no-op when it already exists.
os.makedirs(INDEX_DIR, exist_ok=True)

# Shared handles created once at import time and reused by every function below:
#   embedder   - sentence-transformers encoder selected by EMBEDDING_MODEL
#   summarizer - transformers "summarization" pipeline selected by SUMMARIZER_MODEL
#   client     - Groq API client, keyed from the GROQ_API_KEY environment variable
embedder = SentenceTransformer(EMBEDDING_MODEL)
summarizer = pipeline(task="summarization", model=SUMMARIZER_MODEL)
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# --- Token Counter ---
# Use tiktoken's "gpt-3.5-turbo" encoding when it can be set up; if importing
# tiktoken or resolving the encoding fails for any reason, fall back to a
# four-characters-per-token estimate.
try:
    import tiktoken

    enc = tiktoken.encoding_for_model("gpt-3.5-turbo")
except Exception:

    def count_tokens(text):
        """Estimate the token count of ``text`` as one token per four characters."""
        return len(text) // 4

else:

    def count_tokens(text):
        """Return the number of tokens tiktoken produces for ``text``."""
        return len(enc.encode(text))
# --- Build Index ---
def build_index():
    """Embed every ``*.txt`` file under RAW_DIR and persist a FAISS index.

    Each entry directly under RAW_DIR is treated as a domain (its basename is
    stored as ``domain``). Every ``.txt`` file inside it is split into
    overlapping character chunks, embedded with ``embedder`` using normalised
    vectors, and added to an inner-product index. The index is written to
    INDEX_PATH and the per-chunk metadata to META_PATH as JSON.

    Returns:
        tuple: ``(index, meta)`` where ``meta[i]`` is a dict with keys
        ``"domain"``, ``"text"`` and ``"source"`` describing vector ``i``.
    """

    def chunk_text(text, size=800, overlap=120):
        """Split ``text`` into stripped windows of ``size`` chars, skipping blanks."""
        step = size - overlap
        if step <= 0:
            # A non-positive step would never advance through the text.
            raise ValueError("overlap must be smaller than size")
        windows = (text[start:start + size].strip() for start in range(0, len(text), step))
        # Whitespace-only windows carry no content worth embedding.
        return [window for window in windows if window]

    # Take the vector width from the model so a different EMBEDDING_MODEL keeps
    # working; 384 is the value that was previously hard-coded.
    dim = embedder.get_sentence_embedding_dimension() or 384
    index = faiss.IndexFlatIP(dim)
    meta = []

    # Sorted traversal keeps the vector order (and therefore meta) reproducible.
    for domain_dir in sorted(glob.glob(f"{RAW_DIR}/*")):
        domain = os.path.basename(domain_dir)
        for path in sorted(glob.glob(f"{domain_dir}/*.txt")):
            source = os.path.basename(path)
            with open(path, encoding="utf-8") as f:
                chunks = chunk_text(f.read())
            if not chunks:
                # Nothing to embed; adding an empty batch to the index would fail.
                print(f"⚠️ Skipped {domain}/{source} (no text)")
                continue
            vecs = embedder.encode(chunks, normalize_embeddings=True)
            index.add(np.asarray(vecs, dtype="float32"))
            meta.extend({"domain": domain, "text": ch, "source": source} for ch in chunks)
            print(f"✅ Indexed {domain}/{source} ({len(chunks)} chunks)")

    faiss.write_index(index, INDEX_PATH)
    # Context manager guarantees the metadata is flushed and the handle closed.
    with open(META_PATH, "w", encoding="utf-8") as f:
        json.dump(meta, f)
    print(f"🎉 Index built: {len(meta)} chunks total.")
    return index, meta
# Load or build index
# Both artefacts are needed for retrieval, so rebuild when either is missing
# rather than failing on a lone index file without its metadata.
if os.path.exists(INDEX_PATH) and os.path.exists(META_PATH):
    index = faiss.read_index(INDEX_PATH)
    with open(META_PATH, encoding="utf-8") as _meta_file:
        meta = json.load(_meta_file)
else:
    index, meta = build_index()
# --- Retrieval ---
def retrieve_text(query, topk=TOP_K_RESULTS):
    """Return the metadata records of the chunks most similar to ``query``.

    Args:
        query: Text to embed and search for.
        topk: Maximum number of neighbours requested from the index.

    Returns:
        list: Entries of the module-level ``meta`` list, in the order the
        index returned them. May hold fewer than ``topk`` items (or none)
        when the index contains fewer vectors than requested.
    """
    qvec = np.asarray(embedder.encode([query], normalize_embeddings=True), dtype="float32")
    _scores, ids = index.search(qvec, topk)
    # FAISS pads missing neighbours with -1; indexing meta with -1 would
    # silently return the last chunk, so drop anything outside meta's range.
    return [meta[i] for i in ids[0] if 0 <= i < len(meta)]
# --- Token limiter ---
def trim_to_token_limit(text, max_tokens=MAX_TOKENS):
    """Cut ``text`` from the end until ``count_tokens`` reports at most ``max_tokens``.

    The text is shortened proportionally by character count. Because tokens
    are not evenly distributed over characters, one proportional cut can
    still leave the text over budget, so the cut is repeated until it fits.

    Args:
        text: Context string to bound.
        max_tokens: Token budget.

    Returns:
        str: ``text`` unchanged when already within budget, otherwise a
        prefix of it (possibly empty).
    """
    tokens = count_tokens(text)
    if tokens <= max_tokens:
        return text

    print(f"⚠️ Context too long ({tokens}). Trimming...")
    while tokens > max_tokens and text:
        cutoff_ratio = max_tokens / tokens
        keep = int(len(text) * cutoff_ratio)
        # Always drop at least one character so the loop makes progress, and
        # never pass a negative length (which would slice from the end).
        keep = max(0, min(keep, len(text) - 1))
        text = text[:keep]
        tokens = count_tokens(text)
    return text
# --- Main Answer Generator ---
def generate_answer(query, mode):
    """Answer ``query`` from retrieved chunks and format the result as markdown.

    Args:
        query: The user's question.
        mode: ``"Quick Summary (Offline)"`` runs the local summarizer on the
            retrieved context; any other value sends a prompt to Groq, trying
            PRIMARY_GROQ_MODEL first and FALLBACK_GROQ_MODEL second.

    Returns:
        str: Markdown with the synthesized answer followed by one highlight
        (first 300 characters) per retrieved chunk. If both Groq calls fail,
        the answer section carries the fallback model's error message instead
        of raising.
    """
    retrieved = retrieve_text(query)
    combined = " ".join(r["text"] for r in retrieved)
    safe_context = trim_to_token_limit(combined)

    if mode == "Quick Summary (Offline)":
        if safe_context.strip():
            # truncation=True keeps inputs longer than the summarizer's own
            # maximum length from failing inside the model.
            summary = summarizer(
                safe_context,
                max_length=180,
                min_length=60,
                do_sample=False,
                truncation=True,
            )[0]["summary_text"]
        else:
            # Nothing was retrieved, so there is no text to summarize.
            summary = "⚠️ No context available to summarize."
    else:
        prompt = (
            "\nYou are MindMesh, a cross-domain reasoning assistant.\n"
            f"Question: {query}\n"
            f"Context: {safe_context}\n"
            "Synthesize a precise and insightful answer across disciplines.\n"
        )
        last_error = None
        # Try the primary model, then the fallback; stop at the first success.
        for model_name in (PRIMARY_GROQ_MODEL, FALLBACK_GROQ_MODEL):
            try:
                response = client.chat.completions.create(
                    model=model_name,
                    messages=[{"role": "user", "content": prompt}],
                )
                summary = response.choices[0].message.content.strip()
                break
            except Exception as err:
                # Best-effort boundary: report the failure and move on to the
                # next model instead of aborting the request.
                print(f"⚠️ Groq model {model_name} failed: {err}")
                last_error = err
        else:
            summary = f"⚠️ Groq API error: {str(last_error)}"

    sections = [f"## 🧭 Synthesized Insight\n{summary}\n\n---\n### 🔍 Source Highlights\n"]
    sections.extend(
        f"**{r['domain'].title()} — {r['source']}**  \n{r['text'][:300]}...\n\n"
        for r in retrieved
    )
    return "".join(sections)
# --- Rebuild Index with Feedback ---
def rebuild():
    """Rebuild the index, yielding a status message before and after.

    The freshly built index and metadata replace the module-level ``index``
    and ``meta`` so that later ``retrieve_text`` calls search the new data
    instead of the objects loaded at import time.

    Yields:
        str: A progress message, then a completion message.
    """
    global index, meta
    yield "⚙️ Rebuilding FAISS index... please wait ⏳"
    index, meta = build_index()
    yield "✅ Index rebuilt successfully! (FAISS + metadata updated)"