Spaces:
Sleeping
Sleeping
Update model.py
Browse files
model.py
CHANGED
|
@@ -4,44 +4,46 @@ import sys
|
|
| 4 |
import pandas as pd
|
| 5 |
import numpy as np
|
| 6 |
import torch
|
| 7 |
-
from flashrank import Ranker, RerankRequest
|
| 8 |
from sentence_transformers import SentenceTransformer, CrossEncoder
|
| 9 |
from rank_bm25 import BM25Okapi
|
| 10 |
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
| 11 |
|
| 12 |
-
# ---
|
| 13 |
-
# We load these globally so they don't reload on every request
|
| 14 |
nlp = None
|
| 15 |
retriever = None
|
| 16 |
ranker = None
|
| 17 |
nli_model = None
|
| 18 |
|
| 19 |
def load_engines():
|
|
|
|
| 20 |
global nlp, retriever, ranker, nli_model
|
| 21 |
if nlp is not None: return
|
| 22 |
|
| 23 |
-
print("⚡
|
| 24 |
|
| 25 |
-
# NLP
|
| 26 |
import spacy
|
| 27 |
-
try:
|
| 28 |
-
|
|
|
|
| 29 |
import subprocess
|
| 30 |
subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm", "--quiet"])
|
| 31 |
nlp = spacy.load("en_core_web_sm", disable=["parser"])
|
| 32 |
-
|
| 33 |
-
#
|
| 34 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 35 |
retriever = SentenceTransformer('all-MiniLM-L6-v2', device=device)
|
| 36 |
-
|
| 37 |
-
#
|
| 38 |
-
ranker = Ranker(model_name="ms-marco-TinyBERT-L-2-v2", cache_dir="/
|
| 39 |
-
|
| 40 |
-
#
|
|
|
|
| 41 |
nli_model = CrossEncoder('cross-encoder/nli-deberta-v3-base', device=device)
|
| 42 |
-
print(f"✅
|
| 43 |
|
| 44 |
-
# ---
|
| 45 |
class UniversalGraphKB:
|
| 46 |
def __init__(self):
|
| 47 |
self.indices = {}
|
|
@@ -54,14 +56,17 @@ class UniversalGraphKB:
|
|
| 54 |
def ingest_book(self, text, key="session"):
|
| 55 |
chunks = self.get_chunks(text)
|
| 56 |
|
|
|
|
| 57 |
doc = nlp(text[:100000])
|
| 58 |
people = [ent.text.lower() for ent in doc.ents if ent.label_ == "PERSON"]
|
| 59 |
locs = [ent.text.lower() for ent in doc.ents if ent.label_ in ["GPE", "LOC"]]
|
|
|
|
| 60 |
main_person = pd.Series(people).value_counts().index[0] if people else "Unknown"
|
| 61 |
main_loc = pd.Series(locs).value_counts().index[0] if locs else "Unknown"
|
| 62 |
|
| 63 |
self.context_graph[key] = f"{main_person.title()} | {main_loc.title()}"
|
| 64 |
|
|
|
|
| 65 |
self.indices[key] = {
|
| 66 |
"text": chunks,
|
| 67 |
"vectors": retriever.encode(chunks, show_progress_bar=False),
|
|
@@ -71,8 +76,9 @@ class UniversalGraphKB:
|
|
| 71 |
|
| 72 |
kb = UniversalGraphKB()
|
| 73 |
|
| 74 |
-
# ---
|
| 75 |
def normalize_dates(text):
|
|
|
|
| 76 |
text = text.lower()
|
| 77 |
mapping = {
|
| 78 |
"eighteenth": "1750", "18th": "1750", "nineteenth": "1850", "19th": "1850",
|
|
@@ -83,21 +89,21 @@ def normalize_dates(text):
|
|
| 83 |
return text
|
| 84 |
|
| 85 |
def extract_features(backstory, key="session"):
|
| 86 |
-
if key not in kb.indices: return 0.0, "", "
|
| 87 |
idx = kb.indices[key]
|
| 88 |
context = kb.context_graph.get(key, "")
|
| 89 |
|
| 90 |
-
#
|
| 91 |
aug_query = f"{backstory} (Context: {context})"
|
| 92 |
q_vec = retriever.encode(aug_query)
|
| 93 |
v_scores = cosine_similarity([q_vec], idx['vectors'])[0]
|
| 94 |
-
candidates = [{"id": i, "text": idx['text'][i]} for i in v_scores.argsort()[-10:][::-1]]
|
| 95 |
|
| 96 |
-
#
|
|
|
|
| 97 |
reranked = ranker.rerank(RerankRequest(query=backstory, passages=candidates))
|
| 98 |
best_chunk = reranked[0]['text']
|
| 99 |
|
| 100 |
-
#
|
| 101 |
norm_claim = normalize_dates(backstory)
|
| 102 |
norm_ev = normalize_dates(best_chunk)
|
| 103 |
years_c = [int(y) for y in re.findall(r'\b([1-2][0-9]{3})\b', norm_claim)]
|
|
@@ -105,30 +111,28 @@ def extract_features(backstory, key="session"):
|
|
| 105 |
|
| 106 |
if years_c and years_e:
|
| 107 |
if not any(abs(yc - ye) < 5 for yc in years_c for ye in years_e):
|
| 108 |
-
return 0.0, best_chunk, f"TIMELINE
|
| 109 |
|
| 110 |
-
#
|
|
|
|
| 111 |
scores = nli_model.predict([(best_chunk, aug_query)])[0]
|
| 112 |
-
|
| 113 |
-
# Softmax
|
| 114 |
exp_scores = np.exp(scores - np.max(scores))
|
| 115 |
probs = exp_scores / exp_scores.sum()
|
| 116 |
|
| 117 |
-
|
|
|
|
| 118 |
|
| 119 |
-
# --- 4. THE MISSING LINK (Paste this at the bottom) ---
|
| 120 |
def predict_logic(book_text, backstory):
|
| 121 |
-
|
|
|
|
| 122 |
kb.ingest_book(book_text, "session")
|
| 123 |
-
|
| 124 |
score, ev, reason = extract_features(backstory, "session")
|
| 125 |
|
| 126 |
-
#
|
| 127 |
-
|
| 128 |
-
|
| 129 |
return {
|
| 130 |
-
"prediction":
|
| 131 |
"rationale": reason,
|
| 132 |
-
"evidence": ev[:
|
| 133 |
"score": round(score, 2)
|
| 134 |
}
|
|
|
|
| 4 |
import pandas as pd
|
| 5 |
import numpy as np
|
| 6 |
import torch
|
|
|
|
| 7 |
from sentence_transformers import SentenceTransformer, CrossEncoder
|
| 8 |
from rank_bm25 import BM25Okapi
|
| 9 |
from sklearn.metrics.pairwise import cosine_similarity
|
| 10 |
+
from flashrank import Ranker, RerankRequest
|
| 11 |
|
| 12 |
+
# --- GLOBAL ENGINES ---
|
|
|
|
| 13 |
nlp = None
|
| 14 |
retriever = None
|
| 15 |
ranker = None
|
| 16 |
nli_model = None
|
| 17 |
|
| 18 |
def load_engines():
    """Lazy loads models to ensure the Space starts without timing out."""
    global nlp, retriever, ranker, nli_model
    if nlp is not None: return

    print("⚡ Awakening Titanium Brain...")

    # 1. NLP Core (Spacy)
    import spacy
    try:
        nlp = spacy.load("en_core_web_sm", disable=["parser"])
    except OSError:
        # spacy.load raises OSError when the model package is not installed;
        # catch only that (a bare `except:` would also swallow KeyboardInterrupt
        # and mask real failures). Download once, then load.
        import subprocess
        subprocess.check_call([sys.executable, "-m", "spacy", "download", "en_core_web_sm", "--quiet"])
        nlp = spacy.load("en_core_web_sm", disable=["parser"])

    # 2. Vector Search (MiniLM)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    retriever = SentenceTransformer('all-MiniLM-L6-v2', device=device)

    # 3. Precision Reranker (FlashRank)
    ranker = Ranker(model_name="ms-marco-TinyBERT-L-2-v2", cache_dir="/app/cache")

    # 4. Logic Core (DeBERTa Cross-Encoder)
    # The CrossEncoder handles (Premise, Hypothesis) logic with high accuracy.
    nli_model = CrossEncoder('cross-encoder/nli-deberta-v3-base', device=device)
    print(f"✅ Engines Ready on {device.upper()}")
|
| 45 |
|
| 46 |
+
# --- UNIVERSAL GRAPH KNOWLEDGE BASE ---
|
| 47 |
class UniversalGraphKB:
|
| 48 |
def __init__(self):
|
| 49 |
self.indices = {}
|
|
|
|
| 56 |
def ingest_book(self, text, key="session"):
|
| 57 |
chunks = self.get_chunks(text)
|
| 58 |
|
| 59 |
+
# Entity Graph Extraction
|
| 60 |
doc = nlp(text[:100000])
|
| 61 |
people = [ent.text.lower() for ent in doc.ents if ent.label_ == "PERSON"]
|
| 62 |
locs = [ent.text.lower() for ent in doc.ents if ent.label_ in ["GPE", "LOC"]]
|
| 63 |
+
|
| 64 |
main_person = pd.Series(people).value_counts().index[0] if people else "Unknown"
|
| 65 |
main_loc = pd.Series(locs).value_counts().index[0] if locs else "Unknown"
|
| 66 |
|
| 67 |
self.context_graph[key] = f"{main_person.title()} | {main_loc.title()}"
|
| 68 |
|
| 69 |
+
# Vector + BM25 Hybrid Indexing
|
| 70 |
self.indices[key] = {
|
| 71 |
"text": chunks,
|
| 72 |
"vectors": retriever.encode(chunks, show_progress_bar=False),
|
|
|
|
| 76 |
|
| 77 |
kb = UniversalGraphKB()
|
| 78 |
|
| 79 |
+
# --- TITANIUM LOGIC ENGINE ---
|
| 80 |
def normalize_dates(text):
|
| 81 |
+
"""Symbolic Layer: Translates words to years for mathematical comparison."""
|
| 82 |
text = text.lower()
|
| 83 |
mapping = {
|
| 84 |
"eighteenth": "1750", "18th": "1750", "nineteenth": "1850", "19th": "1850",
|
|
|
|
| 89 |
return text
|
| 90 |
|
| 91 |
def extract_features(backstory, key="session"):
    """Score *backstory* against the ingested book for key *key*.

    Returns a ``(score, evidence, rationale)`` tuple where *score* is an
    entailment probability in [0, 1], *evidence* is the best supporting
    chunk, and *rationale* is a short explanation string.
    """
    if key not in kb.indices: return 0.0, "No Data", "Ingestion Failed"
    idx = kb.indices[key]
    context = kb.context_graph.get(key, "")

    # 1. Hybrid Retrieval — augment the claim with the book's entity context.
    aug_query = f"{backstory} (Context: {context})"
    q_vec = retriever.encode(aug_query)
    v_scores = cosine_similarity([q_vec], idx['vectors'])[0]

    # 2. FlashRank Reranking over the top-15 vector hits.
    candidates = [{"id": i, "text": idx['text'][i]} for i in v_scores.argsort()[-15:][::-1]]
    reranked = ranker.rerank(RerankRequest(query=backstory, passages=candidates))
    best_chunk = reranked[0]['text']

    # 3. TITANIUM GUARDRAIL: Timeline Analysis
    norm_claim = normalize_dates(backstory)
    norm_ev = normalize_dates(best_chunk)
    years_c = [int(y) for y in re.findall(r'\b([1-2][0-9]{3})\b', norm_claim)]
    # NOTE(review): this line was missing from the diff view; reconstructed to
    # mirror years_c over the evidence text (required by the guard below) —
    # confirm against the committed file.
    years_e = [int(y) for y in re.findall(r'\b([1-2][0-9]{3})\b', norm_ev)]

    if years_c and years_e:
        # Hard veto when no claim year lands within 5 years of any evidence year.
        if not any(abs(yc - ye) < 5 for yc in years_c for ye in years_e):
            return 0.0, best_chunk, f"TIMELINE DISCREPANCY: {years_c[0]} vs {years_e[0]}"

    # 4. NEURAL BRAIN: Semantic Entailment
    # Order: (Evidence, Claim) -> Cross-Encoder calculates if Truth proves Claim
    scores = nli_model.predict([(best_chunk, aug_query)])[0]
    # Numerically stable softmax over the NLI logits.
    exp_scores = np.exp(scores - np.max(scores))
    probs = exp_scores / exp_scores.sum()

    # Return Entailment Score (Index 1)
    return float(probs[1]), best_chunk, "Neural Semantic Verification"
|
| 124 |
|
|
|
|
| 125 |
def predict_logic(book_text, backstory):
    """Main Entry point for the API.

    Ingests *book_text*, scores *backstory* against it, and returns a
    JSON-serializable verdict dict with prediction, rationale, evidence
    excerpt, and rounded score.
    """
    load_engines()
    kb.ingest_book(book_text, "session")
    score, ev, reason = extract_features(backstory, "session")

    # Final Verdict Threshold
    prediction = "Consistent" if score > 0.5 else "Contradiction"

    # Only append the ellipsis when the evidence was actually truncated;
    # the original added "..." unconditionally, falsely implying truncation.
    evidence = ev[:400] + "..." if len(ev) > 400 else ev

    return {
        "prediction": prediction,
        "rationale": reason,
        "evidence": evidence,
        "score": round(score, 2)
    }
|