tejashsr committed on
Commit
d27a216
·
verified ·
1 Parent(s): 8172812

Update model.py

Browse files
Files changed (1) hide show
  1. model.py +33 -40
model.py CHANGED
@@ -5,34 +5,32 @@ import numpy as np
5
  import spacy
6
  import torch
7
  from flashrank import Ranker, RerankRequest
8
- from sentence_transformers import SentenceTransformer
9
  from rank_bm25 import BM25Okapi
10
  from sklearn.metrics.pairwise import cosine_similarity
11
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
12
 
13
- # --- GLOBAL ENGINES (LAZY LOAD) ---
14
  nlp = None
15
  retriever = None
16
  ranker = None
17
- tokenizer = None
18
  nli_model = None
19
 
20
  def load_engines():
21
- global nlp, retriever, ranker, tokenizer, nli_model
22
  if nlp is not None: return
23
 
24
- print("⚡ TITANIUM: Waking up Neural Engines...")
25
  nlp = spacy.load("en_core_web_sm", disable=["parser"])
26
 
27
- # 1. Retrieval Engine
28
  retriever = SentenceTransformer('all-MiniLM-L6-v2')
29
 
30
- # 2. Rerank Engine
31
  ranker = Ranker(model_name="ms-marco-TinyBERT-L-2-v2", cache_dir="/app/cache")
32
 
33
- # 3. Logic Engine (DeBERTa-v3)
34
- tokenizer = AutoTokenizer.from_pretrained("cross-encoder/nli-deberta-v3-base")
35
- nli_model = AutoModelForSequenceClassification.from_pretrained("cross-encoder/nli-deberta-v3-base")
36
  print("✅ TITANIUM: Ready.")
37
 
38
  # --- UNIVERSAL KNOWLEDGE GRAPH ---
@@ -52,8 +50,6 @@ class UniversalGraphKB:
52
 
53
  def ingest_book(self, text, key="session"):
54
  chunks = self.get_chunks(text)
55
-
56
- # Auto-Protagonist Detection
57
  doc = nlp(text[:100000])
58
  names = [ent.text.lower() for ent in doc.ents if ent.label_ == "PERSON"]
59
  main_char = pd.Series(names).value_counts().index[0] if names else "Unknown"
@@ -70,7 +66,6 @@ kb = UniversalGraphKB()
70
 
71
  # --- TITANIUM LOGIC GUARDRAILS ---
72
  def normalize_dates(text):
73
- """Visual Confirmation: Turns words to numbers for the Logic Engine."""
74
  text = text.lower()
75
  mapping = {
76
  "eighteenth": "1750", "18th": "1750",
@@ -82,15 +77,8 @@ def normalize_dates(text):
82
  if word in text: text += f" ({year}) "
83
  return text
84
 
85
- def get_nli_score(premise, hypothesis):
86
- inputs = tokenizer(premise, hypothesis, return_tensors='pt', truncation=True, max_length=512)
87
- with torch.no_grad():
88
- outputs = nli_model(**inputs)
89
- probs = torch.softmax(outputs.logits, dim=1).cpu().numpy()[0]
90
- return float(probs[1]) # Entailment
91
-
92
  def extract_features(backstory, key="session"):
93
- if key not in kb.indices: return [0,0], "", None, "Book not uploaded"
94
  idx = kb.indices[key]
95
  protagonist = kb.protagonists.get(key, "")
96
 
@@ -98,50 +86,55 @@ def extract_features(backstory, key="session"):
98
  backstory_norm = normalize_dates(backstory)
99
  aug_query = f"{backstory} (Context: {protagonist})"
100
 
101
- # 2. Search & Rerank
102
  q_vec = retriever.encode(aug_query)
103
  v_scores = cosine_similarity([q_vec], idx['vectors'])[0]
104
- candidates = list(v_scores.argsort()[-30:][::-1])
105
  passages = [{"id": i, "text": idx['text'][i]} for i in candidates]
106
 
 
107
  results = ranker.rerank(RerankRequest(query=backstory, passages=passages))
108
  best_chunk = results[0]['text']
109
  best_chunk_norm = normalize_dates(best_chunk)
110
 
111
- # --- GUARDRAILS (The "Visual Confirmation") ---
112
 
113
  # A. Exact Match
114
  if backstory.strip() in best_chunk:
115
- return [1.0, 0], best_chunk, 1, "VERIFIED: Exact Text Match"
116
 
117
- # B. Math Timeline Guardrail
118
  YEAR_PATTERN = r'\b([1-2][0-9]{3})\b'
119
  q_years = [int(y) for y in re.findall(YEAR_PATTERN, backstory_norm)]
120
  e_years = [int(y) for y in re.findall(YEAR_PATTERN, best_chunk_norm)]
121
 
122
  if q_years and e_years:
123
  if not any(abs(by - ey) < 5 for by in q_years for ey in e_years):
124
- return [0.0, 1], best_chunk, 0, f"CRITICAL: Timeline Mismatch ({q_years[0]} vs {e_years[0]})"
125
 
126
- # C. Neural Semantic Check
127
- score = get_nli_score(aug_query, best_chunk)
128
- return [score, 0], best_chunk, None, ""
 
 
 
 
 
 
 
129
 
130
  # --- API WRAPPER ---
131
  def predict_logic(book_text, backstory):
132
  load_engines()
133
  kb.ingest_book(book_text, "session")
134
- feats, ev, verdict, rat = extract_features(backstory, "session")
135
 
136
- # Guardrail Triggered
137
- if verdict is not None:
138
- return {"prediction": "Consistent" if verdict==1 else "Contradiction", "rationale": rat, "evidence": ev[:350] + "...", "score": 1.0 if verdict==1 else 0.0}
139
 
140
- # Neural Decision (Threshold 0.15)
141
- pred = 1 if feats[0] > 0.15 else 0
142
  return {
143
- "prediction": "Consistent" if pred==1 else "Contradiction",
144
- "rationale": f"Semantic Consistency Score: {feats[0]:.2f}",
145
  "evidence": ev[:350] + "...",
146
- "score": round(feats[0], 2)
147
  }
 
5
  import spacy
6
  import torch
7
  from flashrank import Ranker, RerankRequest
8
+ from sentence_transformers import SentenceTransformer, CrossEncoder
9
  from rank_bm25 import BM25Okapi
10
  from sklearn.metrics.pairwise import cosine_similarity
 
11
 
12
# --- GLOBAL ENGINES (lazy-loaded on first request) ---
nlp = None
retriever = None
ranker = None
nli_model = None

def load_engines():
    """Load the spaCy pipeline and all neural models exactly once.

    Idempotent: if ``nlp`` is already populated, every engine is assumed
    loaded and the call returns immediately.
    """
    global nlp, retriever, ranker, nli_model
    if nlp is not None:
        return

    print("⚡ TITANIUM: Waking up...")
    # spaCy is only needed for NER downstream; disabling the parser
    # keeps startup and inference fast.
    nlp = spacy.load("en_core_web_sm", disable=["parser"])

    # 1. Retrieval: MiniLM sentence-embedding model.
    retriever = SentenceTransformer('all-MiniLM-L6-v2')

    # 2. Rerank: FlashRank cross-encoder.
    ranker = Ranker(model_name="ms-marco-TinyBERT-L-2-v2", cache_dir="/app/cache")

    # 3. Logic: NLI CrossEncoder — the wrapper maps labels automatically.
    nli_model = CrossEncoder('cross-encoder/nli-deberta-v3-base')
    print("✅ TITANIUM: Ready.")
35
 
36
  # --- UNIVERSAL KNOWLEDGE GRAPH ---
 
50
 
51
  def ingest_book(self, text, key="session"):
52
  chunks = self.get_chunks(text)
 
 
53
  doc = nlp(text[:100000])
54
  names = [ent.text.lower() for ent in doc.ents if ent.label_ == "PERSON"]
55
  main_char = pd.Series(names).value_counts().index[0] if names else "Unknown"
 
66
 
67
  # --- TITANIUM LOGIC GUARDRAILS ---
68
  def normalize_dates(text):
 
69
  text = text.lower()
70
  mapping = {
71
  "eighteenth": "1750", "18th": "1750",
 
77
  if word in text: text += f" ({year}) "
78
  return text
79
 
 
 
 
 
 
 
 
80
def extract_features(backstory, key="session"):
    """Score a backstory claim against the ingested book.

    Pipeline: dense retrieval (MiniLM) -> cross-encoder rerank (FlashRank)
    -> rule guardrails (exact match, year arithmetic) -> NLI entailment.

    Args:
        backstory: The claim to verify against the book text.
        key: Knowledge-base session key used by ``kb``.

    Returns:
        ``(score, evidence, reason)`` — score in [0, 1] (1.0 = consistent,
        0.0 = contradiction), the best supporting chunk, and a short label
        naming the stage that produced the verdict.
    """
    if key not in kb.indices:
        return 0.0, "", "Book not uploaded"
    idx = kb.indices[key]
    protagonist = kb.protagonists.get(key, "")

    # 1. Normalize date words and augment the query with the protagonist.
    backstory_norm = normalize_dates(backstory)
    aug_query = f"{backstory} (Context: {protagonist})"

    # 2. Dense retrieval: top-15 chunks by cosine similarity.
    q_vec = retriever.encode(aug_query)
    v_scores = cosine_similarity([q_vec], idx['vectors'])[0]
    candidates = list(v_scores.argsort()[-15:][::-1])
    passages = [{"id": i, "text": idx['text'][i]} for i in candidates]

    # 3. Rerank candidates with FlashRank; keep the single best chunk.
    results = ranker.rerank(RerankRequest(query=backstory, passages=passages))
    best_chunk = results[0]['text']
    best_chunk_norm = normalize_dates(best_chunk)

    # --- GUARDRAILS ---

    # A. Exact match: verbatim inclusion is definitive evidence.
    if backstory.strip() in best_chunk:
        return 1.0, best_chunk, "VERIFIED: Exact Text Match"

    # B. Timeline check: if both texts mention 4-digit years and no pair is
    # within 5 years of each other, call it a hard contradiction.
    YEAR_PATTERN = r'\b([1-2][0-9]{3})\b'
    q_years = [int(y) for y in re.findall(YEAR_PATTERN, backstory_norm)]
    e_years = [int(y) for y in re.findall(YEAR_PATTERN, best_chunk_norm)]

    if q_years and e_years:
        if not any(abs(by - ey) < 5 for by in q_years for ey in e_years):
            return 0.0, best_chunk, f"TIMELINE MISMATCH: {q_years[0]} vs {e_years[0]}"

    # C. Neural semantic check (CrossEncoder NLI).
    # Raw logits, ordered [contradiction, entailment, neutral] per the
    # model card — NOTE(review): confirm against the loaded checkpoint.
    scores = nli_model.predict([(aug_query, best_chunk)])[0]

    # Numerically stable softmax: subtracting the max logit prevents
    # np.exp overflow for large logits without changing the result.
    shifted = scores - np.max(scores)
    exp_scores = np.exp(shifted)
    probs = exp_scores / np.sum(exp_scores)
    entailment_score = probs[1]  # probability mass on "entailment"

    return float(entailment_score), best_chunk, "SEMANTIC ANALYSIS"
125
 
126
  # --- API WRAPPER ---
127
  def predict_logic(book_text, backstory):
128
  load_engines()
129
  kb.ingest_book(book_text, "session")
130
+ score, ev, reason = extract_features(backstory, "session")
131
 
132
+ # Decision Threshold
133
+ pred = "Consistent" if score > 0.3 else "Contradiction"
 
134
 
 
 
135
  return {
136
+ "prediction": pred,
137
+ "rationale": reason,
138
  "evidence": ev[:350] + "...",
139
+ "score": round(score, 2)
140
  }