"""
Pure Reasoning Engine — Integrated Inference Loop
===================================================
The complete inference pipeline combining:

1. SmolLM3-3B (reasoning model) — Reality123b/smollm3-3b-reasoning-engine
2. DistilBERT filter (67M) — Reality123b/epistemic-filter-v1
3. DuckDuckGo search (free, no API key needed)

Architecture:
    User Query → Decompose into sub-questions
               → Search internet for each sub-question
               → Filter all retrieved text through epistemic filter
               → Reason over cleaned, sourced information
               → Output answer with confidence levels and citations

The model is conditioned to:
- Operate on the principle "I know nothing. The internet is ground truth."
- Always verify against external sources
- Evaluate source credibility
- Admit uncertainty

Usage:
    python inference_loop.py --query "What is the latest research on CRISPR?"
    python inference_loop.py --interactive  # Chat mode

Requirements:
    pip install transformers torch datasets duckduckgo_search
"""
|
|
| import argparse |
| import json |
| import re |
| import sys |
| import time |
| from typing import Optional |
|
|
| import torch |
| from transformers import ( |
| AutoModelForCausalLM, |
| AutoTokenizer, |
| AutoModelForSequenceClassification, |
| ) |
|
|
|
|
| |
| |
| |
|
|
class EpistemicFilter:
    """Pre-screens retrieved text before it enters the reasoning model's context.

    Uses a fine-tuned DistilBERT classifier trained on FinerWeb-10BT quality labels.
    Detects: grammar errors, formatting garbage, SEO spam, offensive content.

    A text's quality score is the sigmoid of the classifier's logit; a text
    passes when its score is greater than or equal to ``threshold``.
    """

    def __init__(
        self,
        model_id: str = "Reality123b/epistemic-filter-v1",
        threshold: float = 0.5,
        device: str = "cuda" if torch.cuda.is_available() else "cpu",
    ):
        """Load the classifier and its tokenizer.

        Args:
            model_id: Hub ID (or local path) of the sequence-classification model.
            threshold: Minimum score (inclusive) for a text to pass the filter.
            device: Torch device the model and its inputs are placed on.
        """
        self.threshold = threshold
        self.device = device
        print(f"Loading epistemic filter: {model_id}...")
        self.model = AutoModelForSequenceClassification.from_pretrained(model_id).to(device)
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        self.model.eval()

    def score(self, texts: list[str]) -> list[float]:
        """Return quality scores (0-1) for a batch of text chunks.

        Args:
            texts: Text chunks to score; each is truncated to 512 tokens.

        Returns:
            One score per input text, in input order. An empty input yields
            an empty list without touching the model.
        """
        # The tokenizer cannot build a batch from zero texts, so short-circuit.
        if not texts:
            return []

        encoded = self.tokenizer(
            texts,
            return_tensors="pt",
            padding="longest",
            truncation=True,
            max_length=512,
        ).to(self.device)
        with torch.no_grad():
            # NOTE(review): assumes the classifier emits a single logit per
            # text (shape (batch, 1)) — confirm against the model config.
            logits = self.model(**encoded).logits.squeeze(-1)
        scores = torch.sigmoid(logits).cpu().float().tolist()
        # A 0-dim tensor converts to a bare float rather than a list.
        if isinstance(scores, float):
            scores = [scores]
        return scores

    def filter(self, texts: list[str]) -> list[str]:
        """Return only the texts whose score meets the quality threshold."""
        scores = self.score(texts)
        return [text for text, s in zip(texts, scores) if s >= self.threshold]

    def score_with_labels(self, texts: list[str]) -> list[dict]:
        """Return each text with its quality score and pass/fail label.

        Returns:
            One dict per input text with keys ``text``, ``quality_score``
            (rounded to 4 decimals) and ``passed``. Empty input yields ``[]``.
        """
        scores = self.score(texts)
        return [
            {
                "text": text,
                "quality_score": round(s, 4),
                "passed": s >= self.threshold,
            }
            for text, s in zip(texts, scores)
        ]
|
|
|
|
| |
| |
| |
|
|
class ReasoningEngine:
    """SmolLM3-3B fine-tuned for internet-grounded reasoning.

    Core conditioning (baked in via training):
    "I know nothing. The internet is ground truth. My only job is to reason over what I retrieve."

    The model NEVER guesses — it searches, retrieves, then reasons.
    """

    # System prompt sent with every request; "/no_think" together with
    # enable_thinking=False in generate() turns off the model's thinking mode.
    SYSTEM_PROMPT = """/no_think
## Custom Instructions

You are a pure reasoning engine. Your core operating principle:

"I know nothing. The internet is ground truth. My only job is to reason over what I retrieve."

Rules:
1. NEVER fabricate information. If you don't have retrieved evidence, say so.
2. ALWAYS cite your sources when making factual claims.
3. When sources conflict, explain the contradiction and evaluate credibility.
4. Express confidence levels: [High Confidence], [Medium Confidence], [Low Confidence], [Speculative].
5. Decompose complex questions into sub-questions and reason over each systematically.
6. If the retrieved information is insufficient, state what additional information would help.
"""

    def __init__(
        self,
        model_id: str = "Reality123b/smollm3-3b-reasoning-engine",
        device: str = "cuda" if torch.cuda.is_available() else "cpu",
    ):
        """Load the causal LM and its tokenizer.

        Args:
            model_id: Hub ID (or local path) of the reasoning model.
            device: Target torch device. Plain ``"cuda"`` lets ``device_map="auto"``
                place the weights; any other value (``"cpu"``, ``"cuda:1"``,
                ``"mps"``, ...) loads the model and moves it there explicitly.
        """
        self.device = device
        print(f"Loading reasoning model: {model_id}...")

        on_cuda = device.startswith("cuda")
        auto_placement = device == "cuda"
        self.model = AutoModelForCausalLM.from_pretrained(
            model_id,
            torch_dtype=torch.bfloat16 if on_cuda else torch.float32,
            device_map="auto" if auto_placement else None,
        )
        self.tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Without a device map the weights load on CPU; move them to whatever
        # device was requested so they sit alongside the inputs in generate().
        if not auto_placement:
            self.model = self.model.to(device)

    def generate(
        self,
        messages: list[dict],
        max_new_tokens: int = 1024,
        temperature: float = 0.6,
        top_p: float = 0.95,
    ) -> str:
        """Generate a reasoning response for a list of chat messages.

        Args:
            messages: Chat messages as ``{"role": ..., "content": ...}`` dicts.
            max_new_tokens: Upper bound on the number of generated tokens.
            temperature: Sampling temperature; a value <= 0 selects greedy decoding.
            top_p: Nucleus-sampling probability mass (ignored when greedy).

        Returns:
            The decoded completion only (prompt removed), stripped of
            surrounding whitespace.
        """
        prompt = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False,
        )
        # Follow the model's actual placement (device_map="auto" decides it).
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)

        # Fall back to EOS for padding when the tokenizer defines no pad token.
        pad_id = self.tokenizer.pad_token_id
        if pad_id is None:
            pad_id = self.tokenizer.eos_token_id

        gen_kwargs = {"max_new_tokens": max_new_tokens, "pad_token_id": pad_id}
        if temperature > 0:
            gen_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
        else:
            # Sampling with a non-positive temperature is invalid; decode greedily.
            gen_kwargs["do_sample"] = False

        with torch.no_grad():
            outputs = self.model.generate(**inputs, **gen_kwargs)

        # Drop the echoed prompt tokens and keep only the newly generated tail.
        prompt_len = inputs["input_ids"].shape[1]
        completion = self.tokenizer.decode(
            outputs[0][prompt_len:],
            skip_special_tokens=True,
        )
        return completion.strip()
|
|
|
|
| |
| |
| |
|
|
class InternetSearch:
    """Free web search via DuckDuckGo (no API key required)."""

    def __init__(self, max_results: int = 5):
        """Store the maximum number of results requested per query."""
        self.max_results = max_results

    def search(self, query: str) -> list[dict]:
        """Search the web and return results with title, snippet, URL.

        Args:
            query: Free-text search query.

        Returns:
            A list of dicts with keys ``title``, ``snippet``, ``url`` and
            ``source`` (the URL's host). Returns ``[]`` for a blank query,
            when ``duckduckgo_search`` is not installed, or when the search
            fails; failures are reported on stdout rather than raised.
        """
        # A blank query cannot return anything useful; skip the network call.
        if not query or not query.strip():
            return []

        try:
            from duckduckgo_search import DDGS
        except ImportError:
            print("⚠️ duckduckgo_search not installed. Install: pip install duckduckgo_search")
            return []

        try:
            with DDGS() as ddgs:
                hits = ddgs.text(query, max_results=self.max_results) or []
                return [self._to_result(hit) for hit in hits]
        except Exception as e:
            # Deliberately best-effort: a failed search must not abort the
            # pipeline, so report it and carry on with no results.
            print(f"⚠️ Search error: {e}")
            return []

    @classmethod
    def _to_result(cls, hit: dict) -> dict:
        """Convert one raw DuckDuckGo hit into the pipeline's result schema."""
        url = hit.get("href", "")
        return {
            "title": hit.get("title", ""),
            "snippet": hit.get("body", ""),
            "url": url,
            "source": cls._extract_domain(url),
        }

    @staticmethod
    def _extract_domain(url: str) -> str:
        """Extract the host name from an http(s) URL for source attribution.

        Returns the lower-cased host without userinfo or port, or
        ``"unknown"`` when ``url`` is empty, malformed, or not http(s).
        """
        from urllib.parse import urlsplit

        if not url:
            return "unknown"
        try:
            parts = urlsplit(url.strip())
        except ValueError:
            # urlsplit rejects some malformed inputs (e.g. a broken IPv6 host).
            return "unknown"
        if parts.scheme not in ("http", "https"):
            return "unknown"
        return parts.hostname or "unknown"
|
|
|
|
| |
| |
| |
|
|
class PureReasoningPipeline:
    """The complete inference loop combining search, filter, and reasoning."""

    # Upper bound on the sub-questions searched per query.
    MAX_SUB_QUESTIONS = 4
    # A parsed line must be longer than this to count as a sub-question.
    MIN_SUB_QUESTION_CHARS = 10

    # List markers stripped from the model's decomposition output:
    # "1." / "2)" numbering, then "-", "*" or a bullet (U+2022).
    _NUMBER_PREFIX = re.compile(r"^\d+[.)]\s*")
    _BULLET_PREFIX = re.compile(r"^[-*\u2022]\s*")

    _RULE = "─" * 60

    def __init__(
        self,
        reasoning_model_id: str = "Reality123b/smollm3-3b-reasoning-engine",
        filter_model_id: str = "Reality123b/epistemic-filter-v1",
        max_search_results: int = 5,
        device: Optional[str] = None,
    ):
        """Load the reasoning model, the epistemic filter and the search client.

        Args:
            reasoning_model_id: Model ID passed to ``ReasoningEngine``.
            filter_model_id: Model ID passed to ``EpistemicFilter``.
            max_search_results: Maximum search results per sub-query.
            device: Torch device for both models. ``None`` leaves each
                component on its own default device.
        """
        print("\n" + "=" * 60)
        print("Initializing Pure Reasoning Engine")
        print("=" * 60)

        # Only forward the device when the caller chose one, so the
        # components' own defaults apply otherwise.
        placement = {} if device is None else {"device": device}
        self.reasoning = ReasoningEngine(model_id=reasoning_model_id, **placement)
        self.filter = EpistemicFilter(model_id=filter_model_id, **placement)
        self.search_engine = InternetSearch(max_results=max_search_results)

        print("\n✓ All components loaded.")
        print(" Architecture: Search → Filter → Reason → Answer")
        print(" Principle: 'I know nothing. The internet is ground truth.'\n")

    def answer(self, query: str, verbose: bool = True) -> dict:
        """Full pipeline: decompose, search, filter, reason, answer.

        Args:
            query: The user's question.
            verbose: When true, print progress and the final answer to stdout.

        Returns:
            A dict with keys ``query``, ``answer``, ``confidence``, ``sources``
            (list of ``title``/``url``/``domain`` dicts) and ``sub_questions``.
            When no search results are found, or none survive the quality
            filter, ``answer`` explains that and ``confidence`` is
            ``"[Unable to Verify]"``; the reasoning model is not invoked.
        """

        def log(*args, **kwargs):
            if verbose:
                print(*args, **kwargs)

        log(f"\n{self._RULE}")
        log(f"Query: {query}")
        log(f"{self._RULE}\n")

        # Step 1: break the query into searchable sub-questions.
        log("[Step 1/5] Decomposing query...")
        sub_questions = self._decompose_query(query)
        for i, sq in enumerate(sub_questions, start=1):
            log(f" Sub-Q{i}: {sq}")

        # Step 2: search each sub-question and pool the results.
        log(f"\n[Step 2/5] Searching internet ({len(sub_questions)} sub-queries)...")
        all_results = []
        for sq in sub_questions:
            results = self.search_engine.search(sq)
            preview = sq if len(sq) <= 60 else f"{sq[:60]}..."
            log(f" '{preview}' → {len(results)} results")
            all_results.extend(results)
        # Overlapping sub-questions often return the same page; cite it once.
        all_results = self._dedupe_by_url(all_results)

        if not all_results:
            return self._unverified_result(
                query,
                sub_questions,
                "No search results found. Unable to provide an evidence-based answer.",
            )

        # Step 3: drop low-quality snippets before they reach the model.
        log(f"\n[Step 3/5] Filtering {len(all_results)} results through epistemic filter...")
        verdicts = self.filter.score_with_labels([r["snippet"] for r in all_results])
        passed = [r for r, v in zip(all_results, verdicts) if v["passed"]]
        rejected = len(verdicts) - len(passed)
        log(f" ✓ {len(passed)} passed | ✗ {rejected} rejected (low quality)")

        if not passed:
            # With no evidence left there is nothing to reason over; asking
            # the model anyway would only invite fabrication.
            return self._unverified_result(
                query,
                sub_questions,
                "All retrieved results were rejected by the quality filter. "
                "Unable to provide an evidence-based answer.",
            )

        # Step 4: reason over the surviving evidence.
        log(f"\n[Step 4/5] Reasoning over {len(passed)} verified sources...")
        evidence_context = self._build_evidence_context(passed)
        user_prompt = "\n".join(
            [
                "## User Query",
                query,
                "",
                "## Retrieved Evidence (verified by quality filter)",
                evidence_context,
                "",
                "## Instructions",
                "Please reason step-by-step over the retrieved evidence to answer the query.",
                "- Cite sources by number (e.g., [1], [2])",
                "- Note any contradictions between sources",
                "- Express confidence in each claim",
                "- If evidence is insufficient, state what's missing",
                "- Do NOT fabricate any information not present in the evidence",
            ]
        )
        messages = [
            {"role": "system", "content": self.reasoning.SYSTEM_PROMPT},
            {"role": "user", "content": user_prompt},
        ]
        response = self.reasoning.generate(messages)

        # Step 5: report and package the result.
        log("[Step 5/5] Structuring response...\n")
        log(self._RULE)
        log("ANSWER:")
        log(self._RULE)
        log(response)
        log(self._RULE)
        log("\nSOURCES:")
        for i, src in enumerate(passed, start=1):
            log(f" [{i}] {src['title']} — {src['url']}")

        return {
            "query": query,
            "answer": response,
            "confidence": self._extract_confidence(response),
            "sources": [
                {"title": r["title"], "url": r["url"], "domain": r["source"]}
                for r in passed
            ],
            "sub_questions": sub_questions,
        }

    def _decompose_query(self, query: str) -> list[str]:
        """Decompose a complex query into searchable sub-questions.

        Asks the reasoning model for sub-questions and parses its reply.
        Falls back to ``[query]`` when nothing usable is parsed.
        """
        messages = [
            {"role": "system", "content": self.reasoning.SYSTEM_PROMPT},
            {
                "role": "user",
                "content": (
                    "Decompose the following query into 2-4 specific sub-questions "
                    "that can be answered by web search. Return ONLY the "
                    "sub-questions, one per line.\n"
                    "\n"
                    f"Query: {query}\n"
                    "\n"
                    "Sub-questions:"
                ),
            },
        ]
        response = self.reasoning.generate(messages, max_new_tokens=256)
        return self._parse_sub_questions(response) or [query]

    @classmethod
    def _parse_sub_questions(cls, response: str) -> list[str]:
        """Parse the model's line-per-question reply into clean sub-questions.

        Strips list numbering and bullet markers, discards lines of
        ``MIN_SUB_QUESTION_CHARS`` characters or fewer, removes
        case-insensitive duplicates, and keeps at most ``MAX_SUB_QUESTIONS``.
        """
        sub_qs = []
        seen = set()
        for raw_line in response.strip().split("\n"):
            line = cls._NUMBER_PREFIX.sub("", raw_line.strip())
            line = cls._BULLET_PREFIX.sub("", line)
            if len(line) <= cls.MIN_SUB_QUESTION_CHARS:
                continue
            key = line.casefold()
            if key in seen:
                # A repeated sub-question would only repeat the same search.
                continue
            seen.add(key)
            sub_qs.append(line)
        return sub_qs[: cls.MAX_SUB_QUESTIONS]

    @staticmethod
    def _dedupe_by_url(results: list[dict]) -> list[dict]:
        """Drop results whose URL was already seen, keeping first occurrences.

        Results without a URL cannot be compared and are always kept.
        """
        seen = set()
        unique = []
        for result in results:
            url = result.get("url", "")
            if url:
                if url in seen:
                    continue
                seen.add(url)
            unique.append(result)
        return unique

    @staticmethod
    def _unverified_result(query: str, sub_questions: list[str], message: str) -> dict:
        """Build the result returned when no usable evidence is available."""
        return {
            "query": query,
            "answer": message,
            "confidence": "[Unable to Verify]",
            "sources": [],
            "sub_questions": sub_questions,
        }

    def _build_evidence_context(self, sources: list[dict]) -> str:
        """Format retrieved and filtered sources into a numbered evidence block.

        Entries are numbered from 1 so the model can cite them as ``[n]``;
        the numbering matches the SOURCES list printed by ``answer``.
        """
        context_parts = [
            f"[{i}] {src['title']}\n"
            f" Source: {src['source']} ({src['url']})\n"
            f" Content: {src['snippet']}\n"
            for i, src in enumerate(sources, start=1)
        ]
        return "\n".join(context_parts)

    @staticmethod
    def _extract_confidence(text: str) -> str:
        """Extract the highest confidence level mentioned in the response.

        Levels are checked from strongest to weakest and the first one found
        is returned; ``"[Unspecified]"`` when the response mentions none.
        """
        levels = (
            "[High Confidence]",
            "[Medium Confidence]",
            "[Low Confidence]",
            "[Speculative]",
            "[Unable to Verify]",
        )
        return next((level for level in levels if level in text), "[Unspecified]")


def main():
    """Command-line entry point.

    Parses arguments, loads the pipeline on the selected device, then either
    answers a single ``--query`` or runs the ``--interactive`` prompt loop.
    With neither option, prints usage and returns without loading any model.
    """
    parser = argparse.ArgumentParser(
        description="Pure Reasoning Engine — Internet-Grounded AI"
    )
    parser.add_argument("--query", "-q", type=str, help="Single query to answer")
    parser.add_argument("--interactive", "-i", action="store_true", help="Interactive chat mode")
    parser.add_argument(
        "--reasoning-model",
        type=str,
        default="Reality123b/smollm3-3b-reasoning-engine",
        help="Reasoning model ID",
    )
    parser.add_argument(
        "--filter-model",
        type=str,
        default="Reality123b/epistemic-filter-v1",
        help="Epistemic filter model ID",
    )
    parser.add_argument(
        "--max-results",
        type=int,
        default=5,
        help="Max search results per sub-query",
    )
    parser.add_argument(
        "--cpu",
        action="store_true",
        help="Force CPU mode (default: auto-detect GPU)",
    )
    args = parser.parse_args()

    # Nothing to do: show usage before paying the cost of loading the models.
    if not args.interactive and not args.query:
        parser.print_help()
        return

    device = "cpu" if args.cpu else ("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Device: {device}")

    pipeline = PureReasoningPipeline(
        reasoning_model_id=args.reasoning_model,
        filter_model_id=args.filter_model,
        max_search_results=args.max_results,
        device=device,  # makes --cpu actually take effect
    )

    if not args.interactive:
        pipeline.answer(args.query)
        return

    print("\n" + "=" * 60)
    print("Interactive Mode — Type 'exit' to quit")
    print("=" * 60)
    while True:
        try:
            query = input("\n> ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt ends the session cleanly.
            print("\nGoodbye!")
            break
        if query.lower() in ("exit", "quit", "q"):
            print("Goodbye!")
            break
        if query:
            pipeline.answer(query)
|
|
|
|
# Script entry point: run the CLI only when this file is executed directly,
# never as a side effect of importing it.
if __name__ == "__main__":
    main()
|
|