import json
import os
import re
from dataclasses import dataclass
from typing import Dict, List, Tuple, Optional

import gradio as gr

# Lazy-loaded quantizer (imported on first use so the app can start even if
# the optional embedding dependencies are missing).
_quantizer = None
_quantizer_error = None


def _get_quantizer():
    """Lazy-load the embedding quantizer, caching the instance (or the error)."""
    global _quantizer, _quantizer_error
    if _quantizer is not None:
        return _quantizer
    if _quantizer_error is not None:
        return None
    try:
        from quantizer import EmbeddingQuantizer

        _quantizer = EmbeddingQuantizer(fallback_threshold=0.3)
        return _quantizer
    except Exception as e:
        _quantizer_error = str(e)
        return None


def _get_keyword_quantizer():
    """Fall back to the keyword quantizer when embeddings are unavailable."""
    try:
        from quantizer import KeywordQuantizer

        return KeywordQuantizer()
    except Exception:
        return None


# ---------------------------
# Loaded at build time
# ---------------------------
PAPER_TITLE = "Slipstream: Semantic Quantization for Efficient Multi-Agent Coordination"
PAPER_AUTHORS = "Anthony Maio"
PAPER_ABSTRACT = (
    "As multi-agent LLM systems scale, coordination bandwidth becomes a primary cost "
    "driver: every token spent on routing, intent framing, and redundant context is paid "
    "repeatedly across agents and turns. Current approaches waste 40–60% of compute on "
    "coordination overhead, with communication costs scaling O(n^2) as agent counts "
    "increase.\n\n"
    "This paper introduces Slipstream, a protocol that performs semantic quantization: "
    "mapping free-form messages onto a shared Universal Concept Reference (UCR) and "
    "transmitting compact mnemonic anchors that identify structured intents. Unlike "
    "syntactic compression (which fails due to BPE tokenizer fragmentation), Slipstream "
    "transmits natural-language mnemonics that tokenize efficiently across model "
    "architectures.\n\n"
    "Slipstream combines (1) a symbolic 4D semantic manifold (Action, Polarity, Domain, "
    "Urgency) with (2) a data-driven vector engine (embeddings + nearest-centroid "
    "retrieval), plus an evolutionary extension layer that learns new anchors from "
    "low-confidence traffic. Results show 82% token reduction (41.9 → 7.4 tokens on "
    "average) while maintaining semantic fidelity, making large-scale multi-agent "
    "deployments economically viable."
)
PAPER_TAGS = "semantic-quantization, multi-agent-systems, protocol-standards, token-efficiency"
DEFAULT_LLM_MODEL = None
DETECTED_MODELS = []
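
# Runtime configuration (all optional, read from environment / Space secrets):
#   HF_TOKEN or HUGGINGFACEHUB_API_TOKEN - enables Hugging Face Inference calls.
#   PAPER_LLM_MODEL                      - chat model id; without it the app falls
#                                          back to deterministic snippet retrieval.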

# ---------------------------
# Robust file loading
# ---------------------------
def _load_chunks(path: str = "paper_chunks.jsonl") -> List[str]:
    """Read one JSON record per line, keeping only non-empty "text" fields."""
    chunks: List[str] = []
    try:
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                try:
                    rec = json.loads(line)
                    txt = (rec.get("text") or "").strip()
                    if txt:
                        chunks.append(txt)
                except Exception:
                    # Skip malformed lines rather than failing the whole load.
                    continue
    except FileNotFoundError:
        return []
    except Exception:
        return []
    return chunks


# ---------------------------
# Tiny BM25-ish retrieval (pure Python)
# ---------------------------
def _tokenize(s: str) -> List[str]:
    return re.findall(r"[A-Za-z0-9][A-Za-z0-9_-]{1,}", s.lower())


@dataclass
class Retriever:
    chunks: List[str]
    doc_tokens: List[List[str]]
    df: Dict[str, int]
    idf: Dict[str, float]

    @staticmethod
    def build(chunks: List[str]) -> "Retriever":
        doc_tokens = [_tokenize(c) for c in chunks]
        df: Dict[str, int] = {}
        for toks in doc_tokens:
            for t in set(toks):
                df[t] = df.get(t, 0) + 1
        n = max(1, len(doc_tokens))
        # BM25-style IDF: rare terms get more weight than common ones.
        idf = {t: float((n - d + 0.5) / (d + 0.5)) for t, d in df.items()}
        return Retriever(chunks=chunks, doc_tokens=doc_tokens, df=df, idf=idf)

    def topk(self, query: str, k: int = 4) -> List[Tuple[int, float]]:
        q = _tokenize(query)
        if not q:
            return []
        scores: List[Tuple[int, float]] = []
        qset = set(q)
        for i, toks in enumerate(self.doc_tokens):
            if not toks:
                continue
            overlap = qset.intersection(toks)
            if not overlap:
                continue
            score = sum(self.idf.get(t, 0.0) for t in overlap)
            # Mild length normalization so long chunks do not dominate.
            score = score / (1.0 + (len(toks) / 200.0))
            scores.append((i, score))
        scores.sort(key=lambda x: x[1], reverse=True)
        return scores[:k]


CHUNKS = _load_chunks()
RETRIEVER = Retriever.build(CHUNKS) if CHUNKS else None


def retrieve_context(query: str, k: int = 4, max_chars: int = 6000) -> str:
    if not RETRIEVER:
        return ""
    hits = RETRIEVER.topk(query, k=k)
    parts: List[str] = []
    for idx, _score in hits:
        txt = CHUNKS[idx].strip()
        if txt:
            parts.append(txt)
    ctx = "\n\n".join(parts).strip()
    return ctx[:max_chars]
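
# The scoring above, written out (a plain-Python simplification of BM25; the
# usual k1/b term-frequency saturation is deliberately omitted):
#
#     idf(t)      = (N - df(t) + 0.5) / (df(t) + 0.5)
#     score(d, q) = sum(idf(t) for t in q ∩ d) / (1 + |d| / 200)
#
# where N is the number of chunks, df(t) the number of chunks containing t,
# and |d| the token length of chunk d. A quick sanity check (toy chunks for
# illustration, not from the paper):
#
#     r = Retriever.build(["semantic quantization of intents",
#                          "token costs in multi-agent systems"])
#     r.topk("semantic quantization")  # -> [(0, ~1.96)]: only chunk 0 overlaps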

# ---------------------------
# HF Inference helpers (optional)
# ---------------------------
def _get_hf_client(model_id: str):
    try:
        from huggingface_hub import InferenceClient
    except Exception as e:
        raise RuntimeError("huggingface_hub is not installed. Add it to requirements.txt") from e
    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
    if not token:
        raise RuntimeError("HF_TOKEN is not set as a Space secret.")
    return InferenceClient(model=model_id, token=token)


def _llm_chat(model_id: str, messages: List[dict], max_tokens: int = 512) -> str:
    client = _get_hf_client(model_id)
    out = client.chat_completion(messages=messages, max_tokens=max_tokens)
    return out.choices[0].message.content
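
# Minimal usage sketch (the model id below is illustrative, not a project
# default; any chat-capable Hub model id works, and HF_TOKEN must be set):
#
#     reply = _llm_chat(
#         "HuggingFaceH4/zephyr-7b-beta",
#         [{"role": "user", "content": "Summarize the abstract in one line."}],
#         max_tokens=64,
#     )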

# ---------------------------
# Chat with paper (RAG-lite)
# ---------------------------
def paper_chat(message: str, history: List[Tuple[str, str]]) -> str:
    message = (message or "").strip()
    if not message:
        return "Ask a question about the paper."
    ctx = retrieve_context(message, k=4, max_chars=6000)
    model_id = os.environ.get("PAPER_LLM_MODEL") or DEFAULT_LLM_MODEL
    if not model_id:
        # No LLM configured: degrade to deterministic snippet retrieval.
        if not ctx:
            return "No indexed context found. (paper_chunks.jsonl missing?)"
        return "Top matches in the paper:\n\n" + ctx[:1200]
    trimmed_history = history[-4:] if history else []
    messages: List[dict] = [
        {
            "role": "system",
            "content": (
                "You are a precise research assistant. Answer using ONLY the provided paper context. "
                "If the context is insufficient, say what is missing and point to what section would help."
            ),
        },
    ]
    if ctx:
        messages.append({"role": "system", "content": "PAPER CONTEXT:\n\n" + ctx})
    for u, a in trimmed_history:
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": message})
    try:
        return _llm_chat(model_id, messages, max_tokens=512).strip()
    except Exception as e:
        if ctx:
            return f"(LLM unavailable: {e})\n\nTop matches in the paper:\n\n" + ctx[:1200]
        return f"LLM unavailable: {e}"


# ---------------------------
# Share Kit (generators)
# ---------------------------
def _fallback_tweet_thread(title: str, abstract: str) -> str:
    abs_one = re.sub(r"\s+", " ", abstract).strip()
    bullets = [
        f"1/ {title}",
        "2/ TL;DR: " + (abs_one[:220] + ("…" if len(abs_one) > 220 else "")),
        "3/ Key idea: (open the Space → Chat tab and ask for the method overview)",
        "4/ Try it: use the Share Kit tab to generate a talk outline / FAQ.",
        "5/ Links: add your paper + code links in the README.",
    ]
    return "\n\n".join(bullets)


def generate_share(kind: str) -> str:
    kind = (kind or "").strip().lower()
    base_title = PAPER_TITLE or "Paper"
    base_abs = PAPER_ABSTRACT or ""
    model_id = os.environ.get("PAPER_LLM_MODEL") or DEFAULT_LLM_MODEL
    if not model_id:
        # Deterministic fallbacks so this tab works without secrets.
        if kind == "tweet thread":
            return _fallback_tweet_thread(base_title, base_abs)
        if kind == "talk outline":
            return "\n".join([
                f"Title: {base_title}",
                "- Motivation",
                "- Problem setup",
                "- Method",
                "- Results",
                "- Limitations",
                "- Q&A",
            ])
        if kind == "faq":
            return "\n".join([
                "Q: What problem does this address?\nA: " + (base_abs[:220] + ("…" if len(base_abs) > 220 else "")),
                "",
                "Q: What is the main contribution?\nA: Ask in the Chat tab.",
                "",
                "Q: How do I reproduce it?\nA: Link code + add steps in README.",
            ])
        return "Select an item to generate."
    prompt = {
        "tweet thread": "Write a concise 6-tweet thread summarizing the paper for the ML community.",
        "talk outline": "Create a 10-minute talk outline with section headers and bullet points.",
        "faq": "Write an FAQ with 6 Q/A pairs focused on method, results, limitations, and usage.",
    }.get(kind, "Summarize the paper in 8 bullet points.")
    ctx = (PAPER_ABSTRACT or "").strip()
    messages = [
        {"role": "system", "content": "You are an expert technical writer for ML research audiences."},
        {"role": "user", "content": f"Paper title: {base_title}\nAuthors: {PAPER_AUTHORS}\n\nAbstract/context:\n{ctx}\n\nTask: {prompt}"},
    ]
    try:
        return _llm_chat(model_id, messages, max_tokens=600).strip()
    except Exception as e:
        if kind == "tweet thread":
            return _fallback_tweet_thread(base_title, base_abs) + f"\n\n(LLM unavailable: {e})"
        return f"LLM unavailable: {e}"


# ---------------------------
# Model Playground (chat)
# ---------------------------
def model_chat(model_id: str, message: str, history: List[Tuple[str, str]]) -> str:
    model_id = (model_id or "").strip()
    message = (message or "").strip()
    if not model_id:
        return "Provide a model id."
    if not message:
        return "Send a message."
    messages: List[dict] = [{"role": "system", "content": "You are a helpful assistant."}]
    for u, a in (history[-4:] if history else []):
        messages.append({"role": "user", "content": u})
        messages.append({"role": "assistant", "content": a})
    messages.append({"role": "user", "content": message})
    try:
        return _llm_chat(model_id, messages, max_tokens=512).strip()
    except Exception as e:
        return f"Model call failed: {e}"


# ---------------------------
# UI helpers
# ---------------------------
def quantize_intent(intent: str) -> Tuple[str, str, str]:
    """
    Quantize a natural-language intent to a UCR anchor.

    Returns: (primary_result_md, alternatives_md, wire_format)
    """
    intent = (intent or "").strip()
    if not intent:
        return "Enter an intent to quantize.", "", ""

    # Try the embedding quantizer first, fall back to keyword matching.
    quantizer = _get_quantizer()
    method = "embedding"
    if quantizer is None:
        quantizer = _get_keyword_quantizer()
        method = "keyword"
    if quantizer is None:
        return "Quantizer unavailable. Check logs.", "", ""

    try:
        result = quantizer.quantize(intent)
    except Exception as e:
        return f"Quantization error: {e}", "", ""

    # Confidence bucket (color is computed for potential styling; only the
    # label is rendered below).
    conf = result.confidence
    if conf >= 0.7:
        color = "green"
        conf_label = "High"
    elif conf >= 0.5:
        color = "orange"
        conf_label = "Medium"
    else:
        color = "red"
        conf_label = "Low"

    # Primary result
    primary_md = f"""
### {result.anchor.mnemonic}

**Confidence:** {conf:.0%} ({conf_label})

**Canonical meaning:** {result.anchor.canonical}

**Method:** {method} {'(fallback)' if result.is_fallback else ''}

**Coordinates:** `{result.anchor.coords}` (Action, Polarity, Domain, Urgency)
"""

    # Alternatives
    if result.alternatives:
        alt_lines = ["| Anchor | Similarity |", "|--------|------------|"]
        for alt_anchor, alt_score in result.alternatives[:3]:
            bar_len = int(alt_score * 10)
            bar = "█" * bar_len + "░" * (10 - bar_len)
            alt_lines.append(f"| {alt_anchor.mnemonic} | {bar} {alt_score:.0%} |")
        alternatives_md = "\n".join(alt_lines)
    else:
        alternatives_md = "*No alternatives*"

    # Wire format
    wire = f"SLIP v1 user agent {result.anchor.mnemonic}"
    if result.is_fallback:
        # Truncate long intents for the fallback payload.
        payload = intent[:100].replace('"', "'")
        wire = f'SLIP v1 user agent Fallback "{payload}"'

    return primary_md, alternatives_md, wire


EXAMPLE_INTENTS = [
    ("Review my code", "RequestReview"),
    ("Task complete!", "InformComplete"),
    ("System down!", "ObserveError"),
    ("Can you help?", "RequestHelp"),
    ("Looks good to me", "EvalApprove"),
]
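
# Example SLIP wire frames as built above (format: "SLIP <version> <from> <to>
# <anchor>"; anchors shown are the expected ones from EXAMPLE_INTENTS, and the
# exact mapping depends on the loaded UCR):
#
#     SLIP v1 user agent RequestReview
#     SLIP v1 user agent Fallback "some low-confidence free-form intent"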

def start_here(choice: str) -> str:
    choice = (choice or "").strip().lower()
    if choice == "quick summary":
        return f"### {PAPER_TITLE}\n\n**Authors:** {PAPER_AUTHORS}\n\n**Abstract:**\n\n{PAPER_ABSTRACT}"
    if choice == "how does it work?":
        return "Go to **Chat** and ask: *Give me a method overview with the key steps.*"
    if choice == "what are the limitations?":
        return "Go to **Chat** and ask: *List limitations and failure modes discussed in the paper.*"
    if choice == "generate a tweet thread":
        return generate_share("tweet thread")
    return "Pick an option."


def _load_gallery_items() -> List[Tuple[str, str]]:
    items: List[Tuple[str, str]] = []
    if os.path.isdir("assets/images"):
        for fn in sorted(os.listdir("assets/images"))[:48]:
            path = os.path.join("assets/images", fn)
            if os.path.isfile(path):
                items.append((path, fn))
    if not items and os.path.isdir("assets/pages"):
        for fn in sorted(os.listdir("assets/pages"))[:24]:
            path = os.path.join("assets/pages", fn)
            if os.path.isfile(path):
                items.append((path, fn))
    return items


CSS = '''
.paper-hero h1 { margin-bottom: 0.2rem; }
.paper-hero p { margin-top: 0.2rem; opacity: 0.9; }
.hint { opacity: 0.85; }
'''

with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
    gr.Markdown(f"# {PAPER_TITLE}", elem_classes=["paper-hero"])
    if PAPER_AUTHORS:
        gr.Markdown(f"**Authors:** {PAPER_AUTHORS}", elem_classes=["paper-hero"])
    if PAPER_TAGS:
        gr.Markdown(f"**Tags:** {PAPER_TAGS}", elem_classes=["paper-hero"])

    with gr.Tabs():
        with gr.Tab("Start here"):
            gr.Markdown("Pick an interaction to explore the paper quickly.", elem_classes=["hint"])
            choice = gr.Radio(
                ["Quick summary", "How does it work?", "What are the limitations?", "Generate a tweet thread"],
                value="Quick summary",
                label="What do you want?",
            )
            out = gr.Markdown()
            choice.change(start_here, inputs=choice, outputs=out)
            demo.load(start_here, inputs=choice, outputs=out)

        with gr.Tab("Overview"):
            gr.Markdown("## Abstract")
            gr.Markdown(PAPER_ABSTRACT)
            gr.Markdown("---")
            gr.Markdown("### Text search (snippet)")
            q = gr.Textbox(label="Find a phrase", placeholder="e.g., scalable oversight", lines=1)
            snippet = gr.Textbox(label="Top matching context", lines=10)

            def _snippet(query: str) -> str:
                query = (query or "").strip()
                if not query:
                    return ""
                ctx = retrieve_context(query, k=4, max_chars=1600)
                return ctx or "No matches."

            q.change(_snippet, inputs=q, outputs=snippet)

        with gr.Tab("Gallery"):
            gr.Markdown("Extracted images / rendered page previews (if included at build time).", elem_classes=["hint"])
            gallery = gr.Gallery(label="Figures / pages", columns=2, rows=2, height=520)

            def _gallery():
                return _load_gallery_items()

            demo.load(_gallery, outputs=gallery)

        with gr.Tab("Chat"):
            gr.Markdown(
                "Ask questions. If you set `HF_TOKEN` + `PAPER_LLM_MODEL` as Space secrets, answers become generative; "
                "otherwise it returns top-matching snippets.",
                elem_classes=["hint"],
            )
            gr.ChatInterface(fn=paper_chat, title="Chat with the Paper")

        with gr.Tab("Share Kit"):
            gr.Markdown("Generate shareable assets. Works without secrets (deterministic fallback).", elem_classes=["hint"])
            kind = gr.Dropdown(["Tweet thread", "Talk outline", "FAQ"], value="Tweet thread", label="Generate")
            btn = gr.Button("Create")
            share_out = gr.Textbox(lines=14, label="Output")
            btn.click(generate_share, inputs=kind, outputs=share_out)

        with gr.Tab("Model Playground"):
            gr.Markdown("Chat with a referenced Hub model (if any) or provide your own. Requires `HF_TOKEN` secret.", elem_classes=["hint"])
            model_id = gr.Dropdown(
                choices=(DETECTED_MODELS if DETECTED_MODELS else []),
                value=(DETECTED_MODELS[0] if DETECTED_MODELS else None),
                label="Model id",
                allow_custom_value=True,
            )

            def _model_chat_fn(message: str, history: List[Tuple[str, str]], mid: str) -> str:
                return model_chat(mid, message, history)

            gr.ChatInterface(fn=_model_chat_fn, additional_inputs=[model_id], title="Model Playground")
        with gr.Tab("Live Quantizer"):
            gr.Markdown("""
## Think → Quantize → Transmit

Type a messy, natural-language intent and watch it get quantized to a UCR anchor.
This demonstrates the core Slipstream innovation: mapping free-form language onto
a shared semantic manifold.
""")
            with gr.Row():
                with gr.Column(scale=2):
                    intent_input = gr.Textbox(
                        label="Your intent (natural language)",
                        placeholder="Hey, I'm kinda stuck on this auth bug, can you take a look?",
                        lines=2,
                    )
                    quantize_btn = gr.Button("Quantize", variant="primary")
                    gr.Markdown("**Try these examples:**")
                    with gr.Row():
                        for ex_text, _ex_anchor in EXAMPLE_INTENTS:
                            ex_btn = gr.Button(ex_text, size="sm")
                            # Bind ex_text at definition time via a default arg.
                            ex_btn.click(lambda t=ex_text: t, outputs=intent_input)
                with gr.Column(scale=3):
                    primary_out = gr.Markdown(label="Result")
                    with gr.Accordion("Nearby Anchors", open=True):
                        alternatives_out = gr.Markdown()
                    wire_out = gr.Code(label="SLIP Wire Format", language=None)

            quantize_btn.click(
                quantize_intent,
                inputs=intent_input,
                outputs=[primary_out, alternatives_out, wire_out],
            )
            intent_input.submit(
                quantize_intent,
                inputs=intent_input,
                outputs=[primary_out, alternatives_out, wire_out],
            )

    gr.Markdown("---\nBuilt with Gradio on Hugging Face Spaces.")

if __name__ == "__main__":
    demo.launch()