Spaces:

theostos
/

LLM4Docq-retrieval

Sleeping

App Files Files Community

theostos committed on Oct 14, 2025

Commit

0a5e5a2

1 Parent(s): fb3c11d

initial commit

Browse files

Files changed (3) hide show

.gitignore +1 -0
app.py +230 -0
requirements.txt +5 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .vscode

app.py ADDED Viewed

	@@ -0,0 +1,230 @@

+import functools
+import json
+import os
+import textwrap
+from typing import List, Dict, Any
+import gradio as gr
+import requests
+import torch
+import torch.nn.functional as F
+from transformers import AutoTokenizer, AutoModel
+# -----------------------------
+# Embedding utilities (last-token pooling and instruction-prefixed queries for Qwen3-Embedding)
+# -----------------------------
def last_token_pool(last_hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Select one hidden-state vector per row: the one at the row's last attended token.

    When the final column of ``attention_mask`` sums to the number of rows,
    the batch is treated as left-padded and the final position is returned for
    every row. Otherwise each row is indexed at ``attention_mask.sum(dim=1) - 1``.

    Args:
        last_hidden_states: Model output, indexed as ``[row, position]``.
        attention_mask: Mask indexed as ``[row, position]``; summed per row to
            locate the last attended position.

    Returns:
        The selected hidden state for every row.
    """
    num_rows = attention_mask.shape[0]
    every_row_ends_on_token = attention_mask[:, -1].sum() == num_rows
    if every_row_ends_on_token:
        return last_hidden_states[:, -1]

    # Right-padded (or mixed) batch: look up each row's own last position.
    final_positions = attention_mask.sum(dim=1) - 1
    row_ids = torch.arange(last_hidden_states.shape[0], device=last_hidden_states.device)
    return last_hidden_states[row_ids, final_positions]
def get_detailed_instruct(task_description: str, query: str) -> str:
    """Build the two-line prompt: an ``Instruct:`` line followed by a ``Query:`` line."""
    prompt_lines = (f"Instruct: {task_description}", f"Query: {query}")
    return "\n".join(prompt_lines)
class Qwen3Embedding:
    """Sentence embedder backed by a ``Qwen/Qwen3-Embedding-<size>`` checkpoint.

    The tokenizer and model are loaded once in ``__init__``. ``generate`` then
    embeds a sentence using last-token pooling followed by L2 normalisation.
    """

    # Checkpoint sizes accepted by ``__init__``.
    SUPPORTED_SIZES = ("0.6B", "4B", "8B")

    def __init__(self, device: str, size: str = "0.6B"):
        """Load the tokenizer and model for the requested checkpoint size.

        Args:
            device: Device string the model and tokenized inputs are moved to.
            size: Checkpoint size suffix; one of ``SUPPORTED_SIZES``.

        Raises:
            ValueError: If *size* is not a supported checkpoint size.
        """
        # Explicit check instead of ``assert``: assertions are stripped under
        # ``python -O``, which would let an invalid model id through.
        if size not in self.SUPPORTED_SIZES:
            raise ValueError(
                f"Unsupported model size {size!r}; expected one of {', '.join(self.SUPPORTED_SIZES)}."
            )
        model_id = "Qwen/Qwen3-Embedding-" + size
        self.device = device
        self.tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
        # Use bfloat16 on GPU, float32 on CPU (safer on Spaces CPU)
        dtype = torch.bfloat16 if device != "cpu" else torch.float32
        self.model = AutoModel.from_pretrained(model_id, trust_remote_code=True).to(device, dtype=dtype)
        # Task description prepended to queries (not to documents) by ``generate``.
        self.prompt_query = (
            "Given a natural language query, retrieve formal Coq statements whose docstrings "
            "best match the intent of the query."
        )

    @torch.inference_mode()
    def generate(self, sentence: str, is_query: bool = False) -> torch.Tensor:
        """Embed *sentence* and return the L2-normalised embedding tensor.

        Args:
            sentence: Text to embed.
            is_query: When true, the text is wrapped with the retrieval
                instruction via ``get_detailed_instruct`` before tokenization.

        Returns:
            The pooled embeddings, normalised along ``dim=1``.
        """
        input_text = get_detailed_instruct(self.prompt_query, sentence) if is_query else sentence
        batch_dict = self.tokenizer(input_text, padding=True, truncation=True, return_tensors="pt").to(self.device)
        outputs = self.model(**batch_dict)
        embeddings = last_token_pool(outputs.last_hidden_state, batch_dict["attention_mask"])
        embeddings = F.normalize(embeddings, p=2, dim=1)
        return embeddings

    def name(self) -> str:
        """Return the fixed identifier string of this embedder."""
        return "qwen_embedding_base"
@functools.lru_cache(maxsize=3)
def get_embedder() -> Qwen3Embedding:
    """Return the shared CPU-hosted 4B embedder, constructing it on first call.

    The function takes no arguments, so the cache holds a single entry and
    every later call returns the same instance.
    """
    embedder = Qwen3Embedding(size="4B", device="cpu")
    return embedder
+# -----------------------------
+# Backend call
+# -----------------------------
def call_retrieval_service(
    server_url: str, embedding: List[float], top_k: int, timeout: int = 60
) -> List[Dict[str, Any]]:
    """POST one query embedding to ``<server_url>/query`` and return the decoded JSON list.

    Args:
        server_url: Base URL of the retrieval server; one trailing ``/`` is dropped.
        embedding: Query vector, sent as the only element of the ``"query"`` field.
        top_k: Number of results requested; coerced with ``int()``.
        timeout: Timeout passed to ``requests.post``.

    Returns:
        The response body, which must decode to a list.

    Raises:
        requests.exceptions.RequestException: On transport failure or a non-success
            status (via ``raise_for_status``).
        ValueError: If the decoded body is not a list.
    """
    base_url = server_url[:-1] if server_url.endswith("/") else server_url
    request_body = {"query": [embedding], "top_k": int(top_k)}
    response = requests.post(base_url + "/query", json=request_body, timeout=timeout)
    response.raise_for_status()
    entries = response.json()
    if isinstance(entries, list):
        return entries
    raise ValueError("Unexpected response format: expected a list of entries.")
+# -----------------------------
+# Formatting helpers
+# -----------------------------
def _html_escape(s: str) -> str:
    """Escape ``&``, ``<`` and ``>`` so *s* can be placed inside HTML element content.

    Quotes are left untouched, so the result must not be used in attribute values.
    """
    return (
        s.replace("&", "&amp;")
        .replace("<", "&lt;")
        .replace(">", "&gt;")
    )


def _as_text(value: Any) -> str:
    """Coerce a JSON field to ``str``; ``None`` and other falsy values become ``""``."""
    return str(value or "")


def _format_score(value: Any) -> str:
    """Format *value* with four decimals, or return ``"n/a"`` when it is not numeric."""
    try:
        return f"{float(value):.4f}"
    except (TypeError, ValueError, OverflowError):
        return "n/a"


def render_results(items: List[Dict[str, Any]]) -> str:
    """Render retrieval results as an HTML fragment of result cards.

    Each entry is read through ``.get`` for the keys ``score``, ``name``,
    ``kind``, ``docstring`` and ``location``. Missing, ``null`` or non-string
    fields are coerced to text, and a non-numeric score is shown as ``n/a``,
    so one malformed entry cannot abort rendering of the whole list.

    Args:
        items: Result entries as returned by the retrieval service.

    Returns:
        ``"<div>No results.</div>"`` for an empty list; otherwise a ``<style>``
        block followed by one card per entry, ranked from 1.
    """
    if not items:
        return "<div>No results.</div>"
    rows = []
    for idx, it in enumerate(items, start=1):
        score_text = _format_score(it.get("score", 0.0))
        name = _as_text(it.get("name"))
        kind = _as_text(it.get("kind"))
        doc = _as_text(it.get("docstring"))
        location = _as_text(it.get("location"))
        # Collapse whitespace and trim long docstrings for the summary line.
        summary = " ".join(doc.strip().split())
        if len(summary) > 240:
            summary = summary[:240].rstrip() + "…"
        block = f"""
        <div class="result-card">
          <div class="header">
            <span class="rank">#{idx}</span>
            <code class="name">{_html_escape(name)}</code>
            <span class="meta">[{_html_escape(kind)}] · score={score_text}</span>
          </div>
          <div class="location">in {_html_escape(location)}</div>
          <details class="doc">
            <summary>{_html_escape(summary or "(no docstring)")}</summary>
            <pre>{_html_escape(doc)}</pre>
          </details>
        </div>
        """
        rows.append(block)
    style = """
    <style>
      .result-card {border: 1px solid rgba(0,0,0,.08); padding: 12px 14px; border-radius: 12px; margin-bottom: 12px;}
      .header {display:flex; gap:10px; align-items:center; flex-wrap:wrap;}
      .rank {font-weight: 700;}
      .name {font-size: 0.95rem; background: rgba(0,0,0,.03); padding: 2px 6px; border-radius: 6px;}
      .meta {opacity: 0.7;}
      .location {font-size: 0.9rem; opacity: 0.8; margin: 4px 0 8px;}
      details.doc summary {cursor: pointer; font-weight: 500; margin-bottom: 6px;}
      details.doc pre {white-space: pre-wrap; background: rgba(0,0,0,.02); padding: 10px; border-radius: 8px;}
    </style>
    """
    return style + "\n".join(rows)
+# -----------------------------
+# Gradio app
+# -----------------------------
# Retrieval server base URL; the COSIM_SERVER_URL environment variable overrides the default.
DEFAULT_SERVER = os.getenv("COSIM_SERVER_URL", "https://theostos-llm4docq-cosim.hf.space")
def search(
    query: str,
    top_k: int,
    server_url: str,
    show_raw: bool,
) -> List[Any]:
    """Embed *query*, ask the retrieval server for neighbours, and build the UI outputs.

    Args:
        query: Free-text query; surrounding whitespace is stripped.
        top_k: Number of results to request.
        server_url: Base URL of the retrieval server.
        show_raw: Whether the raw result list is returned as the second output.

    Returns:
        A two-element list ``[html, raw]``: the rendered HTML (or an error or
        prompt message) and either the raw result list or ``None``.
    """
    cleaned = (query or "").strip()
    if cleaned == "":
        return [gr.update(value="<div>Please enter a query.</div>"), None]

    try:
        model = get_embedder()
        with torch.inference_mode():
            vector = model.generate(cleaned, is_query=True)
        # The service expects a plain list of Python floats.
        as_floats = vector[0].detach().to(torch.float32).cpu().tolist()
        hits = call_retrieval_service(server_url, as_floats, top_k)
        return [render_results(hits), hits if show_raw else None]
    except requests.exceptions.RequestException as err:
        failure = f"<div style='color:#b00020'>Request error: {_html_escape(str(err))}</div>"
    except RuntimeError as err:
        # Often OOM or dtype issues
        tip = " (Try CPU / smaller model size.)"
        failure = f"<div style='color:#b00020'>Runtime error: {_html_escape(str(err))}{_html_escape(tip)}</div>"
    except Exception as err:
        failure = f"<div style='color:#b00020'>Unexpected error: {_html_escape(str(err))}</div>"
    # Every failure path shows its message and clears the raw JSON output.
    return [failure, None]
# UI layout: query controls in the left column, rendered results and optional raw JSON on the right.
with gr.Blocks(title="MathComp Retrieval (Qwen3 Embedding → Cosim)", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # 🔎 MathComp Retrieval
        Embed your natural-language query with **Qwen3-Embedding** and fetch nearest MathComp items from your retrieval server.
        """
    )
    with gr.Row():
        with gr.Column(scale=3):
            query = gr.Textbox(
                label="Query",
                placeholder="e.g., reasoning about commutative group morphisms",
                lines=3,
                autofocus=True,
            )
            with gr.Row():
                top_k = gr.Slider(1, 50, value=5, step=1, label="top_k")
            with gr.Accordion("Advanced", open=False):
                server_url = gr.Textbox(value=DEFAULT_SERVER, label="Retrieval server URL")
                show_raw = gr.Checkbox(value=False, label="Also show raw JSON response")
            with gr.Row():
                run_btn = gr.Button("Search", variant="primary")
                clear_btn = gr.Button("Clear")
        with gr.Column(scale=4):
            pretty = gr.HTML(label="Results")
            raw_json = gr.JSON(label="Raw JSON", visible=False)

    def on_toggle_raw(show: bool):
        """Show or hide the raw JSON panel to follow the checkbox state."""
        return gr.update(visible=show)

    def on_clear():
        """Reset every control to its initial value.

        Returns exactly one value per component in the Clear button's
        ``outputs`` list, in the same order:
        query, top_k, server_url, show_raw, pretty, raw_json.
        """
        return "", 5, DEFAULT_SERVER, False, "<div/>", None

    show_raw.change(fn=on_toggle_raw, inputs=show_raw, outputs=raw_json)
    run_btn.click(
        fn=search,
        inputs=[query, top_k, server_url, show_raw],
        outputs=[pretty, raw_json],
        api_name="search",
    )
    clear_btn.click(
        fn=on_clear,
        inputs=None,
        outputs=[query, top_k, server_url, show_raw, pretty, raw_json],
    )
    gr.Examples(
        examples=[
            ["polynomial division lemma for ringType"],
            ["matrix rank properties over finite fields"],
            ["group homomorphism kernel characterization"],
            ["bigop lemmas about summation reindexing"],
        ],
        inputs=[query],
        label="Try these",
    )

if __name__ == "__main__":
    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio>=4.38.0
+transformers>=4.41.0
+torch>=2.3.0
+accelerate>=0.30.0
+requests>=2.31.0