apsora committed · verified
Commit b7c755d · 1 Parent(s): ba3fc56

Update app.py

Files changed (1)
  1. app.py +1207 -49
app.py CHANGED
@@ -1,69 +1,1227 @@
  import gradio as gr
- from huggingface_hub import InferenceClient


- def respond(
-     message,
-     history: list[dict[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
-     hf_token: gr.OAuthToken,
- ):
      """
-     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
      """
-     client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

-     messages = [{"role": "system", "content": system_message}]

-     messages.extend(history)

-     messages.append({"role": "user", "content": message})

-     response = ""

-     for message in client.chat_completion(
          messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         choices = message.choices
-         token = ""
-         if len(choices) and choices[0].delta.content:
-             token = choices[0].delta.content

-         response += token
-         yield response


  """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- chatbot = gr.ChatInterface(
-     respond,
-     type="messages",
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
          ),
-     ],
- )

- with gr.Blocks() as demo:
-     with gr.Sidebar():
-         gr.LoginButton()
-     chatbot.render()


  if __name__ == "__main__":
+ import json
+ import re
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, List, Optional, Tuple
+
+ import numpy as np
+ import torch
+ from PIL import Image
  import gradio as gr
+ from transformers import AutoProcessor, AutoModelForVision2Seq
+ import spaces  # <-- needed for Stateless GPU / zeroGPU

+ # ---------------------------------------------------------------------
+ # Minimal GPU-decorated function so Stateless GPU doesn't error out
+ # ---------------------------------------------------------------------
+ @spaces.GPU
+ def gpu_ping() -> str:
+     """
+     Dummy GPU endpoint so Hugging Face Stateless GPU / zeroGPU
+     detects at least one @spaces.GPU function.

+     We don't actually use this in the app logic. It just keeps
+     the Space from throwing:
+     'No @spaces.GPU function detected during startup'.
      """
+     return "gpu_ready"
+
+
+ # ============================================================
+ # 0. Model + guidelines setup
+ # ============================================================
+
+ # NOTE: we keep everything on CPU here to avoid touching CUDA
+ # in the main process (required for Stateless GPU).
+ DEVICE = "cpu"
+ DTYPE = torch.float32
+
+ MODEL_NAME = "maryzhang/qwen3vl-guideline-lora-model"
+
+ print(f"Loading unified vision+text model {MODEL_NAME} on {DEVICE}", flush=True)
+
+ model_vlm = AutoModelForVision2Seq.from_pretrained(
+     MODEL_NAME,
+     dtype=DTYPE,
+     trust_remote_code=True,
+ )
+ model_vlm.to(DEVICE)
+ model_vlm.eval()
+
+ processor_vlm = AutoProcessor.from_pretrained(
+     MODEL_NAME,
+     trust_remote_code=True,
+ )
+
+ GUIDELINES_PATH = "guidelines_final.json"
+
+
+ def load_guidelines(path: str) -> List[Dict[str, Any]]:
+     """
+     Robust loader for guidelines_final.json.
+     Accepts:
+       - a big sequence of JSON objects (your current format)
+       - or a single list
+       - or {"guidelines": [...]}
+     Returns flat list of dicts that contain "guideline_id".
      """
+     with open(path, "r") as f:
+         raw = f.read()
+
+     raw = raw.strip()
+     if not raw:
+         raise ValueError("guidelines_final.json is empty.")
+
+     decoder = json.JSONDecoder()
+     pos = 0
+     length = len(raw)
+     objects: List[Any] = []
+
+     # collect all JSON fragments
+     while pos < length:
+         while pos < length and raw[pos].isspace():
+             pos += 1
+         if pos >= length:
+             break
+         try:
+             obj, end = decoder.raw_decode(raw, pos)
+         except json.JSONDecodeError:
+             pos += 1
+             continue
+         objects.append(obj)
+         pos = end
+
+     if not objects:
+         raise ValueError("No JSON fragments found in guidelines_final.json")
+
+     candidates: List[Any] = []
+     for obj in objects:
+         if isinstance(obj, list):
+             candidates.extend(obj)
+         elif isinstance(obj, dict) and isinstance(obj.get("guidelines"), list):
+             candidates.extend(obj["guidelines"])
+         elif isinstance(obj, dict):
+             candidates.append(obj)
+
+     guidelines: List[Dict[str, Any]] = []
+     for c in candidates:
+         if isinstance(c, dict) and "guideline_id" in c:
+             guidelines.append(c)
+
+     if not guidelines:
+         raise ValueError("Found JSON but no objects with 'guideline_id' field.")
+     return guidelines
+

+ ALL_GUIDELINES: List[Dict[str, Any]] = load_guidelines(GUIDELINES_PATH)
+ GUIDELINE_BY_ID: Dict[str, Dict[str, Any]] = {g["guideline_id"]: g for g in ALL_GUIDELINES}

+ print(f"Loaded {len(ALL_GUIDELINES)} guidelines", flush=True)


+ # ============================================================
+ # 1. Core LLM helpers (text-only + vision)
+ # ============================================================

+ def run_text_llm(system_prompt: str, user_prompt: str, max_new_tokens: int = 768) -> str:
+     """
+     Use Qwen3-VL (LoRA) in text-only mode.
+     """
+     messages = [
+         {"role": "system", "content": [{"type": "text", "text": system_prompt}]},
+         {"role": "user", "content": [{"type": "text", "text": user_prompt}]},
+     ]
+     prompt_text = processor_vlm.apply_chat_template(
          messages,
+         tokenize=False,
+         add_generation_prompt=True,
+     )

+     inputs = processor_vlm(
+         text=prompt_text,
+         return_tensors="pt",
+     ).to(DEVICE)

+     with torch.no_grad():
+         output_ids = model_vlm.generate(
+             **inputs,
+             max_new_tokens=max_new_tokens,
+             temperature=0.0,
+             do_sample=False,
+         )

+     generated = processor_vlm.decode(
+         output_ids[0],
+         skip_special_tokens=True,
+     ).strip()
+     return generated
+
+
+ def vlm_generate_json_from_images(
+     prompt: str,
+     images: List[Image.Image],
+ ) -> Dict[str, Any]:
+     """
+     Call Qwen3-VL with images and ask it to return STRICT JSON.
+     """
+     if not images:
+         images = [Image.new("RGB", (64, 64), "white")]
+
+     content = [{"type": "image"} for _ in images]
+     content.append({"type": "text", "text": prompt})
+
+     messages = [{"role": "user", "content": content}]
+
+     prompt_text = processor_vlm.apply_chat_template(
+         messages,
+         tokenize=False,
+         add_generation_prompt=True,
+     )
+
+     inputs = processor_vlm(
+         text=prompt_text,
+         images=images,
+         return_tensors="pt",
+     ).to(DEVICE)
+
+     with torch.no_grad():
+         output_ids = model_vlm.generate(
+             **inputs,
+             max_new_tokens=512,
+             temperature=0.0,
+             do_sample=False,
+         )
+
+     generated = processor_vlm.decode(
+         output_ids[0],
+         skip_special_tokens=True,
+     ).strip()
+
+     m = re.search(r"\{.*\}", generated, re.DOTALL)
+     if m:
+         try:
+             return json.loads(m.group(0))
+         except Exception:
+             pass
+     return {"parse_error": True, "raw": generated}
+
+
+ # ============================================================
+ # 2. Feature extraction & guideline selection
+ # ============================================================
+
+ FEATURE_PROMPT = """
+ You are assisting with manufacturability and GD&T review.
+ Given these 1–3 CAD / drawing images, return a JSON object with:
+ {
+   "image_type": "cad_model" | "dimensioned_drawing" | "photo" | "other",
+   "has_gdt": bool,
+   "has_dimensions": bool,
+   "features": {
+     "holes": int,
+     "vertical_faces": bool,
+     "possible_draft": bool,
+     "ribs": int,
+     "fillets": bool,
+     "chamfers": bool,
+     "datum_symbols": ["A", "B"],
+     "gdt_frames_present": bool,
+     "text_dimensions_present": bool
+   },
+   "raw_notes": "short human-readable notes about what you see",
+   "generated_description": "one-sentence description of the part/drawing",
+   "suggested_guidelines": []
+ }
+ Rules:
+ - Infer only what is visible or strongly implied.
+ - Keep numbers rough (e.g., count of holes), not exact metrology.
+ - Only output valid JSON. No explanation outside the JSON.
+ - Do NOT hard-code any specific guideline IDs.
  """
+
+
+ def extract_visual_features(images: List[Image.Image]) -> Dict[str, Any]:
+     if not images:
+         return {
+             "image_type": "",
+             "has_gdt": False,
+             "has_dimensions": False,
+             "features": {
+                 "holes": 0,
+                 "vertical_faces": False,
+                 "possible_draft": False,
+                 "ribs": 0,
+                 "fillets": False,
+                 "chamfers": False,
+                 "datum_symbols": [],
+                 "gdt_frames_present": False,
+                 "text_dimensions_present": False,
+             },
+             "raw_notes": "",
+             "generated_description": "",
+             "suggested_guidelines": [],
+         }
+
+     vlm_json = vlm_generate_json_from_images(FEATURE_PROMPT, images)
+
+     return {
+         "image_type": vlm_json.get("image_type", ""),
+         "has_gdt": vlm_json.get("has_gdt", False),
+         "has_dimensions": vlm_json.get("has_dimensions", False),
+         "features": vlm_json.get("features", {}),
+         "raw_notes": vlm_json.get("raw_notes", ""),
+         "generated_description": vlm_json.get("generated_description", ""),
+         "suggested_guidelines": vlm_json.get("suggested_guidelines", []),
+     }
+
+
+ def rag_retrieve(query: str, top_k: int = 6) -> List[Dict[str, Any]]:
+     """
+     Tiny RAG over the 20 guidelines.
+     Now also includes pass_fail_logic in the searchable blob so the
+     evaluator can "see" the numeric rules.
+     """
+     q = (query or "").lower()
+     if not q.strip():
+         return []
+
+     scored = []
+     for g in ALL_GUIDELINES:
+         pfl = g.get("pass_fail_logic") or {}
+         pfl_text = " ".join(
+             f"{k}: {v}" for k, v in pfl.items()
+         )
+         blob = " ".join(
+             [
+                 g.get("topic", ""),
+                 " ".join(g.get("evaluation_criteria", []) or []),
+                 " ".join(g.get("expected_answers", []) or []),
+                 pfl_text,
+             ]
+         ).lower()
+         score = sum(token in blob for token in q.split())
+         if score > 0:
+             scored.append((score, g))
+
+     scored.sort(key=lambda x: x[0], reverse=True)
+     hits = []
+     for score, g in scored[:top_k]:
+         pfl = g.get("pass_fail_logic") or {}
+         pfl_text = " ".join(
+             f"{k}: {v}" for k, v in pfl.items()
+         )
+         text = (
+             " ".join(g.get("evaluation_criteria", []) or [])
+             or " ".join(g.get("expected_answers", []) or [])
+             or pfl_text
+         )
+         hits.append(
+             {
+                 "source": "guideline",
+                 "text": text,
+                 "meta": {
+                     "guideline_id": g["guideline_id"],
+                     "topic": g.get("topic", ""),
+                 },
+             }
+         )
+     return hits
+
+
+ def classify_mode(description: str, feature_summary: Dict[str, Any]) -> str:
+     desc_lower = (description or "").lower()
+     feats = feature_summary.get("features", {})
+
+     image_type = (feature_summary.get("image_type") or "").lower()
+     has_gdt_flag = bool(feature_summary.get("has_gdt"))
+     has_dims_flag = bool(feature_summary.get("has_dimensions"))
+
+     has_datum = bool(feats.get("datum_symbols"))
+     has_gdt_feat = feats.get("gdt_frames_present", False)
+
+     cad_like_words = ["cad", "model", "solid", "surface", "bottle", "housing", "rib"]
+     drawing_like_words = ["drawing", "dimension", "tolerance"]
+
+     has_cad_words = any(w in desc_lower for w in cad_like_words)
+     has_drawing_words = any(w in desc_lower for w in drawing_like_words)
+
+     gd_signals = any(
+         [
+             image_type == "dimensioned_drawing",
+             has_gdt_flag,
+             has_gdt_feat,
+             has_datum,
+             has_dims_flag,
+             has_drawing_words,
+         ]
+     )
+     cad_signals = any(
+         [
+             image_type == "cad_model",
+             has_cad_words,
+         ]
+     )
+
+     if gd_signals and cad_signals:
+         return "mixed"
+     if gd_signals:
+         return "gdt"
+     if cad_signals:
+         return "dfm"
+     return "dfm"
+
+
+ def select_applicable_guidelines(
+     feature_summary: Dict[str, Any],
+     description: str,
+     max_guidelines: int = 5,
+ ) -> List[Dict[str, Any]]:
+     """
+     Choose a subset of guidelines out of all 20, based on dfm/gdt mode.
+     Returns lightweight dicts (guideline_id + topic), but the evaluator
+     will later look up the full objects from GUIDELINE_BY_ID.
+     """
+     mode = classify_mode(description, feature_summary)
+     suggestions = feature_summary.get("suggested_guidelines") or []
+
+     def category_of(g: Dict[str, Any]) -> str:
+         cat = (g.get("category") or "").lower()
+         if cat in ("dfm", "gdt"):
+             return cat
+         gid = (g.get("guideline_id") or "").upper()
+         if gid.startswith("D"):
+             return "dfm"
+         if gid.startswith("G"):
+             return "gdt"
+         return ""
+
+     picked: List[Dict[str, Any]] = []
+     suggested_ids = set()
+
+     # 1) honour any suggested_guidelines (if they match the mode)
+     for s in suggestions:
+         gid = s.get("guideline_id")
+         if not gid:
+             continue
+         g = GUIDELINE_BY_ID.get(gid)
+         if not g:
+             continue
+         cat = category_of(g)
+         if mode == "gdt" and cat != "gdt":
+             continue
+         if mode == "dfm" and cat != "dfm":
+             continue
+         picked.append({"guideline_id": gid, "topic": g.get("topic", "")})
+         suggested_ids.add(gid)
+
+     # 2) fill in from ALL_GUIDELINES based on mode
+     for g in ALL_GUIDELINES:
+         gid = g["guideline_id"]
+         if gid in suggested_ids:
+             continue
+         cat = category_of(g)
+         if mode == "gdt" and cat == "gdt":
+             picked.append({"guideline_id": gid, "topic": g["topic"]})
+         elif mode == "dfm" and cat == "dfm":
+             picked.append({"guideline_id": gid, "topic": g["topic"]})
+         elif mode == "mixed" and cat in ("gdt", "dfm"):
+             picked.append({"guideline_id": gid, "topic": g["topic"]})
+
+     # 3) in mixed mode, bias GD&T first
+     if mode == "mixed":
+         def is_gdt(gid: str) -> bool:
+             g = GUIDELINE_BY_ID.get(gid, {})
+             return category_of(g) == "gdt"
+
+         picked.sort(key=lambda x: 0 if is_gdt(x["guideline_id"]) else 1)
+
+     return picked[:max_guidelines]
+
+
+ # ============================================================
+ # 3. Evaluation utilities
+ # ============================================================
+
+ def extract_json_from_text(text: str) -> Dict[str, Any]:
+     m = re.search(r"\{.*\}", text, re.DOTALL)
+     if not m:
+         return {"parse_error": True, "raw": text}
+     try:
+         return json.loads(m.group(0))
+     except Exception:
+         return {"parse_error": True, "raw": text}
+
+
+ def downgrade_if_no_measurements(
+     eval_json: Dict[str, Any],
+     qa_text: str,
+ ) -> Dict[str, Any]:
+     q_lower = (qa_text or "").lower()
+     no_data = any(
+         phrase in q_lower
+         for phrase in [
+             "no measurement data",
+             "no measured data",
+             "assume 0 mm",
+             "assume zero",
+             "no cmm data",
+         ]
+     )
+     if not no_data:
+         return eval_json
+
+     sensitive_topics = [
+         "True Position",
+         "Profile",
+         "Flatness",
+         "Concentricity",
+         "Runout",
+         "Cylindricity",
+         "Circularity",
+     ]
+
+     for g in eval_json.get("guidelines", []):
+         topic = g.get("topic", "")
+         if any(t in topic for t in sensitive_topics):
+             g["result"] = "NEEDS_INFO"
+             g["reason"] = (
+                 "This guideline depends on measurement data, and you mentioned that "
+                 "measurements are not available yet. That's completely fine at the "
+                 "design stage, so this is marked as NEEDS_INFO rather than PASS/FAIL."
+             )
+             g["recommendation"] = (
+                 "Once you have inspection or simulation data, you can re-run this check "
+                 "to confirm the tolerance is still realistic."
+             )
+
+     return eval_json
+
+
+ def calibrate_eval_scores(eval_json: Dict[str, Any]) -> Dict[str, Any]:
+     guidelines = eval_json.get("guidelines", [])
+     eval_json.setdefault("overall", {})
+
+     if not guidelines:
+         eval_json["overall"].update(
+             {
+                 "summary": "No guidelines were evaluated.",
+                 "verdict": "NEEDS_MORE_DATA",
+                 "manufacturability_score": 0.6,
+             }
+         )
+         return eval_json
+
+     weights = {"PASS": 1.0, "NEEDS_INFO": 0.7, "FAIL": 0.0}
+     results = [g.get("result", "NEEDS_INFO") for g in guidelines]
+
+     if all(r == "NEEDS_INFO" for r in results):
+         eval_json["overall"].update(
+             {
+                 "summary": (
+                     "All guidelines are marked as NEEDS_INFO for now because some data "
+                     "is missing. That's okay—this just means more information will make "
+                     "the review stronger later."
+                 ),
+                 "verdict": "NEEDS_MORE_DATA",
+                 "manufacturability_score": 0.65,
+             }
+         )
+         return eval_json
+
+     scores = [weights.get(r, 0.7) for r in results]
+     avg = sum(scores) / len(scores)
+
+     if avg > 0.9:
+         verdict = "GOOD"
+     elif avg > 0.75:
+         verdict = "ACCEPTABLE"
+     elif avg > 0.6:
+         verdict = "RISKY"
+     else:
+         verdict = "NEEDS_MORE_DATA"
+
+     eval_json["overall"].update(
+         {
+             "summary": (
+                 "Automatic manufacturability summary based on the "
+                 "reviewed guidelines."
+             ),
+             "verdict": verdict,
+             "manufacturability_score": round(float(avg), 2),
+         }
+     )
+     return eval_json
+
+
+ def sanitize_eval_language(
+     eval_json: Dict[str, Any],
+     description: str,
+     feature_summary: Dict[str, Any],
+ ) -> Dict[str, Any]:
+     desc_lower = (description or "").lower()
+     feats = feature_summary.get("features", {})
+
+     is_machined = any(
+         w in desc_lower for w in ["machined", "cnc", "turned", "lathe", "ground"]
+     )
+     is_molded_like = feats.get("possible_draft", False) or any(
+         w in desc_lower for w in ["mold", "mould", "injection", "cast", "die cast"]
+     )
+
+     guideline_explanations = {
+         "True Position Tolerance": (
+             "True position helps ensure that holes or pins line up correctly in "
+             "assembly, so parts fit together without binding or excessive play."
          ),
+         "Profile Tolerance": (
+             "Profile controls how closely a surface matches its ideal CAD shape. "
+             "This matters a lot for sealing, smooth airflow, and consistent contact."
+         ),
+         "Flatness": (
+             "Flatness makes sure a surface does not bow or warp, which is important "
+             "for good sealing and accurate mounting faces."
+         ),
+         "Concentricity": (
+             "Concentricity ensures that different cylindrical features share the same "
+             "axis. This is crucial for rotating parts, shafts, and precision fits."
+         ),
+     }
+
+     encouraging_phrases = {
+         "PASS": (
+             "Nice work—this guideline looks solid. If you want to go further, you "
+             "could explore tolerance stack-ups or measurement planning for production."
+         ),
+         "NEEDS_INFO": (
+             "This isn’t a failure—it just means more information (like measurements "
+             "or simulation results) would help finish the story."
+         ),
+         "FAIL": (
+             "This might cause manufacturability or inspection challenges, but it's a "
+             "great opportunity to iterate and improve the design early."
+         ),
+     }
+
+     for g in eval_json.get("guidelines", []):
+         topic = g.get("topic", "")
+         result = g.get("result", "NEEDS_INFO")
+
+         if topic in guideline_explanations:
+             g["why_it_matters"] = guideline_explanations[topic]
+
+         g.setdefault("recommendation", "")
+         g["recommendation"] = (g["recommendation"] or "").strip()
+         extra = encouraging_phrases.get(result)
+         if extra:
+             if g["recommendation"]:
+                 g["recommendation"] += " "
+             g["recommendation"] += extra
+
+         # clean out weird generic ranges / hole size hallucinations
+         for key in ["reason", "recommendation"]:
+             text = g.get(key, "")
+             if not isinstance(text, str):
+                 continue
+
+             sentences = re.split(r"(?<=[.!?])\s+", text)
+             cleaned_sents = []
+             for s in sentences:
+                 s_lower = s.lower()
+                 if (
+                     "typical range" in s_lower
+                     or "small holes" in s_lower
+                     or "< 5 mm" in s_lower
+                     or "less than 5 mm" in s_lower
+                 ):
+                     continue
+                 cleaned_sents.append(s)
+
+             new_text = " ".join(cleaned_sents).strip()
+
+             if is_machined and not is_molded_like:
+                 new_text = (
+                     new_text.replace(
+                         "molding process capabilities",
+                         "machining process capabilities",
+                     )
+                     .replace("molding process capability", "machining process capability")
+                     .replace("molding process", "machining process")
+                 )
+
+             g[key] = new_text
+
+     overall = eval_json.get("overall", {})
+     if overall.get("verdict") == "POOR":
+         overall["verdict"] = "NEEDS_MORE_DATA"
+         overall["summary"] = (
+             "Some guidelines look challenging with the current information, but that "
+             "just means there is room to refine the design and collect more data."
+         )
+     eval_json["overall"] = overall
+     return eval_json
+
+
+ def evaluation_agent_txt(
+     description: str,
+     guidelines: List[Dict[str, Any]],
+     qa_text: str,
+     feature_summary: Dict[str, Any],
+ ) -> Dict[str, Any]:
+     """
+     Core evaluator: this is where we now pass in:
+       - evaluation_criteria
+       - expected_answers
+       - pass_fail_logic
+     for EACH guideline, so the model can truly reason over your 20 rules.
+     """
+     # Enrich guideline objects from the global GUIDELINE_BY_ID
+     enriched_guidelines = []
+     for g in guidelines:
+         gid = g.get("guideline_id")
+         base = GUIDELINE_BY_ID.get(gid, {})
+         enriched_guidelines.append(
+             {
+                 "guideline_id": gid,
+                 "topic": base.get("topic", g.get("topic", "")),
+                 "category": base.get("category", ""),
+                 "evaluation_criteria": base.get("evaluation_criteria", []),
+                 "user_questions": base.get("user_questions", []),
+                 "expected_answers": base.get("expected_answers", []),
+                 "pass_fail_logic": base.get("pass_fail_logic", {}),
+             }
+         )
+
+     rag_query_text = " ".join(
+         [
+             description or "",
+             qa_text or "",
+             json.dumps(feature_summary.get("features", {})),
+         ]
+     )
+     rag_hits = rag_retrieve(rag_query_text, top_k=6)
+
+     rag_context_lines = []
+     for h in rag_hits:
+         meta = h.get("meta", {})
+         gid = meta.get("guideline_id", "UNKNOWN")
+         topic = meta.get("topic", "")
+         rag_context_lines.append(f"[GUIDELINE {gid} - {topic}]\n{h['text']}")
+     rag_context = (
+         "\n\n---\n\n".join(rag_context_lines)
+         if rag_context_lines
+         else "(no extra context)"
+     )
+
+     sys_prompt = (
+         "You are a senior manufacturing / GD&T engineer and a patient instructor.\n"
+         "You are given:\n"
+         "- An optional short description of the part/product\n"
+         "- A set of DFM/GD&T guidelines to apply (including evaluation_criteria,\n"
+         "  expected_answers, and pass_fail_logic for each guideline)\n"
+         "- A Q&A history where the student answered questions about each guideline\n"
+         "- A feature summary extracted from CAD/drawing images\n"
+         "- Additional reference passages from a guideline knowledge base (RAG)\n\n"
+         "Your goals:\n"
+         "1) For EACH guideline, use the student's numeric/text answers and the\n"
+         "   'pass_fail_logic' rules to decide whether the guideline is PASS, FAIL,\n"
+         "   or NEEDS_INFO.\n"
+         "   • PASS = clearly satisfies the numeric / logical rules.\n"
+         "   • FAIL = clearly violates at least one rule in pass_fail_logic.\n"
+         "   • NEEDS_INFO = only if you truly cannot tell from the Q&A + features.\n"
+         "2) Refer directly to the variables in pass_fail_logic (e.g., nominal_wall,\n"
+         "   variation, rib_or_boss_thickness) and the numbers in the Q&A when\n"
+         "   making decisions. Treat the rules as engineering check equations.\n"
+         "3) Explain briefly WHY in clear engineering language.\n"
+         "4) Offer encouraging, actionable recommendations—talk like a helpful TA.\n"
+         "5) Comment qualitatively on tolerance feasibility in the 'overall' block.\n\n"
+         "IMPORTANT:\n"
+         "- You MUST try to produce PASS or FAIL when the numeric conditions are\n"
+         "  clearly satisfied or violated. Do NOT default to NEEDS_INFO if the\n"
+         "  student already provided the key numbers.\n"
+         "- Only use NEEDS_INFO when the data is genuinely missing or ambiguous.\n\n"
+         "Respond ONLY as a single JSON object with this schema:\n"
+         "{\n"
+         '  "guidelines": [\n'
+         "    {\n"
+         '      "guideline_id": str,\n'
+         '      "topic": str,\n'
+         '      "result": "PASS" | "FAIL" | "NEEDS_INFO",\n'
+         '      "reason": str,\n'
+         '      "recommendation": str\n'
+         "    }\n"
+         "  ],\n"
+         '  "overall": {\n'
+         '    "summary": str,\n'
+         '    "verdict": "GOOD" | "ACCEPTABLE" | "RISKY" | "NEEDS_MORE_DATA",\n'
+         '    "manufacturability_score": float\n'
+         "  }\n"
+         "}\n"
+     )
+
+     user_parts = [
+         "DESCRIPTION:",
+         description or "(none provided)",
+         "\n\nGUIDELINES UNDER REVIEW (with criteria and logic):",
+         json.dumps(enriched_guidelines, indent=2),
+         "\n\nQ&A HISTORY (questions and answers as free text):",
+         qa_text or "(no questions asked yet)",
+         "\n\nFEATURE SUMMARY FROM IMAGE(S):",
+         json.dumps(feature_summary, indent=2),
+         "\n\nRETRIEVED REFERENCES (RAG):",
+         rag_context,
+         "\n\nProduce ONLY the JSON object.",
+     ]
+     user_prompt = "\n".join(user_parts)
+
+     raw = run_text_llm(sys_prompt, user_prompt, max_new_tokens=1024)
+     eval_json = extract_json_from_text(raw)
+
+     if not eval_json.get("parse_error"):
+         eval_json = downgrade_if_no_measurements(eval_json, qa_text)
+         eval_json = calibrate_eval_scores(eval_json)
+         eval_json = sanitize_eval_language(eval_json, description, feature_summary)
+     return eval_json
+
+
+ def summarize_eval_for_student(eval_json: Dict[str, Any]) -> str:
+     guidelines = eval_json.get("guidelines", [])
+     overall = eval_json.get("overall", {})
+
+     lines: List[str] = []
+     lines.append(
+         "Thanks, that’s all the questions I needed for now. "
+         "Here’s your manufacturability snapshot based on those answers:"
+     )
+     lines.append("")
+
+     score = overall.get("manufacturability_score")
+     verdict = overall.get("verdict")
+     summary = overall.get("summary", "")
+
+     if score is not None or verdict:
+         headline = "• Overall verdict: "
+         if verdict:
+             headline += str(verdict)
+         if score is not None:
+             headline += f" (score ≈ {score:.2f})"
+         lines.append(headline)
+
+     if summary:
+         lines.append(f"• Summary: {summary}")
+     lines.append("")
+
+     if guidelines:
+         lines.append("Guideline-by-guideline notes:")
+         for g in guidelines:
+             topic = g.get("topic", "Unnamed guideline")
+             result = g.get("result", "NEEDS_INFO")
+             reason = g.get("reason", "")
+             rec = g.get("recommendation", "")
+             lines.append(f"- {topic} → {result}")
+             if reason:
+                 lines.append(f"  • Why: {reason}")
+             if rec:
+                 lines.append(f"  • Suggestion: {rec}")
+     else:
+         lines.append(
+             "I wasn’t able to evaluate any specific guidelines, likely because "
+             "we didn’t get enough structured answers."
+         )
+
+     lines.append("")
+     lines.append(
+         "If you’d like to see the raw JSON data for debugging or research, "
+         "you can ask: “show me the JSON summary.”"
+     )
+     return "\n".join(lines)
+
+
+ # ============================================================
+ # 4. Conversation state & router
+ # ============================================================
+
+ @dataclass
+ class GuidelineConversationState:
+     selected_guidelines: List[Dict[str, Any]] = field(default_factory=list)
+     current_guideline_idx: int = 0
+     qa_log: List[Tuple[str, str]] = field(default_factory=list)
+     max_questions: int = 8
+     questions_asked: int = 0
+     feature_summary: Dict[str, Any] = field(default_factory=dict)
+     description: str = ""
+
+
+ def current_guideline(
+     state: GuidelineConversationState,
+ ) -> Optional[Dict[str, Any]]:
+     if 0 <= state.current_guideline_idx < len(state.selected_guidelines):
+         return state.selected_guidelines[state.current_guideline_idx]
+     return None
+
+
+ def build_intro_message(
+     description: str,
+     feature_summary: Dict[str, Any],
+     selected_guidelines: List[Dict[str, Any]],
+     max_questions: int,
+ ) -> str:
+     gen_desc = feature_summary.get("generated_description") or ""
+     raw_notes = feature_summary.get("raw_notes") or ""
+
+     desc_bits = []
+     if gen_desc:
+         desc_bits.append(gen_desc)
+     if description:
+         desc_bits.append(description)
+     if raw_notes:
+         desc_bits.append(raw_notes)
+
+     combined_desc = (
+         " ".join(desc_bits)
+         if desc_bits
+         else "I’ll infer as much as I can directly from your image."
+     )
+
+     guideline_topics = [g["topic"] for g in selected_guidelines]
+     guideline_list_str = (
+         ", ".join(guideline_topics)
+         if guideline_topics
+         else "a small set of relevant DFM/GD&T rules"
+     )
+
+     intro = (
+         f"{combined_desc}\n\n"
+         "Based on this, I’ll walk you through a short manufacturability review.\n"
+         f"We’ll look at these guidelines: {guideline_list_str}.\n"
+         "I’ll ask at most ~"
+         f"{max_questions} focused questions, and then summarize how "
+         "manufacturable this design looks and where you could improve it.\n\n"
+         "Let’s start with the first guideline."
+     )
+     return intro
+
+
+ def get_guideline_questions(gid: str) -> List[str]:
+     g = GUIDELINE_BY_ID.get(gid)
+     if not g:
+         return []
+     qs = g.get("user_questions") or g.get("questions") or []
+     out = []
+     for q in qs:
+         if isinstance(q, str):
+             out.append(q)
+         elif isinstance(q, dict) and "question" in q:
+             out.append(q["question"])
+     return out
+
+
+ def classify_user_turn(user_text: str, last_question: str) -> str:
+     """
+     Tiny router: is the user answering the guideline question,
+     or asking their own side question?
+     Returns "answer" or "student_question".
+     """
+     sys_prompt = (
+         "You are a routing model for a tutoring chat about DFM/GD&T.\n"
+         "Given the last question asked by the tutor and the student's reply,\n"
+         "decide if the student is primarily ANSWERING the question, or asking a new\n"
+         "QUESTION of their own (e.g., 'can I add a fillet here?').\n\n"
+         "Reply ONLY in JSON like {\"label\": \"answer\"} or "
+         "{\"label\": \"student_question\"}."
+     )
+     user_prompt = (
+         f"Tutor_question: {last_question}\n"
+         f"Student_message: {user_text}\n"
+         "Label:"
+     )
+     raw = run_text_llm(sys_prompt, user_prompt, max_new_tokens=64)
+     m = re.search(r"\{.*\}", raw, re.DOTALL)
+     if not m:
+         return "answer"
+     try:
+         obj = json.loads(m.group(0))
+         label = (obj.get("label") or "").lower()
+         if label in {"answer", "student_question"}:
+             return label
+     except Exception:
+         pass
+     return "answer"
+
+
+ def answer_student_question(
+     user_text: str,
+     state: GuidelineConversationState,
+     chat_history: List[Tuple[str, str]],
+ ) -> str:
+     """
+     Use the same model to answer a side-question in a friendly way.
+     This does NOT advance the guideline review.
+     """
+     last_q = chat_history[-1][0] if chat_history else ""
+     qa_snippets = []
+     for q, a in state.qa_log[-3:]:
+         qa_snippets.append(f"Q: {q}\nA: {a}")
+     qa_str = "\n---\n".join(qa_snippets) if qa_snippets else "(no prior Q&A)"
+
+     sys_prompt = (
+         "You are a friendly manufacturing / GD&T teaching assistant inside a small app.\n"
+         "The student may ask meta-questions like 'can I add a fillet here?', "
+         "'is this draft enough?', or 'what tolerance should I use?'.\n"
+         "Use the selected DFM/GD&T guidelines, the feature summary, and their answers\n"
+         "to give concrete, practical advice.\n\n"
+         "Prefer to reference guidelines by topic (e.g., Wall Thickness, Draft Angle).\n"
+         "Talk about trade-offs (manufacturability, cost, risk).\n"
+         "Keep answers short (2–6 sentences).\n"
+         "Do NOT output JSON; just respond as normal helpful text."
+     )
+     user_parts = [
+         "Part description:",
+         state.description or "(none)",
+         "\nFeature summary:",
+         json.dumps(state.feature_summary, indent=2),
+         "\nSelected guidelines:",
+         json.dumps(state.selected_guidelines, indent=2),
+         "\nRecent Q&A:",
+         qa_str,
+         "\nLast tutor question:",
+         last_q or "(none)",
+         "\nStudent question:",
+         user_text,
+     ]
+     user_prompt = "\n".join(user_parts)
+     reply = run_text_llm(sys_prompt, user_prompt, max_new_tokens=256)
+     return reply
+
+
+ def step_conversation(
+     chat_history: List[Tuple[str, str]],
+     user_message: str,
+     state: GuidelineConversationState,
+ ) -> Tuple[List[Tuple[str, str]], GuidelineConversationState]:
+     """
+     One conversation step for an ANSWER (router already decided).
+     """
+     # Log student's answer into QA log
+     if chat_history and user_message.strip():
+         last_assistant, _ = chat_history[-1]
+         state.qa_log.append((last_assistant, user_message))
+         state.questions_asked += 1
+
+     # Stopping condition
+     if state.questions_asked >= state.max_questions or not current_guideline(state):
+         qas_text = "\n".join([f"Q: {q}\nA: {a}" for q, a in state.qa_log])
+         eval_json = evaluation_agent_txt(
+             state.description,
+             state.selected_guidelines,
+             qas_text,
+             state.feature_summary,
+         )
+         friendly_summary = summarize_eval_for_student(eval_json)
+         chat_history.append((friendly_summary, ""))
+         return chat_history, state
+
+     # Otherwise, determine next question
+     current = current_guideline(state)
+     gid = current["guideline_id"]
+     topic = current["topic"]
+     questions = get_guideline_questions(gid)
+
+     asked_for_this_topic = [q for q, _ in state.qa_log if topic in q]
+     idx = len(asked_for_this_topic)
+
+     if idx >= len(questions):
+         # move to next guideline
+         state.current_guideline_idx += 1
+         if not current_guideline(state):
+             return step_conversation(chat_history, user_message, state)
+         current = current_guideline(state)
+         gid = current["guideline_id"]
+         topic = current["topic"]
+         questions = get_guideline_questions(gid)
+         idx = 0
+         if not questions:
+             return step_conversation(chat_history, user_message, state)
+
+     q_text = questions[idx]
+     header = (
+         f"Now let’s look at {topic}.\n\n"
+         "For this guideline, we’re checking a few key points from your DFM/GD&T rules. "
+         "I’ll ask a quick question to see whether your design satisfies it.\n\n"
+     )
+     full_q = header + q_text
+     chat_history.append((full_q, ""))
+     return chat_history, state
+
+
+ # --------- helper to convert internal tuples -> Chatbot messages ----------
+
+ def tuples_to_messages(history: List[Tuple[str, str]]) -> List[Dict[str, Any]]:
+     """
+     Convert [(assistant, user), ...] to Chatbot 'messages' format:
+     [{"role": "assistant", "content": "..."},
+      {"role": "user", "content": "..."}, ...]
+     """
+     messages: List[Dict[str, Any]] = []
+     for assistant_text, user_text in history:
+         if assistant_text:
+             messages.append({"role": "assistant", "content": assistant_text})
+         if user_text:
+             messages.append({"role": "user", "content": user_text})
+     return messages
+
+
+ # ============================================================
+ # 5. Gradio UI
+ # ============================================================
+
+ with gr.Blocks(title="DFM / GD&T Manufacturability Tutor") as demo:
+     gr.Markdown(
+         """
+         # 📐 DFM / GD&T Manufacturability Tutor
+         1. Upload **1–3 CAD screenshots or drawings**
+         2. *(Optional)* Add a short description of the part
+         3. Click **Start review**
+         4. Answer a few focused questions → get a guideline-by-guideline summary
+         This tool is meant to feel like a mini design review with a friendly TA.
+         """
+     )
+
+     state = gr.State(GuidelineConversationState())
+     chat_state = gr.State([])  # internal: list[Tuple[str, str]]
+
+     with gr.Row():
+         with gr.Column(scale=3):
+             chat = gr.Chatbot(
+                 label="Conversation",
+                 height=480,
+             )
+             user_box = gr.Textbox(
+                 label="Your answer or question",
+                 placeholder=(
+                     "Answer the current question, or ask something like "
+                     "'can I 3D print this?'"
+                 ),
+             )
+             start_btn = gr.Button("▶️ Start review (or restart)")
+         with gr.Column(scale=2):
+             image_input = gr.Image(
+                 type="numpy",
+                 label="Upload 1–3 CAD/drawing screenshots",
+             )
+             description_box = gr.Textbox(
+                 label="(Optional) Short description of the part",
+                 placeholder="e.g., 'Machined plunger for a relief valve with 60° cone'",
+             )
+             max_q_slider = gr.Slider(
+                 label="Max questions",
+                 minimum=3,
+                 maximum=12,
+                 value=8,
+                 step=1,
+             )
+             feature_debug = gr.JSON(
+                 label="Feature Summary (debug)",
+                 visible=False,
+             )
+             guideline_debug = gr.JSON(
+                 label="Selected Guidelines (debug)",
+                 visible=False,
+             )
+
+     # ---------- Event wiring ----------
+     def _start(images, desc, max_q):
+         """
+         Gradio callback for 'Start review (or restart)'.
+         Normalize images, run feature extractor, pick guidelines,
+         compose intro + first question.
+         """
+         if images is None:
+             image_list: List[np.ndarray] = []
+         elif isinstance(images, list):
+             image_list = images
+         else:
+             image_list = [images]
+
+         pil_images = [Image.fromarray(img) for img in image_list] if image_list else []
+         feature_summary = extract_visual_features(pil_images)
+         selected = select_applicable_guidelines(
+             feature_summary,
+             desc or "",
+             max_guidelines=5,
+         )
+
+         state_obj = GuidelineConversationState(
+             selected_guidelines=selected,
+             current_guideline_idx=0,
+             qa_log=[],
+             max_questions=int(max_q),
+             questions_asked=0,
+             feature_summary=feature_summary,
+             description=desc or "",
+         )
+
+         chat_tuples: List[Tuple[str, str]] = []
+         intro_msg = build_intro_message(
+             desc or "",
+             feature_summary,
+             selected,
+             int(max_q),
+         )
+         chat_tuples.append((intro_msg, ""))
+
+         # Ask first guideline question
+         chat_tuples, state_obj = step_conversation(chat_tuples, "", state_obj)
+
+         chat_messages = tuples_to_messages(chat_tuples)
+         return chat_messages, "", feature_summary, selected, state_obj, chat_tuples
+
+     def _answer(user_text, tuple_history, state_obj: GuidelineConversationState):
+         """
+         Gradio callback for the textbox submit.
+         - Route the user turn to 'answer' vs 'student_question'
+         - If answer → advance guideline flow
+         - If student_question → chatty side-answer, no state advancement
+         """
+         chat_history: List[Tuple[str, str]] = tuple_history or []
+         user_text = (user_text or "").strip()
+         if not user_text:
+             chat_messages = tuples_to_messages(chat_history)
+             return chat_messages, "", state_obj, chat_history
+
+         last_question = chat_history[-1][0] if chat_history else ""
+         label = classify_user_turn(user_text, last_question)
+
+         if label == "student_question":
+             reply = answer_student_question(user_text, state_obj, chat_history)
+             chat_history.append((reply, ""))
+             chat_messages = tuples_to_messages(chat_history)
+             return chat_messages, "", state_obj, chat_history
+
+         # label == "answer": attach answer to last question and advance
+         if chat_history:
+             last_q, _ = chat_history[-1]
+             chat_history[-1] = (last_q, user_text)
+
+         chat_history, new_state = step_conversation(
+             chat_history,
+             user_text,
+             state_obj,
+         )
+         chat_messages = tuples_to_messages(chat_history)
+         return chat_messages, "", new_state, chat_history
+
+     # Button → start/restart the review
+     start_btn.click(
+         _start,
+         inputs=[image_input, description_box, max_q_slider],
+         outputs=[chat, user_box, feature_debug, guideline_debug, state, chat_state],
+     )

+     # Textbox submit → route + respond
+     user_box.submit(
+         _answer,
+         inputs=[user_box, chat_state, state],
+         outputs=[chat, user_box, state, chat_state],
+     )


  if __name__ == "__main__":
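
A note for readers of the diff above: the new `load_guidelines` accepts a guidelines_final.json that is simply several JSON objects written back to back, peeling them off one at a time with `json.JSONDecoder.raw_decode`. A minimal standalone sketch of that parsing technique, separate from the commit itself (the two-object input string below is hypothetical, not the real guidelines file):

import json

# Hypothetical input: two JSON objects concatenated in one string/file.
raw = '{"guideline_id": "D1"} {"guideline_id": "G2"}'

decoder = json.JSONDecoder()
pos, objects = 0, []
while pos < len(raw):
    # Skip any whitespace separating the fragments.
    while pos < len(raw) and raw[pos].isspace():
        pos += 1
    if pos >= len(raw):
        break
    # raw_decode parses one JSON value starting at pos and returns
    # (value, index just past the value).
    obj, end = decoder.raw_decode(raw, pos)
    objects.append(obj)
    pos = end

print(objects)  # [{'guideline_id': 'D1'}, {'guideline_id': 'G2'}]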