import json
import re
import base64
import io
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
from PIL import Image
import gradio as gr
from huggingface_hub import InferenceClient

# ============================================================
# 0. Model + guidelines setup (Inference API version)
# ============================================================

MODEL_NAME = "maryzhang/qwen3vl-guideline-lora-model"
print(f"Using hosted model via Inference API: {MODEL_NAME}", flush=True)

# This uses the HF Inference API (no local weights, no GPU in the Space).
# If the model is private, set HF_TOKEN as an environment variable in the Space.
hf_client = InferenceClient(MODEL_NAME)

GUIDELINES_PATH = "guidelines_final.json"
def load_guidelines(path: str) -> List[Dict[str, Any]]:
    """
    Robust loader for guidelines_final.json.
    Accepts:
      - a big sequence of JSON objects (your current format)
      - or a single list
      - or {"guidelines": [...]}
    Returns a flat list of dicts that contain "guideline_id".
    """
    with open(path, "r") as f:
        raw = f.read()
    raw = raw.strip()
    if not raw:
        raise ValueError("guidelines_final.json is empty.")

    decoder = json.JSONDecoder()
    pos = 0
    length = len(raw)
    objects: List[Any] = []

    # Collect all JSON fragments, skipping any stray characters between them.
    while pos < length:
        while pos < length and raw[pos].isspace():
            pos += 1
        if pos >= length:
            break
        try:
            obj, end = decoder.raw_decode(raw, pos)
        except json.JSONDecodeError:
            pos += 1
            continue
        objects.append(obj)
        pos = end

    if not objects:
        raise ValueError("No JSON fragments found in guidelines_final.json")

    candidates: List[Any] = []
    for obj in objects:
        if isinstance(obj, list):
            candidates.extend(obj)
        elif isinstance(obj, dict) and isinstance(obj.get("guidelines"), list):
            candidates.extend(obj["guidelines"])
        elif isinstance(obj, dict):
            candidates.append(obj)

    guidelines: List[Dict[str, Any]] = []
    for c in candidates:
        if isinstance(c, dict) and "guideline_id" in c:
            guidelines.append(c)

    if not guidelines:
        raise ValueError("Found JSON but no objects with 'guideline_id' field.")
    return guidelines


ALL_GUIDELINES: List[Dict[str, Any]] = load_guidelines(GUIDELINES_PATH)
GUIDELINE_BY_ID: Dict[str, Dict[str, Any]] = {g["guideline_id"]: g for g in ALL_GUIDELINES}
print(f"Loaded {len(ALL_GUIDELINES)} guidelines", flush=True)
# ============================================================
# 1. Core LLM helpers (text-only + vision via Inference API)
# ============================================================

def run_text_llm(system_prompt: str, user_prompt: str, max_new_tokens: int = 768) -> str:
    """
    Use the hosted Qwen3-VL model in text-only mode via chat_completion.
    We build a simple system+user messages list and ask for a deterministic
    response (temperature=0).
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    response = hf_client.chat_completion(
        messages=messages,
        max_tokens=max_new_tokens,
        temperature=0.0,
        stream=False,
    )
    # The HuggingFace InferenceClient returns a ChatCompletionOutput.
    text = response.choices[0].message.content
    return (text or "").strip()
def _pil_to_data_url(img: Image.Image, fmt: str = "PNG") -> str:
    """
    Convert a PIL image to a data URL (base64-encoded), which matches the
    format expected by chat_completion with vision support:
        type: "image_url", image_url: {"url": "data:image/png;base64,..."}
    """
    buf = io.BytesIO()
    img.save(buf, format=fmt)
    b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
    mime = "image/png" if fmt.upper() == "PNG" else "image/jpeg"
    return f"data:{mime};base64,{b64}"
def vlm_generate_json_from_images(
    prompt: str,
    images: List[Image.Image],
) -> Dict[str, Any]:
    """
    Call the hosted Qwen3-VL model with images + text using chat_completion.
    We ask it to return STRICT JSON and then parse the JSON out of the reply.
    This assumes the model supports OpenAI-style multimodal messages where
    each content item can be {"type": "image_url", "image_url": {"url": ...}}
    plus a text chunk.
    """
    if not images:
        images = [Image.new("RGB", (64, 64), "white")]

    # Build message content with multiple images + the prompt text.
    content: List[Dict[str, Any]] = []
    for img in images:
        url = _pil_to_data_url(img)
        content.append(
            {
                "type": "image_url",
                "image_url": {"url": url},
            }
        )
    content.append(
        {
            "type": "text",
            "text": prompt,
        }
    )
    messages = [
        {
            "role": "system",
            "content": "You are a vision model that ONLY replies with strict JSON.",
        },
        {
            "role": "user",
            "content": content,
        },
    ]

    # Ask for a deterministic, non-streaming, JSON-like answer.
    response = hf_client.chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.0,
        stream=False,
        # If your model supports response_format, you can uncomment:
        # response_format={"type": "json_object"},
    )
    raw = response.choices[0].message.content or ""
    raw = raw.strip()

    # Try to extract a JSON object from the raw string.
    m = re.search(r"\{.*\}", raw, re.DOTALL)
    if m:
        try:
            return json.loads(m.group(0))
        except Exception:
            pass
    return {"parse_error": True, "raw": raw}
# ============================================================
# 2. Feature extraction & guideline selection
# ============================================================

FEATURE_PROMPT = """
You are assisting with manufacturability and GD&T review.
Given these 1-3 CAD / drawing images, return a JSON object with:
{
  "image_type": "cad_model" | "dimensioned_drawing" | "photo" | "other",
  "has_gdt": bool,
  "has_dimensions": bool,
  "features": {
    "holes": int,
    "vertical_faces": bool,
    "possible_draft": bool,
    "ribs": int,
    "fillets": bool,
    "chamfers": bool,
    "datum_symbols": ["A", "B"],
    "gdt_frames_present": bool,
    "text_dimensions_present": bool
  },
  "raw_notes": "short human-readable notes about what you see",
  "generated_description": "one-sentence description of the part/drawing",
  "suggested_guidelines": []
}

Rules:
- Infer only what is visible or strongly implied.
- Keep numbers rough (e.g., count of holes), not exact metrology.
- Only output valid JSON. No explanation outside the JSON.
- Do NOT hard-code any specific guideline IDs.
"""
def extract_visual_features(images: List[Image.Image]) -> Dict[str, Any]:
    if not images:
        return {
            "image_type": "",
            "has_gdt": False,
            "has_dimensions": False,
            "features": {
                "holes": 0,
                "vertical_faces": False,
                "possible_draft": False,
                "ribs": 0,
                "fillets": False,
                "chamfers": False,
                "datum_symbols": [],
                "gdt_frames_present": False,
                "text_dimensions_present": False,
            },
            "raw_notes": "",
            "generated_description": "",
            "suggested_guidelines": [],
        }
    vlm_json = vlm_generate_json_from_images(FEATURE_PROMPT, images)
    return {
        "image_type": vlm_json.get("image_type", ""),
        "has_gdt": vlm_json.get("has_gdt", False),
        "has_dimensions": vlm_json.get("has_dimensions", False),
        "features": vlm_json.get("features", {}),
        "raw_notes": vlm_json.get("raw_notes", ""),
        "generated_description": vlm_json.get("generated_description", ""),
        "suggested_guidelines": vlm_json.get("suggested_guidelines", []),
    }
def rag_retrieve(query: str, top_k: int = 6) -> List[Dict[str, Any]]:
    """
    Tiny keyword-overlap RAG over the 20 guidelines.
    The searchable blob also includes pass_fail_logic so the evaluator
    can "see" the numeric rules.
    """
    q = (query or "").lower()
    if not q.strip():
        return []
    scored = []
    for g in ALL_GUIDELINES:
        pfl = g.get("pass_fail_logic") or {}
        pfl_text = " ".join(f"{k}: {v}" for k, v in pfl.items())
        blob = " ".join(
            [
                g.get("topic", ""),
                " ".join(g.get("evaluation_criteria", []) or []),
                " ".join(g.get("expected_answers", []) or []),
                pfl_text,
            ]
        ).lower()
        # Score = number of query tokens that appear in the blob.
        score = sum(token in blob for token in q.split())
        if score > 0:
            scored.append((score, g))
    scored.sort(key=lambda x: x[0], reverse=True)
    hits = []
    for score, g in scored[:top_k]:
        pfl = g.get("pass_fail_logic") or {}
        pfl_text = " ".join(f"{k}: {v}" for k, v in pfl.items())
        text = (
            " ".join(g.get("evaluation_criteria", []) or [])
            or " ".join(g.get("expected_answers", []) or [])
            or pfl_text
        )
        hits.append(
            {
                "source": "guideline",
                "text": text,
                "meta": {
                    "guideline_id": g["guideline_id"],
                    "topic": g.get("topic", ""),
                },
            }
        )
    return hits
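
# Example of the (very rough) scoring: for the query "wall thickness draft",
# a guideline whose blob contains "wall" and "thickness" but not "draft"
# scores 2, and guidelines with score 0 are dropped entirely. This is a
# deliberate stand-in for a real embedding index, not a tuned retriever.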
def classify_mode(description: str, feature_summary: Dict[str, Any]) -> str:
    desc_lower = (description or "").lower()
    feats = feature_summary.get("features", {})
    image_type = (feature_summary.get("image_type") or "").lower()
    has_gdt_flag = bool(feature_summary.get("has_gdt"))
    has_dims_flag = bool(feature_summary.get("has_dimensions"))
    has_datum = bool(feats.get("datum_symbols"))
    has_gdt_feat = feats.get("gdt_frames_present", False)

    cad_like_words = ["cad", "model", "solid", "surface", "bottle", "housing", "rib"]
    drawing_like_words = ["drawing", "dimension", "tolerance"]
    has_cad_words = any(w in desc_lower for w in cad_like_words)
    has_drawing_words = any(w in desc_lower for w in drawing_like_words)

    gd_signals = any(
        [
            image_type == "dimensioned_drawing",
            has_gdt_flag,
            has_gdt_feat,
            has_datum,
            has_dims_flag,
            has_drawing_words,
        ]
    )
    cad_signals = any(
        [
            image_type == "cad_model",
            has_cad_words,
        ]
    )
    if gd_signals and cad_signals:
        return "mixed"
    if gd_signals:
        return "gdt"
    if cad_signals:
        return "dfm"
    # Default to a DFM review when nothing points at GD&T.
    return "dfm"
def select_applicable_guidelines(
    feature_summary: Dict[str, Any],
    description: str,
    max_guidelines: int = 5,
) -> List[Dict[str, Any]]:
    """
    Choose a subset of guidelines out of all 20, based on dfm/gdt mode.
    Returns lightweight dicts (guideline_id + topic); the evaluator
    will later look up the full objects from GUIDELINE_BY_ID.
    """
    mode = classify_mode(description, feature_summary)
    suggestions = feature_summary.get("suggested_guidelines") or []

    def category_of(g: Dict[str, Any]) -> str:
        cat = (g.get("category") or "").lower()
        if cat in ("dfm", "gdt"):
            return cat
        gid = (g.get("guideline_id") or "").upper()
        if gid.startswith("D"):
            return "dfm"
        if gid.startswith("G"):
            return "gdt"
        return ""

    picked: List[Dict[str, Any]] = []
    suggested_ids = set()

    # 1) Honour any suggested_guidelines (if they match the mode).
    for s in suggestions:
        # The VLM may return plain ID strings or small dicts; accept both.
        gid = s.get("guideline_id") if isinstance(s, dict) else s
        if not gid:
            continue
        g = GUIDELINE_BY_ID.get(gid)
        if not g:
            continue
        cat = category_of(g)
        if mode == "gdt" and cat != "gdt":
            continue
        if mode == "dfm" and cat != "dfm":
            continue
        picked.append({"guideline_id": gid, "topic": g.get("topic", "")})
        suggested_ids.add(gid)

    # 2) Fill in from ALL_GUIDELINES based on mode.
    for g in ALL_GUIDELINES:
        gid = g["guideline_id"]
        if gid in suggested_ids:
            continue
        cat = category_of(g)
        if mode == "gdt" and cat == "gdt":
            picked.append({"guideline_id": gid, "topic": g["topic"]})
        elif mode == "dfm" and cat == "dfm":
            picked.append({"guideline_id": gid, "topic": g["topic"]})
        elif mode == "mixed" and cat in ("gdt", "dfm"):
            picked.append({"guideline_id": gid, "topic": g["topic"]})

    # 3) In mixed mode, bias GD&T first.
    if mode == "mixed":
        def is_gdt(gid: str) -> bool:
            g = GUIDELINE_BY_ID.get(gid, {})
            return category_of(g) == "gdt"

        picked.sort(key=lambda x: 0 if is_gdt(x["guideline_id"]) else 1)

    return picked[:max_guidelines]
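
# E.g. in "mixed" mode with a suggested "G2" (hypothetical ID), the picked
# list starts with G2, then the remaining GD&T guidelines, then DFM ones
# (Python's sort is stable), truncated to max_guidelines entries.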
# ============================================================
# 3. Evaluation utilities
# ============================================================

def extract_json_from_text(text: str) -> Dict[str, Any]:
    m = re.search(r"\{.*\}", text, re.DOTALL)
    if not m:
        return {"parse_error": True, "raw": text}
    try:
        return json.loads(m.group(0))
    except Exception:
        return {"parse_error": True, "raw": text}
def downgrade_if_no_measurements(
    eval_json: Dict[str, Any],
    qa_text: str,
) -> Dict[str, Any]:
    q_lower = (qa_text or "").lower()
    no_data = any(
        phrase in q_lower
        for phrase in [
            "no measurement data",
            "no measured data",
            "assume 0 mm",
            "assume zero",
            "no cmm data",
        ]
    )
    if not no_data:
        return eval_json

    sensitive_topics = [
        "True Position",
        "Profile",
        "Flatness",
        "Concentricity",
        "Runout",
        "Cylindricity",
        "Circularity",
    ]
    for g in eval_json.get("guidelines", []):
        topic = g.get("topic", "")
        if any(t in topic for t in sensitive_topics):
            g["result"] = "NEEDS_INFO"
            g["reason"] = (
                "This guideline depends on measurement data, and you mentioned that "
                "measurements are not available yet. That's completely fine at the "
                "design stage, so this is marked as NEEDS_INFO rather than PASS/FAIL."
            )
            g["recommendation"] = (
                "Once you have inspection or simulation data, you can re-run this check "
                "to confirm the tolerance is still realistic."
            )
    return eval_json
def calibrate_eval_scores(eval_json: Dict[str, Any]) -> Dict[str, Any]:
    guidelines = eval_json.get("guidelines", [])
    eval_json.setdefault("overall", {})
    if not guidelines:
        eval_json["overall"].update(
            {
                "summary": "No guidelines were evaluated.",
                "verdict": "NEEDS_MORE_DATA",
                "manufacturability_score": 0.6,
            }
        )
        return eval_json

    weights = {"PASS": 1.0, "NEEDS_INFO": 0.7, "FAIL": 0.0}
    results = [g.get("result", "NEEDS_INFO") for g in guidelines]
    if all(r == "NEEDS_INFO" for r in results):
        eval_json["overall"].update(
            {
                "summary": (
                    "All guidelines are marked as NEEDS_INFO for now because some data "
                    "is missing. That's okay; this just means more information will "
                    "make the review stronger later."
                ),
                "verdict": "NEEDS_MORE_DATA",
                "manufacturability_score": 0.65,
            }
        )
        return eval_json

    scores = [weights.get(r, 0.7) for r in results]
    avg = sum(scores) / len(scores)
    if avg > 0.9:
        verdict = "GOOD"
    elif avg > 0.75:
        verdict = "ACCEPTABLE"
    elif avg > 0.6:
        verdict = "RISKY"
    else:
        verdict = "NEEDS_MORE_DATA"
    eval_json["overall"].update(
        {
            "summary": (
                "Automatic manufacturability summary based on the "
                "reviewed guidelines."
            ),
            "verdict": verdict,
            "manufacturability_score": round(float(avg), 2),
        }
    )
    return eval_json
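
# Worked example: results ["PASS", "NEEDS_INFO", "FAIL"] weigh in at
# (1.0 + 0.7 + 0.0) / 3 = 0.57 (rounded), which clears none of the verdict
# thresholds (> 0.9, > 0.75, > 0.6) and therefore maps to "NEEDS_MORE_DATA".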
def sanitize_eval_language(
    eval_json: Dict[str, Any],
    description: str,
    feature_summary: Dict[str, Any],
) -> Dict[str, Any]:
    desc_lower = (description or "").lower()
    feats = feature_summary.get("features", {})
    is_machined = any(
        w in desc_lower for w in ["machined", "cnc", "turned", "lathe", "ground"]
    )
    is_molded_like = feats.get("possible_draft", False) or any(
        w in desc_lower for w in ["mold", "mould", "injection", "cast", "die cast"]
    )

    guideline_explanations = {
        "True Position Tolerance": (
            "True position helps ensure that holes or pins line up correctly in "
            "assembly, so parts fit together without binding or excessive play."
        ),
        "Profile Tolerance": (
            "Profile controls how closely a surface matches its ideal CAD shape. "
            "This matters a lot for sealing, smooth airflow, and consistent contact."
        ),
        "Flatness": (
            "Flatness makes sure a surface does not bow or warp, which is important "
            "for good sealing and accurate mounting faces."
        ),
        "Concentricity": (
            "Concentricity ensures that different cylindrical features share the same "
            "axis. This is crucial for rotating parts, shafts, and precision fits."
        ),
    }
    encouraging_phrases = {
        "PASS": (
            "Nice work; this guideline looks solid. If you want to go further, you "
            "could explore tolerance stack-ups or measurement planning for production."
        ),
        "NEEDS_INFO": (
            "This isn't a failure; it just means more information (like measurements "
            "or simulation results) would help finish the story."
        ),
        "FAIL": (
            "This might cause manufacturability or inspection challenges, but it's a "
            "great opportunity to iterate and improve the design early."
        ),
    }

    for g in eval_json.get("guidelines", []):
        topic = g.get("topic", "")
        result = g.get("result", "NEEDS_INFO")
        if topic in guideline_explanations:
            g["why_it_matters"] = guideline_explanations[topic]
        g.setdefault("recommendation", "")
        g["recommendation"] = (g["recommendation"] or "").strip()
        extra = encouraging_phrases.get(result)
        if extra:
            if g["recommendation"]:
                g["recommendation"] += " "
            g["recommendation"] += extra

        # Clean out weird generic ranges / hole-size hallucinations.
        for key in ["reason", "recommendation"]:
            text = g.get(key, "")
            if not isinstance(text, str):
                continue
            sentences = re.split(r"(?<=[.!?])\s+", text)
            cleaned_sents = []
            for s in sentences:
                s_lower = s.lower()
                if (
                    "typical range" in s_lower
                    or "small holes" in s_lower
                    or "< 5 mm" in s_lower
                    or "less than 5 mm" in s_lower
                ):
                    continue
                cleaned_sents.append(s)
            new_text = " ".join(cleaned_sents).strip()
            if is_machined and not is_molded_like:
                new_text = (
                    new_text.replace(
                        "molding process capabilities",
                        "machining process capabilities",
                    )
                    .replace("molding process capability", "machining process capability")
                    .replace("molding process", "machining process")
                )
            g[key] = new_text

    overall = eval_json.get("overall", {})
    if overall.get("verdict") == "POOR":
        overall["verdict"] = "NEEDS_MORE_DATA"
        overall["summary"] = (
            "Some guidelines look challenging with the current information, but that "
            "just means there is room to refine the design and collect more data."
        )
    eval_json["overall"] = overall
    return eval_json
def evaluation_agent_txt(
    description: str,
    guidelines: List[Dict[str, Any]],
    qa_text: str,
    feature_summary: Dict[str, Any],
) -> Dict[str, Any]:
    """
    Core evaluator. For EACH guideline we pass in:
      - evaluation_criteria
      - expected_answers
      - pass_fail_logic
    so the model can truly reason over your 20 rules.
    """
    # Enrich guideline objects from the global GUIDELINE_BY_ID.
    enriched_guidelines = []
    for g in guidelines:
        gid = g.get("guideline_id")
        base = GUIDELINE_BY_ID.get(gid, {})
        enriched_guidelines.append(
            {
                "guideline_id": gid,
                "topic": base.get("topic", g.get("topic", "")),
                "category": base.get("category", ""),
                "evaluation_criteria": base.get("evaluation_criteria", []),
                "user_questions": base.get("user_questions", []),
                "expected_answers": base.get("expected_answers", []),
                "pass_fail_logic": base.get("pass_fail_logic", {}),
            }
        )

    rag_query_text = " ".join(
        [
            description or "",
            qa_text or "",
            json.dumps(feature_summary.get("features", {})),
        ]
    )
    rag_hits = rag_retrieve(rag_query_text, top_k=6)
    rag_context_lines = []
    for h in rag_hits:
        meta = h.get("meta", {})
        gid = meta.get("guideline_id", "UNKNOWN")
        topic = meta.get("topic", "")
        rag_context_lines.append(f"[GUIDELINE {gid} - {topic}]\n{h['text']}")
    rag_context = (
        "\n\n---\n\n".join(rag_context_lines)
        if rag_context_lines
        else "(no extra context)"
    )

    sys_prompt = (
        "You are a senior manufacturing / GD&T engineer and a patient instructor.\n"
        "You are given:\n"
        "- An optional short description of the part/product\n"
        "- A set of DFM/GD&T guidelines to apply (including evaluation_criteria,\n"
        "  expected_answers, and pass_fail_logic for each guideline)\n"
        "- A Q&A history where the student answered questions about each guideline\n"
        "- A feature summary extracted from CAD/drawing images\n"
        "- Additional reference passages from a guideline knowledge base (RAG)\n\n"
        "Your goals:\n"
        "1) For EACH guideline, use the student's numeric/text answers and the\n"
        "   'pass_fail_logic' rules to decide whether the guideline is PASS, FAIL,\n"
        "   or NEEDS_INFO.\n"
        "   - PASS = clearly satisfies the numeric / logical rules.\n"
        "   - FAIL = clearly violates at least one rule in pass_fail_logic.\n"
        "   - NEEDS_INFO = only if you truly cannot tell from the Q&A + features.\n"
        "2) Refer directly to the variables in pass_fail_logic (e.g., nominal_wall,\n"
        "   variation, rib_or_boss_thickness) and the numbers in the Q&A when\n"
        "   making decisions. Treat the rules as engineering check equations.\n"
        "3) Explain briefly WHY in clear engineering language.\n"
        "4) Offer encouraging, actionable recommendations; talk like a helpful TA.\n"
        "5) Comment qualitatively on tolerance feasibility in the 'overall' block.\n\n"
        "IMPORTANT:\n"
        "- You MUST try to produce PASS or FAIL when the numeric conditions are\n"
        "  clearly satisfied or violated. Do NOT default to NEEDS_INFO if the\n"
        "  student already provided the key numbers.\n"
        "- Only use NEEDS_INFO when the data is genuinely missing or ambiguous.\n\n"
        "Respond ONLY as a single JSON object with this schema:\n"
        "{\n"
        '  "guidelines": [\n'
        "    {\n"
        '      "guideline_id": str,\n'
        '      "topic": str,\n'
        '      "result": "PASS" | "FAIL" | "NEEDS_INFO",\n'
        '      "reason": str,\n'
        '      "recommendation": str\n'
        "    }\n"
        "  ],\n"
        '  "overall": {\n'
        '    "summary": str,\n'
        '    "verdict": "GOOD" | "ACCEPTABLE" | "RISKY" | "NEEDS_MORE_DATA",\n'
        '    "manufacturability_score": float\n'
        "  }\n"
        "}\n"
    )

    user_parts = [
        "DESCRIPTION:",
        description or "(none provided)",
        "\n\nGUIDELINES UNDER REVIEW (with criteria and logic):",
        json.dumps(enriched_guidelines, indent=2),
        "\n\nQ&A HISTORY (questions and answers as free text):",
        qa_text or "(no questions asked yet)",
        "\n\nFEATURE SUMMARY FROM IMAGE(S):",
        json.dumps(feature_summary, indent=2),
        "\n\nRETRIEVED REFERENCES (RAG):",
        rag_context,
        "\n\nProduce ONLY the JSON object.",
    ]
    user_prompt = "\n".join(user_parts)
    raw = run_text_llm(sys_prompt, user_prompt, max_new_tokens=1024)
    eval_json = extract_json_from_text(raw)

    if not eval_json.get("parse_error"):
        eval_json = downgrade_if_no_measurements(eval_json, qa_text)
        eval_json = calibrate_eval_scores(eval_json)
        eval_json = sanitize_eval_language(eval_json, description, feature_summary)
    return eval_json
def summarize_eval_for_student(eval_json: Dict[str, Any]) -> str:
    guidelines = eval_json.get("guidelines", [])
    overall = eval_json.get("overall", {})
    lines: List[str] = []
    lines.append(
        "Thanks, that's all the questions I needed for now. "
        "Here's your manufacturability snapshot based on those answers:"
    )
    lines.append("")
    score = overall.get("manufacturability_score")
    verdict = overall.get("verdict")
    summary = overall.get("summary", "")
    if score is not None or verdict:
        headline = "• Overall verdict: "
        if verdict:
            headline += str(verdict)
        if score is not None:
            headline += f" (score ≈ {score:.2f})"
        lines.append(headline)
    if summary:
        lines.append(f"• Summary: {summary}")
    lines.append("")
    if guidelines:
        lines.append("Guideline-by-guideline notes:")
        for g in guidelines:
            topic = g.get("topic", "Unnamed guideline")
            result = g.get("result", "NEEDS_INFO")
            reason = g.get("reason", "")
            rec = g.get("recommendation", "")
            lines.append(f"- {topic} → {result}")
            if reason:
                lines.append(f"  • Why: {reason}")
            if rec:
                lines.append(f"  • Suggestion: {rec}")
    else:
        lines.append(
            "I wasn't able to evaluate any specific guidelines, likely because "
            "we didn't get enough structured answers."
        )
    lines.append("")
    lines.append(
        "If you'd like to see the raw JSON data for debugging or research, "
        'you can ask: "show me the JSON summary."'
    )
    return "\n".join(lines)
# ============================================================
# 4. Conversation state & router
# ============================================================

# The @dataclass decorator is required here: field(default_factory=...) and
# the keyword construction in _start both rely on the generated __init__.
@dataclass
class GuidelineConversationState:
    selected_guidelines: List[Dict[str, Any]] = field(default_factory=list)
    current_guideline_idx: int = 0
    qa_log: List[Tuple[str, str]] = field(default_factory=list)
    max_questions: int = 8
    questions_asked: int = 0
    feature_summary: Dict[str, Any] = field(default_factory=dict)
    description: str = ""


def current_guideline(
    state: GuidelineConversationState,
) -> Optional[Dict[str, Any]]:
    if 0 <= state.current_guideline_idx < len(state.selected_guidelines):
        return state.selected_guidelines[state.current_guideline_idx]
    return None
def build_intro_message(
    description: str,
    feature_summary: Dict[str, Any],
    selected_guidelines: List[Dict[str, Any]],
    max_questions: int,
) -> str:
    gen_desc = feature_summary.get("generated_description") or ""
    raw_notes = feature_summary.get("raw_notes") or ""
    desc_bits = []
    if gen_desc:
        desc_bits.append(gen_desc)
    if description:
        desc_bits.append(description)
    if raw_notes:
        desc_bits.append(raw_notes)
    combined_desc = (
        " ".join(desc_bits)
        if desc_bits
        else "I'll infer as much as I can directly from your image."
    )
    guideline_topics = [g["topic"] for g in selected_guidelines]
    guideline_list_str = (
        ", ".join(guideline_topics)
        if guideline_topics
        else "a small set of relevant DFM/GD&T rules"
    )
    intro = (
        f"{combined_desc}\n\n"
        "Based on this, I'll walk you through a short manufacturability review.\n"
        f"We'll look at these guidelines: {guideline_list_str}.\n"
        "I'll ask at most ~"
        f"{max_questions} focused questions, and then summarize how "
        "manufacturable this design looks and where you could improve it.\n\n"
        "Let's start with the first guideline."
    )
    return intro
def get_guideline_questions(gid: str) -> List[str]:
    g = GUIDELINE_BY_ID.get(gid)
    if not g:
        return []
    qs = g.get("user_questions") or g.get("questions") or []
    out = []
    for q in qs:
        if isinstance(q, str):
            out.append(q)
        elif isinstance(q, dict) and "question" in q:
            out.append(q["question"])
    return out
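
# Both question shapes are tolerated, e.g. (hypothetical data):
#   "user_questions": ["What is the draft angle?"]            -> kept as-is
#   "user_questions": [{"question": "What is the draft angle?", "unit": "deg"}]
#                                                             -> "question" extracted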
def classify_user_turn(user_text: str, last_question: str) -> str:
    """
    Tiny router: is the user answering the guideline question,
    or asking their own side question?
    Returns "answer" or "student_question".
    """
    sys_prompt = (
        "You are a routing model for a tutoring chat about DFM/GD&T.\n"
        "Given the last question asked by the tutor and the student's reply,\n"
        "decide if the student is primarily ANSWERING the question, or asking a new\n"
        "QUESTION of their own (e.g., 'can I add a fillet here?').\n\n"
        "Reply ONLY in JSON like {\"label\": \"answer\"} or "
        "{\"label\": \"student_question\"}."
    )
    user_prompt = (
        f"Tutor_question: {last_question}\n"
        f"Student_message: {user_text}\n"
        "Label:"
    )
    raw = run_text_llm(sys_prompt, user_prompt, max_new_tokens=64)
    m = re.search(r"\{.*\}", raw, re.DOTALL)
    if not m:
        return "answer"
    try:
        obj = json.loads(m.group(0))
        label = (obj.get("label") or "").lower()
        if label in {"answer", "student_question"}:
            return label
    except Exception:
        pass
    # Fall back to treating the turn as an answer so the review keeps moving.
    return "answer"
def answer_student_question(
    user_text: str,
    state: GuidelineConversationState,
    chat_history: List[Tuple[str, str]],
) -> str:
    """
    Use the same model to answer a side-question in a friendly way.
    This does NOT advance the guideline review.
    """
    last_q = chat_history[-1][0] if chat_history else ""
    qa_snippets = []
    for q, a in state.qa_log[-3:]:
        qa_snippets.append(f"Q: {q}\nA: {a}")
    qa_str = "\n---\n".join(qa_snippets) if qa_snippets else "(no prior Q&A)"

    sys_prompt = (
        "You are a friendly manufacturing / GD&T teaching assistant inside a small app.\n"
        "The student may ask meta-questions like 'can I add a fillet here?', "
        "'is this draft enough?', or 'what tolerance should I use?'.\n"
        "Use the selected DFM/GD&T guidelines, the feature summary, and their answers\n"
        "to give concrete, practical advice.\n\n"
        "Prefer to reference guidelines by topic (e.g., Wall Thickness, Draft Angle).\n"
        "Talk about trade-offs (manufacturability, cost, risk).\n"
        "Keep answers short (2-6 sentences).\n"
        "Do NOT output JSON; just respond as normal helpful text."
    )
    user_parts = [
        "Part description:",
        state.description or "(none)",
        "\nFeature summary:",
        json.dumps(state.feature_summary, indent=2),
        "\nSelected guidelines:",
        json.dumps(state.selected_guidelines, indent=2),
        "\nRecent Q&A:",
        qa_str,
        "\nLast tutor question:",
        last_q or "(none)",
        "\nStudent question:",
        user_text,
    ]
    user_prompt = "\n".join(user_parts)
    reply = run_text_llm(sys_prompt, user_prompt, max_new_tokens=256)
    return reply
def step_conversation(
    chat_history: List[Tuple[str, str]],
    user_message: str,
    state: GuidelineConversationState,
) -> Tuple[List[Tuple[str, str]], GuidelineConversationState]:
    """
    One conversation step for an ANSWER (the router already decided).
    """
    # Log the student's answer into the QA log.
    if chat_history and user_message.strip():
        last_assistant, _ = chat_history[-1]
        state.qa_log.append((last_assistant, user_message))
        state.questions_asked += 1

    # Stopping condition: out of questions or out of guidelines.
    if state.questions_asked >= state.max_questions or not current_guideline(state):
        qas_text = "\n".join(f"Q: {q}\nA: {a}" for q, a in state.qa_log)
        eval_json = evaluation_agent_txt(
            state.description,
            state.selected_guidelines,
            qas_text,
            state.feature_summary,
        )
        friendly_summary = summarize_eval_for_student(eval_json)
        chat_history.append((friendly_summary, ""))
        return chat_history, state

    # Otherwise, determine the next question.
    current = current_guideline(state)
    gid = current["guideline_id"]
    topic = current["topic"]
    questions = get_guideline_questions(gid)
    asked_for_this_topic = [q for q, _ in state.qa_log if topic in q]
    idx = len(asked_for_this_topic)
    if idx >= len(questions):
        # Move on to the next guideline.
        state.current_guideline_idx += 1
        if not current_guideline(state):
            # Recurse with an empty message so the answer above is not logged twice.
            return step_conversation(chat_history, "", state)
        current = current_guideline(state)
        gid = current["guideline_id"]
        topic = current["topic"]
        questions = get_guideline_questions(gid)
        idx = 0
        if not questions:
            return step_conversation(chat_history, "", state)

    q_text = questions[idx]
    header = (
        f"Now let's look at {topic}.\n\n"
        "For this guideline, we're checking a few key points from your DFM/GD&T rules. "
        "I'll ask a quick question to see whether your design satisfies it.\n\n"
    )
    full_q = header + q_text
    chat_history.append((full_q, ""))
    return chat_history, state
# --------- helper to convert internal tuples -> Chatbot messages ----------

def tuples_to_messages(history: List[Tuple[str, str]]) -> List[Dict[str, Any]]:
    """
    Convert [(assistant, user), ...] to the Chatbot 'messages' format:
        [{"role": "assistant", "content": "..."},
         {"role": "user", "content": "..."}, ...]
    """
    messages: List[Dict[str, Any]] = []
    for assistant_text, user_text in history:
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
        if user_text:
            messages.append({"role": "user", "content": user_text})
    return messages
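
# Example: [("What is the wall thickness?", "2 mm")] becomes
#   [{"role": "assistant", "content": "What is the wall thickness?"},
#    {"role": "user", "content": "2 mm"}]
# Note the internal tuples are (assistant, user), i.e. reversed relative to
# Gradio's legacy (user, assistant) tuple convention.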
# ============================================================
# 5. Gradio UI
# ============================================================

with gr.Blocks(title="DFM / GD&T Manufacturability Tutor") as demo:
    gr.Markdown(
        """
# DFM / GD&T Manufacturability Tutor

1. Upload **1-3 CAD screenshots or drawings**
2. *(Optional)* Add a short description of the part
3. Click **Start review**
4. Answer a few focused questions → get a guideline-by-guideline summary

This tool is powered by a hosted multimodal model via the Hugging Face Inference API,
so it runs on free CPU hardware without loading big weights in this Space.
"""
    )

    state = gr.State(GuidelineConversationState())
    chat_state = gr.State([])  # internal: List[Tuple[str, str]]

    with gr.Row():
        with gr.Column(scale=3):
            chat = gr.Chatbot(
                label="Conversation",
                height=480,
                type="messages",  # we feed role/content dicts from tuples_to_messages
            )
            user_box = gr.Textbox(
                label="Your answer or question",
                placeholder=(
                    "Answer the current question, or ask something like "
                    "'can I 3D print this?'"
                ),
            )
            start_btn = gr.Button("Start review (or restart)")
        with gr.Column(scale=2):
            image_input = gr.Image(
                type="numpy",
                label="Upload 1-3 CAD/drawing screenshots",
            )
            description_box = gr.Textbox(
                label="(Optional) Short description of the part",
                placeholder="e.g., 'Machined plunger for a relief valve with 60° cone'",
            )
            max_q_slider = gr.Slider(
                label="Max questions",
                minimum=3,
                maximum=12,
                value=8,
                step=1,
            )
            feature_debug = gr.JSON(
                label="Feature Summary (debug)",
                visible=False,
            )
            guideline_debug = gr.JSON(
                label="Selected Guidelines (debug)",
                visible=False,
            )
    # ---------- Event wiring ----------
    def _start(images, desc, max_q):
        """
        Gradio callback for 'Start review (or restart)'.
        Normalize images, run the feature extractor, pick guidelines,
        and compose the intro + first question.
        """
        if images is None:
            image_list: List[np.ndarray] = []
        elif isinstance(images, list):
            image_list = images
        else:
            image_list = [images]
        pil_images = [Image.fromarray(img) for img in image_list] if image_list else []

        feature_summary = extract_visual_features(pil_images)
        selected = select_applicable_guidelines(
            feature_summary,
            desc or "",
            max_guidelines=5,
        )
        state_obj = GuidelineConversationState(
            selected_guidelines=selected,
            current_guideline_idx=0,
            qa_log=[],
            max_questions=int(max_q),
            questions_asked=0,
            feature_summary=feature_summary,
            description=desc or "",
        )
        chat_tuples: List[Tuple[str, str]] = []
        intro_msg = build_intro_message(
            desc or "",
            feature_summary,
            selected,
            int(max_q),
        )
        chat_tuples.append((intro_msg, ""))
        # Ask the first guideline question.
        chat_tuples, state_obj = step_conversation(chat_tuples, "", state_obj)
        chat_messages = tuples_to_messages(chat_tuples)
        return chat_messages, "", feature_summary, selected, state_obj, chat_tuples
    def _answer(user_text, tuple_history, state_obj: GuidelineConversationState):
        """
        Gradio callback for the textbox submit.
        - Route the user turn to 'answer' vs 'student_question'
        - If answer: advance the guideline flow
        - If student_question: give a chatty side-answer, no state advancement
        """
        chat_history: List[Tuple[str, str]] = tuple_history or []
        user_text = (user_text or "").strip()
        if not user_text:
            chat_messages = tuples_to_messages(chat_history)
            return chat_messages, "", state_obj, chat_history

        last_question = chat_history[-1][0] if chat_history else ""
        label = classify_user_turn(user_text, last_question)

        if label == "student_question":
            reply = answer_student_question(user_text, state_obj, chat_history)
            chat_history.append((reply, ""))
            chat_messages = tuples_to_messages(chat_history)
            return chat_messages, "", state_obj, chat_history

        # label == "answer": attach the answer to the last question and advance.
        if chat_history:
            last_q, _ = chat_history[-1]
            chat_history[-1] = (last_q, user_text)
        chat_history, new_state = step_conversation(
            chat_history,
            user_text,
            state_obj,
        )
        chat_messages = tuples_to_messages(chat_history)
        return chat_messages, "", new_state, chat_history
    # Button: start/restart the review.
    start_btn.click(
        _start,
        inputs=[image_input, description_box, max_q_slider],
        outputs=[chat, user_box, feature_debug, guideline_debug, state, chat_state],
    )
    # Textbox submit: route + respond.
    user_box.submit(
        _answer,
        inputs=[user_box, chat_state, state],
        outputs=[chat, user_box, state, chat_state],
    )
if __name__ == "__main__":
    demo.launch()