Spaces:
Configuration error
Configuration error
| #!/usr/bin/env python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| MaterialMind – Flask app (form page + results page) | |
| - Priorities via dropdowns (no numbers shown to user) | |
| - Each criterion weight is independent (0..100); NOT normalized | |
| - RAG retrieval + optional Ollama LLM for ranked shortlist with citations | |
| """ | |
| import shutil | |
| from decimal import Decimal | |
| import re, json, textwrap, subprocess | |
| from typing import List, Tuple, Any, Dict | |
| from flask import Flask, request, render_template, redirect, url_for, flash | |
| from flask_cors import CORS | |
| from filelock import FileLock | |
| # RAG helpers (your existing file) | |
| from rag_mini import search, ensure_dirs, DATA_DIR, DEFAULT_TOPK, DEFAULT_MODEL | |
# Flask application object that the rest of this module configures and runs.
app = Flask(__name__)
# NOTE(review): placeholder secret committed in source. Flask uses secret_key
# to sign the session cookie (which flash() relies on) — load a real secret
# from configuration/environment before deploying.
app.secret_key = "change-me" # set a strong secret for production
# Enable cross-origin requests using flask_cors' default settings.
# NOTE(review): confirm the defaults are not broader than this app needs.
CORS(app)
# Parent directory of the RAG data directory exported by rag_mini.
BASE_DIR = DATA_DIR.parent
# Path of the lock file that recommend() holds while calling the model.
LOCK_PATH = BASE_DIR / ".rag_lock"
| # ---------- Utilities ---------- | |
def has_ollama() -> bool:
    """Report whether an ``ollama`` executable can be located on the PATH."""
    ollama_path = shutil.which("ollama")
    return ollama_path is not None
def call_ollama(model: str, prompt: str) -> str:
    """Run ``ollama run <model> <prompt>`` and return its trimmed stdout.

    Failures are reported as a string beginning with ``[Error]`` rather than
    raised: one message when the ``ollama`` binary is missing, another when
    the command exits with a non-zero status.
    """
    command = ["ollama", "run", model, prompt]
    try:
        completed = subprocess.run(
            command, check=True, capture_output=True, text=True
        )
    except FileNotFoundError:
        # The executable itself could not be started: return setup hints.
        install_hint = ("[Error] Ollama not found. Install: brew install ollama\n"
                        "Run: ollama serve &\n"
                        f"Pull: ollama pull {model}")
        return install_hint
    except subprocess.CalledProcessError as err:
        # Prefer stderr for the diagnostic, falling back to stdout if empty.
        detail = err.stderr.strip() or err.stdout.strip()
        return f"[Error] ollama run failed: {detail}"
    return completed.stdout.strip()
def to_dec(x, default: int) -> Decimal:
    """Parse a form value into a finite ``Decimal``, falling back to *default*.

    Args:
        x: Raw value, normally a string from ``request.form`` or ``None``.
        default: Integer used when *x* is missing, blank, not a string,
            unparsable, or not a finite number.

    Returns:
        ``Decimal(x)`` for a parsable finite string, else ``Decimal(default)``.
    """
    fallback = Decimal(default)
    # Form values are strings; anything else (None, numbers, bytes) is
    # treated as "not provided" instead of relying on a swallowed error.
    if not isinstance(x, str):
        return fallback
    text = x.strip()
    if not text:
        return fallback
    try:
        value = Decimal(text)
    except (ArithmeticError, ValueError, TypeError):
        # decimal.InvalidOperation derives from ArithmeticError.
        return fallback
    # "NaN" / "Infinity" parse successfully but cannot be converted with
    # int() later on, so they are rejected here.
    return value if value.is_finite() else fallback
| def _to_float(x): | |
| try: | |
| return float(x) | |
| except Exception: | |
| return None | |
def _finite_float(value):
    """Return ``float(value)`` when it is a finite number, otherwise ``None``."""
    try:
        number = float(value)
    except Exception:
        return None
    # NaN fails every comparison and +/-inf fails the strict bounds, so only
    # finite values pass. Without this, min()/max() clamping turns NaN into
    # the maximum score.
    if float("-inf") < number < float("inf"):
        return number
    return None


def _clamp_score(value, low, high):
    """Clamp *value* into the closed interval [low, high]."""
    return max(low, min(high, value))


def _fraction_or_raw(number, max_total):
    """Treat values in [0, 1.5] as a fraction of *max_total*, others as raw."""
    if 0.0 <= number <= 1.5:
        return number * max_total
    return number


def _raw_score_from_text(text, max_total):
    """Parse "350 / 400", "87%" or a plain number into a raw score, or None."""
    stripped = text.strip()
    ratio = re.search(r"^\s*([\d.]+)\s*/\s*([\d.]+)\s*$", stripped)
    if ratio:
        num = _finite_float(ratio.group(1))
        den = _finite_float(ratio.group(2))
        if num is not None and den and den > 0:
            return max_total * (num / den)
    percent = re.search(r"^\s*([\d.]+)\s*%\s*$", stripped)
    if percent:
        pct = _finite_float(percent.group(1))
        if pct is not None:
            return max_total * (pct / 100.0)
    number = _finite_float(stripped)
    if number is None:
        return None
    return _fraction_or_raw(number, max_total)


def _raw_score_from_subscores(subs):
    """Sum sub-scores (each clamped to 0..100); None if there is no dict."""
    if not isinstance(subs, dict) or not subs:
        return None
    total = 0.0
    for sub_value in subs.values():
        number = _finite_float(sub_value)
        if number is not None:
            total += _clamp_score(number, 0.0, 100.0)
    return total


def normalize_candidates_for_display(cands: List[Dict[str, Any]], max_total: float = 400.0) -> List[Dict[str, Any]]:
    """Give every candidate a numeric ``score_raw`` and ``score_pct``, then sort.

    The list is modified in place and also returned. Entries that are not
    dicts are dropped, since they cannot carry a score.

    Score sources, in priority order:
      1. ``c["score_pct"]`` if it is a finite number (clamped to 0..100);
         ``score_raw`` is kept if numeric, otherwise derived from the percent
         and *max_total*.
      2. ``c["score"]`` as a number (0..max_total, or a fraction when it is
         within 0..1.5) or as a string such as "350 / 400", "87%" or "350".
      3. ``c["subscores"]``: sum of its values, each clamped to 0..100.
      4. Otherwise 0.

    Non-finite values (NaN, infinity) are treated as missing rather than
    being clamped to the maximum.

    Args:
        cands: Candidate dicts, typically parsed from the model's JSON.
        max_total: Upper bound of the raw score scale.

    Returns:
        The same list object, sorted by ``score_raw`` in descending order.
    """
    # Filter in place so callers holding a reference see the same list.
    cands[:] = [c for c in cands if isinstance(c, dict)]

    for cand in cands:
        pct = _finite_float(cand.get("score_pct"))
        if pct is not None:
            pct = _clamp_score(pct, 0.0, 100.0)
            cand["score_pct"] = pct
            supplied_raw = _finite_float(cand.get("score_raw"))
            if supplied_raw is None:
                # Scale by max_total (4.0 for the default 400-point scale).
                supplied_raw = pct * (max_total / 100.0)
            # Always store a float so the sort below cannot hit mixed types.
            cand["score_raw"] = _clamp_score(supplied_raw, 0.0, max_total)
            continue

        score = cand.get("score")
        raw = None
        if isinstance(score, (int, float)):
            number = _finite_float(score)
            if number is not None:
                raw = _fraction_or_raw(number, max_total)
        elif isinstance(score, str):
            raw = _raw_score_from_text(score, max_total)

        if raw is None:
            raw = _raw_score_from_subscores(cand.get("subscores"))
        if raw is None:
            raw = 0.0

        raw = _clamp_score(float(raw), 0.0, max_total)
        cand["score_raw"] = raw
        cand["score_pct"] = round((raw / max_total) * 100.0, 1)

    # Highest raw score first; ties keep their original relative order.
    cands.sort(key=lambda entry: entry["score_raw"], reverse=True)
    return cands
| # ---------- Prompting ---------- | |
# System-level instructions placed at the top of every prompt (substituted for
# the {rules} field of ANSWER_TEMPLATE). They ask the model for a JSON
# shortlist followed by short trade-off bullets.
# NOTE(review): the schema example contains "//" comments, which are not valid
# JSON; a model that echoes them will produce output json.loads rejects.
SYSTEM_RULES = """You are MaterialMind, a materials-selection assistant.
Return two things:
1) JSON with a ranked shortlist:
{
"candidates": [
{
"name": "string",
"score": 0, // 0..400 (sum of 4 independent 0..100 weighted utilities)
"score_pct": 0, // score/4 -> 0..100 for display
"reasons": ["..."],
"tradeoffs": ["..."],
"citations": ["[1]", "[2]"]
}
]
}
2) After the JSON, 3–6 concise bullets on trade-offs.
Rules:
- Use only provided context (no fabrication). Cite with [1], [2] etc.
- Utilities per criterion are in [0,1]. Cost utility increases as cost decreases.
- Weights for performance, stability, cost, availability are independent 0..100 (NOT normalized).
- Prefer pitting/crevice metrics in chloride questions; keep units explicit.
"""
# Full prompt skeleton filled in by recommend() via str.format(). The fields
# are: rules, environment, temperature, min_uts, max_density, budget, process,
# w_perf, w_stab, w_cost, w_avail, question, context and citations.
# The template itself must not contain literal braces other than these fields;
# braces inside substituted values (e.g. SYSTEM_RULES) are not re-interpreted.
ANSWER_TEMPLATE = """{rules}
User constraints:
- Environment: {environment}
- Temperature: {temperature}
- Min UTS (MPa): {min_uts}
- Max density (g/cm^3): {max_density}
- Budget: {budget}
- Process: {process}
Independent priorities (0..100 each):
- performance={w_perf}, stability={w_stab}, cost={w_cost}, availability={w_avail}
Question:
{question}
Context snippets (numbered):
{context}
Citations:
{citations}
Now, first output ONLY the JSON block, then the short narrative.
"""
def format_context(hits: List[Tuple[str, str]]) -> Tuple[str, str]:
    """Turn retrieval hits into numbered context and citation listings.

    Each hit is a ``(text, citation)`` pair. The text is flattened to one
    line and shortened to at most 450 characters; both outputs number their
    entries ``[1]``, ``[2]``, ... in hit order.

    Returns:
        ``(context, citations)``, each a newline-joined string.
    """
    numbered = list(enumerate(hits, start=1))
    context_lines = [
        f"[{n}] " + textwrap.shorten(body.replace("\n", " "), width=450, placeholder=" …")
        for n, (body, _source) in numbered
    ]
    citation_lines = [f"[{n}] {source}" for n, (_body, source) in numbered]
    return "\n".join(context_lines), "\n".join(citation_lines)
def extract_json_block(text: str):
    """Extract the first JSON object embedded in model output.

    A fenced ```json ... ``` block is tried first. If there is none, or its
    content does not parse, the text is scanned for the first ``{`` from
    which a complete JSON object can be decoded; trailing prose after the
    object is ignored.

    Args:
        text: Raw model output (may be empty or ``None``).

    Returns:
        The decoded object (a ``dict``), or ``None`` if no JSON object is found.
    """
    if not text:
        return None

    fenced = re.search(r"```json\s*(\{.*?\})\s*```", text, flags=re.S | re.I)
    if fenced:
        try:
            return json.loads(fenced.group(1))
        except ValueError:
            pass  # malformed fenced block: fall back to scanning the text

    # Python's `re` has no recursive patterns (PCRE's "(?1)" raises
    # re.error), so balanced braces are found by letting the JSON decoder
    # itself consume one object starting at each candidate "{".
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            obj, _end = decoder.raw_decode(text, start)
        except ValueError:
            start = text.find("{", start + 1)
            continue
        return obj
    return None
| # ---------- Routes ---------- | |
# The view must be registered on the app: without a route decorator Flask
# never serves it and url_for("index") (used by recommend) cannot be built.
@app.route("/")
def index():
    """Render the input form, pre-filled with the default model and top-k."""
    return render_template("index.html", default_model=DEFAULT_MODEL, default_k=DEFAULT_TOPK)
def _weight_text(weight: Decimal, fallback: int) -> str:
    """Render a priority weight as an integer string clamped to 0..100."""
    # NaN/Infinity cannot be compared or converted with int(); use the default.
    if not weight.is_finite():
        return str(fallback)
    bounded = min(max(weight, Decimal(0)), Decimal(100))
    return str(int(bounded))


# NOTE(review): the URL path is assumed; confirm it matches the form action
# in index.html. POST is required because the view reads request.form.
@app.route("/recommend", methods=["POST"])
def recommend():
    """Handle the form submission: retrieve context, query the model, render.

    Steps:
      1. Read constraints and priority weights from the form, with defaults.
      2. Build a retrieval question and fetch the top-k context snippets.
      3. Fill ANSWER_TEMPLATE and run it through Ollama (when installed).
      4. Parse the JSON shortlist from the output and normalise its scores.

    Returns:
        The rendered results page, or a redirect to the form (with a flash
        message) when retrieval finds no context.
    """
    form = request.form

    # basics
    environment = form.get("environment", "").strip() or "seawater"
    temperature = form.get("temperature", "").strip() or "20–25 °C"
    min_uts = form.get("min_uts", "").strip() or "0"
    max_density = form.get("max_density", "").strip() or "100"
    budget = form.get("budget", "").strip() or "open"
    process = form.get("process", "").strip() or "any"

    # hidden numeric weights from dropdowns (0..100 each; independent)
    w_perf = to_dec(form.get("w_perf"), 75)
    w_stab = to_dec(form.get("w_stab"), 100)
    w_cost = to_dec(form.get("w_cost"), 75)
    w_avail = to_dec(form.get("w_avail"), 75)

    model = (form.get("model", DEFAULT_MODEL) or DEFAULT_MODEL).strip()
    # The model name becomes a command-line argument of `ollama run`; a blank
    # value or one starting with "-" would be read as an option, so fall back.
    if not model or model.startswith("-"):
        model = DEFAULT_MODEL

    try:
        k = int(form.get("k", DEFAULT_TOPK))
    except (TypeError, ValueError):
        k = DEFAULT_TOPK

    question = (
        f"For {environment} at {temperature}, shortlist materials that meet "
        f"UTS ≥ {min_uts} MPa and density ≤ {max_density} g/cm^3. "
        f"Consider budget={budget} and process={process}. "
        f"Rank by performance, stability, cost, and availability."
    )

    hits = search(question, k=k)
    if not hits:
        flash("No context found. Please add sources and rebuild/update the index.", "error")
        return redirect(url_for("index"))

    ctx, cites = format_context(hits)
    prompt = ANSWER_TEMPLATE.format(
        rules=SYSTEM_RULES, environment=environment, temperature=temperature,
        min_uts=min_uts, max_density=max_density, budget=budget, process=process,
        w_perf=_weight_text(w_perf, 75), w_stab=_weight_text(w_stab, 100),
        w_cost=_weight_text(w_cost, 75), w_avail=_weight_text(w_avail, 75),
        question=question, context=ctx, citations=cites
    )

    # Call model (gracefully handle lock or missing ollama)
    candidates = []
    if not has_ollama():
        raw = "[Ollama not found]\n\n" + prompt
        flash("Ollama not found — showing retrieval context only.", "error")
    else:
        try:
            LOCK_PATH.parent.mkdir(parents=True, exist_ok=True)
            with FileLock(str(LOCK_PATH), timeout=1):
                raw = call_ollama(model, prompt)
        except Exception:
            # If lock fails, try without lock instead of crashing
            raw = call_ollama(model, prompt)

        parsed = extract_json_block(raw) if raw else None
        found = parsed.get("candidates") if isinstance(parsed, dict) else None
        # The model may emit null, a string or non-object entries here; keep
        # only dict entries so score normalisation cannot crash.
        if isinstance(found, list):
            candidates = [c for c in found if isinstance(c, dict)]

    # Always normalize to have score_pct and sorting
    candidates = normalize_candidates_for_display(candidates, max_total=400.0)

    return render_template(
        "results.html",
        candidates=candidates,
        citations=cites.splitlines(),
        environment=environment,
        temperature=temperature,
        raw_output=raw,
        default_model=model,
        default_k=k,
    )
# Script entry point: this runs only when the file is executed directly,
# not when the module is imported (e.g. by a WSGI server).
if __name__ == "__main__":
    # rag_mini helper; presumably creates the data/index directories — confirm.
    ensure_dirs()
    # Serve on the loopback interface only, port 5000, with debug mode off.
    app.run(host="127.0.0.1", port=5000, debug=False)