Spaces:

Azizahalq
/

MaterialMind

Sleeping

App Files Files Community

Azizahalq committed on Sep 12, 2025

Commit

b7242c4

1 Parent(s): e562e41

Create app_gradio.py

Browse files

Files changed (1) hide show

app_gradio.py +227 -0

app_gradio.py ADDED Viewed

	@@ -0,0 +1,227 @@

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+MaterialMind (fixed corpus demo)
+- Uses YOUR PDFs from ./sources
+- Builds a tiny in-memory RAG index at startup (FastEmbed + cosine)
+- Cloud LLM scores candidates 0..400 (four 0..100 subscores)
+- Simple Gradio UI (no uploads)
+"""
+import os, re, json, textwrap
+from pathlib import Path
+from typing import List, Tuple, Dict, Any
+import gradio as gr
+import requests
+from rag_utils import (
+    build_index_from_dir, retrieve, format_context_and_cites
+)
# -------------------- LLM client --------------------
def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* as a positive int, else *default*.

    An unset, empty, non-numeric, zero or negative value falls back to
    *default* instead of raising at import time and taking the app down.
    """
    try:
        value = int(os.getenv(name, ""))
    except ValueError:
        return default
    return value if value > 0 else default


# Settings are read once at import. Values are stripped because a secret
# pasted into a hosting UI may carry stray whitespace or a trailing newline,
# which would otherwise end up inside the Authorization header or model name.
PROVIDER = os.getenv("LLM_PROVIDER", "openai").strip().lower()  # "openai" | "together"
API_KEY = os.getenv("LLM_API_KEY", "").strip()
MODEL = os.getenv("LLM_MODEL", "gpt-4o-mini").strip()  # e.g. Together: "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"
TIMEOUT = _env_int("LLM_TIMEOUT", 60)  # seconds; used as the HTTP request timeout
def call_llm(system: str, user: str) -> str:
    """Send one system+user chat turn to the configured provider.

    Uses the module-level PROVIDER, API_KEY, MODEL and TIMEOUT settings and
    posts to the provider's ``/chat/completions`` endpoint.

    Args:
        system: Content of the system message.
        user: Content of the user message.

    Returns:
        The assistant message text on success. Every failure (missing key,
        network error, non-200 status, malformed body) is reported as a
        string starting with ``"[Error]"`` rather than raised, so the UI
        callback can display it directly.
    """
    if not API_KEY:
        return "[Error] Missing LLM_API_KEY. Add a secret/env var."

    # Both providers expose the same chat-completions API; only the host differs.
    if PROVIDER == "together":
        base = "https://api.together.xyz/v1"
    else:
        base = "https://api.openai.com/v1"
    headers = {"Authorization": f"Bearer {API_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": MODEL,
        "messages": [
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
        "temperature": 0.2,
    }

    try:
        r = requests.post(f"{base}/chat/completions", headers=headers, json=payload, timeout=TIMEOUT)
    except requests.RequestException as exc:
        # Timeouts, DNS and connection failures are reported like any other
        # error instead of propagating out of the UI callback.
        return f"[Error] LLM request failed: {exc}"

    if r.status_code != 200:
        return f"[Error] LLM HTTP {r.status_code}: {r.text[:500]}"

    try:
        content = r.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError):
        return f"[Error] Unexpected LLM response: {r.text[:500]}"
    if not isinstance(content, str):
        # e.g. a null "content" field; keep the declared str return type.
        return f"[Error] Unexpected LLM response: {r.text[:500]}"
    return content
# -------------------- Prompting --------------------
# System prompt sent with every request. It is never passed through
# str.format(), so the literal braces of the JSON schema are safe here.
# The schema example is kept as strict JSON (no inline comments) so a model
# that mirrors it verbatim still produces output json.loads() accepts.
SYSTEM_RULES = """You are MaterialMind, a general-purpose materials-selection assistant.
Return TWO things:
1) A JSON block with EXACT schema (strict JSON, no comments; "score" is an integer 0..400, the sum of the four 0..100 subscores):
{
  "candidates": [
    {
      "name": "string",
      "score": 0,
      "subscores": { "performance": 0, "stability": 0, "cost": 0, "availability": 0 },
      "reasons": ["string", "..."],
      "tradeoffs": ["string", "..."],
      "citations": ["[1]", "[4]"]
    }
  ]
}
SCORING (absolute, not weighted):
- performance (0..100): strength/stiffness/thermal range vs user targets
- stability   (0..100): corrosion/oxidation/chem/UV/thermal/creep, environment fit
- cost        (0..100): relative cost vs user budget (If budget is "Not important", set cost=100)
- availability(0..100): manufacturability, supply forms/lead time
Total score = performance + stability + cost + availability (0..400). Be conservative; do not invent data.
2) After the JSON, add 3–6 concise bullets explaining trade-offs.
Rules:
- Use ONLY the provided context; cite like [n].
- If critical info is missing, state what to clarify.
- Keep units correct; state assumptions if needed.
"""
# User-message template, filled with str.format(). Every {placeholder} must be
# supplied by the caller; the template must not contain other literal braces.
ANSWER_TEMPLATE = """User constraints
- Application: {environment}
- Temperature: {temperature}
- Targets: UTS ≥ {min_uts} MPa, density ≤ {max_density} g/cm^3
- Budget: {budget}   • Process: {process}
- Preferences: performance={pref_perf}, stability={pref_stab}, cost={pref_cost}, availability={pref_avail}
Task
Shortlist suitable materials and score them 0..400 using the four 0..100 subscores (see rules).
Explain trade-offs and include citations.
Context snippets (numbered)
{context}
Citations
{citations}
Now first output ONLY the JSON block. Then the bullet narrative.
"""
def extract_json_block(text: str):
    """Extract the first JSON object embedded in an LLM reply.

    A ```json fenced block is preferred. If there is none, or its content
    does not parse, the text is scanned for the first ``{`` from which a
    complete JSON object can be decoded. The previous fallback used the
    PCRE-only recursive pattern ``(?1)``, which Python's ``re`` rejects with
    ``re.error``, so any unfenced reply crashed instead of being parsed.

    Args:
        text: Raw model output; may be empty or None.

    Returns:
        The decoded object as a dict, or None when no JSON object is found.
    """
    if not text:
        return None

    fenced = re.search(r"```json\s*(\{.*?\})\s*```", text, flags=re.S | re.I)
    if fenced:
        try:
            return json.loads(fenced.group(1))
        except ValueError:
            pass  # malformed fenced block: fall through to the scan below

    # raw_decode parses one JSON value starting at a given index and ignores
    # whatever follows, which handles nesting without a hand-written matcher.
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            obj, _end = decoder.raw_decode(text, start)
        except ValueError:
            start = text.find("{", start + 1)
            continue
        return obj
    return None
# -------------------- Build index once (your PDFs) --------------------
# Folder scanned for source documents; defaults to ./sources and can be
# overridden through the SOURCES_DIR environment variable.
SOURCES_DIR = Path(os.environ.get("SOURCES_DIR", "sources")).resolve()
# Built a single time at import and shared by every request.
# Per the original author's note it holds texts, metas, embs (L2-normalized).
INDEX = build_index_from_dir(SOURCES_DIR)
# -------------------- UI callback --------------------
# Qualitative priority levels offered in the UI.
PREF_CHOICES = ["Very high", "High", "Medium", "Low", "Very low"]
# Cost uses the same scale, except its top entry is "Not important".
COST_CHOICES = ["Not important", *PREF_CHOICES[1:]]
def _score_value(cand):
    """Return a candidate's total score as a float usable as a sort key.

    Missing, null or non-numeric scores sort as 0 instead of raising when the
    model mixes types (for example "320" for one candidate and 320 for another).
    """
    try:
        return float(cand.get("score", 0))
    except (TypeError, ValueError):
        return 0.0


def _text_items(value):
    """Coerce a model-supplied field into a list of strings (empty if falsy)."""
    if not value:
        return []
    if isinstance(value, (list, tuple)):
        return [str(item) for item in value]
    return [str(value)]


def _md_cell(value):
    """Render *value* as one Markdown table cell (pipes escaped, newlines flattened)."""
    return str(value).replace("|", "\\|").replace("\r\n", " ").replace("\n", " ")


def recommend(environment, temperature, min_uts, max_density, budget, process,
              pref_perf, pref_stab, pref_cost, pref_avail, topk):
    """Gradio callback: retrieve context, query the LLM, format a ranked shortlist.

    Args:
        environment, temperature, min_uts, max_density, budget, process:
            User constraints; blank values fall back to permissive defaults.
        pref_perf, pref_stab, pref_cost, pref_avail: Qualitative priorities,
            forwarded verbatim into the prompt.
        topk: Number of context hits to retrieve (converted with int()).

    Returns:
        A 3-tuple ``(main_markdown, cards_markdown, citations)``.
        ``main_markdown`` is the ranked table followed by the raw model
        output, or just a message / the raw output when no candidates could
        be parsed. ``cards_markdown`` and ``citations`` are None when the
        corresponding stage was not reached.
    """
    if INDEX["embs"].shape[0] == 0:
        return "No context available. Add PDFs to ./sources and redeploy.", None, None

    # Resolve blank inputs once so the retrieval query and the prompt agree.
    fields = {
        "environment": environment or "general",
        "temperature": temperature or "room temperature",
        "min_uts": min_uts or "0",
        "max_density": max_density or "100",
        "budget": budget or "open",
        "process": process or "any",
    }

    # Retrieval
    q = (f"For {fields['environment']} at {fields['temperature']}, shortlist materials that meet "
         f"UTS ≥ {fields['min_uts']} MPa and density ≤ {fields['max_density']} g/cm^3; "
         f"consider budget={fields['budget']}, process={fields['process']}.")
    hits = retrieve(INDEX, q, k=int(topk))
    if not hits:
        return "No extractable context found (OCR may be needed).", None, None
    ctx, cites = format_context_and_cites(hits)

    # LLM
    prompt = ANSWER_TEMPLATE.format(
        pref_perf=pref_perf, pref_stab=pref_stab, pref_cost=pref_cost, pref_avail=pref_avail,
        context=ctx, citations=cites, **fields
    )
    raw = call_llm(SYSTEM_RULES, prompt)
    parsed = extract_json_block(raw) if raw else None

    # The model's JSON is untrusted: keep only dict candidates from a real list.
    cands = parsed.get("candidates") if isinstance(parsed, dict) else None
    if not isinstance(cands, list):
        cands = []
    cands = [c for c in cands if isinstance(c, dict)]
    if not cands:
        # Nothing parseable (or an "[Error] ..." string): show the raw reply.
        return raw, None, cites

    # Sort once; the table and the cards share the same ranking.
    ranked = sorted(cands, key=_score_value, reverse=True)

    headers = ["Rank", "Material", "Score", "Performance", "Stability", "Cost", "Availability", "Top reasons"]
    table_lines = [
        "| " + " | ".join(headers) + " |",
        "|" + " --- |" * len(headers),
    ]
    cards = []
    for rank, cand in enumerate(ranked, 1):
        subs = cand.get("subscores")
        if not isinstance(subs, dict):
            subs = {}
        name = cand.get("name", "?")
        score = cand.get("score", 0)
        perf = subs.get("performance", "—")
        stab = subs.get("stability", "—")
        cost = subs.get("cost", "—")
        avail = subs.get("availability", "—")

        # Table row: only the first three reasons, to keep the cell short.
        reasons = " • ".join(_text_items(cand.get("reasons"))[:3])
        row = [rank, name, score, perf, stab, cost, avail, reasons]
        table_lines.append("| " + " | ".join(_md_cell(x) for x in row) + " |")

        # Card: headline, score breakdown, then optional trade-offs/citations.
        card = f"**{rank}. {name}**  \n"
        card += f"Score {score} (perf {perf}, stab {stab}, cost {cost}, avail {avail})\n\n"
        tradeoffs = _text_items(cand.get("tradeoffs"))
        if tradeoffs:
            card += "**Trade-offs:**\n- " + "\n- ".join(tradeoffs) + "\n\n"
        citations = _text_items(cand.get("citations"))
        if citations:
            card += "**Citations:** " + ", ".join(citations)
        cards.append(card)

    table_md = "\n".join(table_lines) + "\n"
    cards_md = "\n---\n".join(cards)
    return table_md + "\n\n" + raw, cards_md, cites
# -------------------- Gradio UI --------------------
# Single-page layout: constraint inputs, qualitative priorities, a top-k
# slider, then three Markdown outputs filled by recommend().
# The priority dropdowns reuse the module-level PREF_CHOICES / COST_CHOICES
# lists so the offered options are defined in exactly one place.
with gr.Blocks(title="MaterialMind") as demo:
    gr.Markdown("## MaterialMind — ranked materials shortlist with page-level citations")
    with gr.Row():
        environment = gr.Textbox(label="Application", placeholder="seawater / sour service / high-T oxidation")
        temperature = gr.Textbox(label="Temperature", placeholder="e.g., 20–25 °C")
    with gr.Row():
        min_uts = gr.Textbox(label="Min UTS (MPa)", value="0")
        max_density = gr.Textbox(label="Max density (g/cm³)", value="100")
    with gr.Row():
        budget = gr.Dropdown(["open", "low", "medium", "high", "Not important"], value="open", label="Budget")
        process = gr.Textbox(label="Process", placeholder="wrought / casting / AM / any", value="any")
    gr.Markdown("**Priorities (qualitative; scoring is absolute 0..100 each, total 0..400)**")
    with gr.Row():
        pref_perf = gr.Dropdown(PREF_CHOICES, value="High", label="Performance")
        pref_stab = gr.Dropdown(PREF_CHOICES, value="High", label="Stability")
        pref_cost = gr.Dropdown(COST_CHOICES, value="Medium", label="Cost")
        pref_avail = gr.Dropdown(PREF_CHOICES, value="Medium", label="Availability")
    topk = gr.Slider(3, 10, step=1, value=5, label="Top-k context pages")
    run_btn = gr.Button("Get ranked shortlist", variant="primary")
    out_table = gr.Markdown(label="Shortlist & raw model output")
    out_cards = gr.Markdown(label="Material cards")
    out_cites = gr.Markdown(label="Citations (source mapping)")
    # Input order must match recommend()'s positional parameters; the three
    # outputs receive the tuple it returns, in order.
    run_btn.click(
        recommend,
        inputs=[environment, temperature, min_uts, max_density, budget, process,
                pref_perf, pref_stab, pref_cost, pref_avail, topk],
        outputs=[out_table, out_cards, out_cites],
        api_name="recommend"
    )
if __name__ == "__main__":
    demo.launch()