# MaterialMind / app.py
#!/usr/bin/env python3
import os, re, json, textwrap, traceback
from decimal import Decimal
from typing import List, Tuple
from flask import Flask, request, render_template, url_for
from flask_cors import CORS
from rag_mini import (
    search,
    ensure_ready,
    DEFAULT_TOPK,
    rag_debug_info,  # for /debug/rag
)
# ------------ LLM config ------------
LLM_PROVIDER = (os.getenv("LLM_PROVIDER") or "openai").strip().lower()
LLM_MODEL = (os.getenv("LLM_MODEL") or "gpt-4o-mini").strip()
LLM_API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("LLM_API_KEY")
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL") # optional (Azure/proxy)
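# Example configuration (illustrative values only; nothing here is required verbatim):
#   export LLM_PROVIDER=openai          # or "together"; any other value falls back to HF Inference
#   export LLM_MODEL=gpt-4o-mini
#   export OPENAI_API_KEY=...           # or LLM_API_KEY for non-OpenAI providers
#   export OPENAI_BASE_URL=https://...  # optional, e.g. an Azure/OpenAI-compatible proxy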
app = Flask(__name__)
app.secret_key = os.getenv("FLASK_SECRET_KEY", "change-me-please")
CORS(app)
SYSTEM_RULES = """You are MaterialMind, a materials-selection assistant.
Return two things:
1) JSON with a ranked shortlist:
{
  "candidates": [
    {
      "name": "string",
      "score": 0,        // 0..400 (sum of 4 independent 0..100 utilities)
      "score_pct": 0,    // 0..100 normalized display
      "reasons": ["..."],
      "tradeoffs": ["..."],
      "citations": ["[1]", "[2]"]
    }
  ]
}
2) After the JSON, provide 3–6 concise bullets on trade-offs.
Rules:
- Use only the provided context; cite with [1], [2]. No fabrication.
- Utilities per criterion are in [0,1]. Cost utility increases as cost decreases.
- Weights (performance, stability, cost, availability) are independent 0..100 (NOT normalized).
"""
ANSWER_TEMPLATE = """User constraints:
- Environment: {environment}
- Temperature: {temperature}
- Min UTS (MPa): {min_uts}
- Max density (g/cm^3): {max_density}
- Budget: {budget}
- Process: {process}
Independent priorities (0..100 each):
- performance={w_perf}, stability={w_stab}, cost={w_cost}, availability={w_avail}
Question:
For {environment} at {temperature}, shortlist materials that meet UTS ≥ {min_uts} MPa and density ≤ {max_density} g/cm^3.
Consider budget={budget} and process={process}. Rank by performance, stability, cost, and availability.
Context snippets (numbered):
{context}
Citations:
{citations}
Now, first output ONLY the JSON block (no preamble). Then the short narrative.
"""
# ---------- helpers ----------
def to_dec(x, default: int) -> Decimal:
    """Parse a form value into a Decimal, falling back to `default` when blank or invalid."""
    try:
        s = (x or "").strip()
        return Decimal(s if s else str(default))
    except Exception:
        return Decimal(default)
def format_context(hits: List[Tuple[str, str]]):
    """Turn (text, citation) hits into numbered context snippets and a matching citation list."""
    blocks, cites = [], []
    for i, (text, cite) in enumerate(hits, 1):
        snippet = textwrap.shorten((text or "").replace("\n", " "), width=450, placeholder=" …")
        blocks.append(f"[{i}] {snippet}")
        cites.append(f"[{i}] {cite}")
    return "\n".join(blocks), "\n".join(cites)
def extract_json_block(text: str):
    """Extract the first JSON object from the model output.

    Prefers a fenced json code block; otherwise falls back to brace matching.
    Returns a dict, or None if nothing parseable is found.
    """
    if not text:
        return None
    m = re.search(r"```json\s*(\{.*?\})\s*```", text, flags=re.S | re.I)
    blob = m.group(1) if m else None
    if not blob:
        # Fallback: scan for the first balanced {...} block.
        s = text
        start = s.find("{")
        while start != -1:
            depth = 0
            for j in range(start, len(s)):
                ch = s[j]
                if ch == "{":
                    depth += 1
                elif ch == "}":
                    depth -= 1
                    if depth == 0:
                        blob = s[start:j + 1]
                        break
            if blob:
                break
            start = s.find("{", start + 1)
    if not blob:
        return None
    try:
        return json.loads(blob)
    except Exception:
        return None
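# Illustrative behaviour (hypothetical model replies): a reply that wraps the JSON in a
# fenced json code block is handled by the regex; a reply like
#   'Here you go: {"candidates": []} plus trailing prose'
# is handled by the brace-matching fallback. Both return {"candidates": []}.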
# ---------- LLM caller ----------
def call_llm_cloud(system: str, user: str) -> str:
    """Send a system + user prompt to the configured provider and return the raw text reply."""
    prov = LLM_PROVIDER
    model = LLM_MODEL
    if prov in ("openai", "oai"):
        from openai import OpenAI
        client = OpenAI(api_key=LLM_API_KEY, base_url=OPENAI_BASE_URL or None)
        r = client.chat.completions.create(
            model=model,
            temperature=0.2,
            max_tokens=1200,
            messages=[{"role": "system", "content": system},
                      {"role": "user", "content": user}],
        )
        return r.choices[0].message.content
    elif prov in ("together", "tg"):
        from together import Together
        client = Together(api_key=LLM_API_KEY)
        r = client.chat.completions.create(
            model=model, temperature=0.2, max_tokens=1200,
            messages=[{"role": "system", "content": system}, {"role": "user", "content": user}],
        )
        return r.choices[0].message.content
    else:
        # Default: Hugging Face Inference API. Try chat first, then plain text generation.
        from huggingface_hub import InferenceClient
        hf_token = LLM_API_KEY or os.getenv("HUGGINGFACEHUB_API_TOKEN")
        client = InferenceClient(model=model, token=hf_token)
        try:
            out = client.chat_completion(
                messages=[{"role": "system", "content": system}, {"role": "user", "content": user}],
                max_tokens=1200, temperature=0.2,
            )
            return out.choices[0].message["content"]
        except Exception:
            return client.text_generation(
                prompt=f"{system}\n\n{user}\n", max_new_tokens=1200, temperature=0.2
            )
# ---------- routes ----------
@app.get("/healthz")
def healthz():
return {
"ok": True,
"provider": LLM_PROVIDER,
"model": LLM_MODEL,
"has_api_key": bool(LLM_API_KEY),
}, 200
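# Quick check (illustrative; values depend on your env):
#   curl -s http://localhost:7860/healthz
#   -> {"ok": true, "provider": "openai", "model": "gpt-4o-mini", "has_api_key": true}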
@app.get("/debug/rag")
def debug_rag():
return rag_debug_info(), 200
@app.get("/")
def index():
return render_template("index.html", default_k=DEFAULT_TOPK)
@app.post("/recommend")
def recommend():
try:
environment = request.form.get("environment","").strip() or "seawater"
temperature = request.form.get("temperature","").strip() or "20–25 °C"
min_uts = request.form.get("min_uts","").strip() or "0"
max_density = request.form.get("max_density","").strip() or "100"
budget = request.form.get("budget","").strip() or "open"
process = request.form.get("process","").strip() or "any"
w_perf = to_dec(request.form.get("w_perf"), 75)
w_stab = to_dec(request.form.get("w_stab"), 100)
w_cost = to_dec(request.form.get("w_cost"), 75)
w_avail = to_dec(request.form.get("w_avail"), 75)
try: k = int(request.form.get("k", DEFAULT_TOPK))
except: k = DEFAULT_TOPK
question = (f"For {environment} at {temperature}, shortlist materials that meet "
f"UTS ≥ {min_uts} MPa and density ≤ {max_density} g/cm^3. "
f"Consider budget={budget} and process={process}. "
f"Rank by performance, stability, cost, and availability.")
# RAG search (never crash UI)
try:
hits = search(question, k=k)
rag_error = ""
except Exception as e:
app.logger.exception("RAG search failed")
hits = []
rag_error = f"RAG error: {type(e).__name__}: {e}"
ctx, cites = format_context(hits)
user_prompt = ANSWER_TEMPLATE.format(
environment=environment, temperature=temperature,
min_uts=min_uts, max_density=max_density, budget=budget, process=process,
w_perf=str(int(w_perf)), w_stab=str(int(w_stab)),
w_cost=str(int(w_cost)), w_avail=str(int(w_avail)),
context=ctx, citations=cites
)
# LLM call (never crash UI)
raw = ""
try:
raw = call_llm_cloud(SYSTEM_RULES, user_prompt)
except Exception as e:
app.logger.exception("LLM call failed")
raw = f"ERROR calling LLM ({LLM_PROVIDER}:{LLM_MODEL}): {type(e).__name__}: {e}"
parsed = extract_json_block(raw) if raw else None
candidates = (parsed or {}).get("candidates", []) if parsed else []
if rag_error:
raw = f"{rag_error}\n\n{raw}"
return render_template(
"results.html",
candidates=candidates,
citations=(cites.splitlines() if cites else []),
environment=environment,
temperature=temperature,
raw_output=raw or "",
)
except Exception as e:
app.logger.exception("recommend() hard failure")
tb = traceback.format_exc()
return render_template(
"results.html",
candidates=[],
citations=[],
environment="(unknown)",
temperature="(unknown)",
raw_output=f"FATAL: {type(e).__name__}: {e}\n\n{tb}",
), 200
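# Illustrative form POST (field names match the handler above; values are made up):
#   curl -s -X POST http://localhost:7860/recommend \
#     -d environment=seawater -d temperature="20-25 C" -d min_uts=500 -d max_density=5 \
#     -d budget=open -d process=any \
#     -d w_perf=75 -d w_stab=100 -d w_cost=75 -d w_avail=75 -d k=5
# Returns the rendered results.html page (JSON shortlist plus raw model output).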
if __name__ == "__main__":
ensure_ready()
port = int(os.getenv("PORT", "7860"))
app.run(host="0.0.0.0", port=port, debug=False)