# MaterialMind / app.py
#!/usr/bin/env python3
import os, re, json, textwrap, traceback
from decimal import Decimal
from typing import List, Tuple
from flask import Flask, request, render_template, url_for
from flask_cors import CORS
from rag_mini import (
    search,
    ensure_ready,
    DEFAULT_TOPK,
    rag_debug_info,  # for /debug/rag
)
# ------------ LLM config ------------
LLM_PROVIDER = (os.getenv("LLM_PROVIDER") or "openai").strip().lower()
LLM_MODEL = (os.getenv("LLM_MODEL") or "gpt-4o-mini").strip()
LLM_API_KEY = os.getenv("OPENAI_API_KEY") or os.getenv("LLM_API_KEY")
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL") # optional (Azure/proxy)
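# Example configuration (illustrative values only; nothing here is required verbatim):
#   export LLM_PROVIDER=openai          # or "together"; any other value falls back to HF Inference
#   export LLM_MODEL=gpt-4o-mini
#   export OPENAI_API_KEY=...           # or LLM_API_KEY for non-OpenAI providers
#   export OPENAI_BASE_URL=https://...  # optional, e.g. an Azure/OpenAI-compatible proxy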
app = Flask(__name__)
app.secret_key = os.getenv("FLASK_SECRET_KEY", "change-me-please")
CORS(app)
SYSTEM_RULES = """You are MaterialMind, a materials-selection assistant.
Return two things:
1) JSON with a ranked shortlist:
{
  "candidates": [
    {
      "name": "string",
      "score": 0,        // 0..400 (sum of 4 independent 0..100 utilities)
      "score_pct": 0,    // 0..100 normalized display
      "reasons": ["..."],
      "tradeoffs": ["..."],
      "citations": ["[1]", "[2]"]
    }
  ]
}
2) After the JSON, provide 3–6 concise bullets on trade-offs.
Rules:
- Use only the provided context; cite with [1], [2]. No fabrication.
- Utilities per criterion are in [0,1]. Cost utility increases as cost decreases.
- Weights (performance, stability, cost, availability) are independent 0..100 (NOT normalized).
"""
ANSWER_TEMPLATE = """User constraints:
- Environment: {environment}
- Temperature: {temperature}
- Min UTS (MPa): {min_uts}
- Max density (g/cm^3): {max_density}
- Budget: {budget}
- Process: {process}
Independent priorities (0..100 each):
- performance={w_perf}, stability={w_stab}, cost={w_cost}, availability={w_avail}
Question:
For {environment} at {temperature}, shortlist materials that meet UTS ≥ {min_uts} MPa and density ≤ {max_density} g/cm^3.
Consider budget={budget} and process={process}. Rank by performance, stability, cost, and availability.
Context snippets (numbered):
{context}
Citations:
{citations}
Now, first output ONLY the JSON block (no preamble). Then the short narrative.
"""
# ---------- helpers ----------
def to_dec(x, default: int) -> Decimal:
    """Parse a form value into a Decimal, falling back to `default` when blank or invalid."""
    try:
        s = (x or "").strip()
        return Decimal(s if s else str(default))
    except Exception:
        return Decimal(default)
def format_context(hits: List[Tuple[str, str]]):
    """Turn (text, citation) hits into numbered context snippets and a matching citation list."""
    blocks, cites = [], []
    for i, (text, cite) in enumerate(hits, 1):
        snippet = textwrap.shorten((text or "").replace("\n", " "), width=450, placeholder=" …")
        blocks.append(f"[{i}] {snippet}")
        cites.append(f"[{i}] {cite}")
    return "\n".join(blocks), "\n".join(cites)
def extract_json_block(text: str):
    """Extract the first JSON object from the model output.

    Prefers a fenced json code block; otherwise falls back to brace matching.
    Returns a dict, or None if nothing parseable is found.
    """
    if not text:
        return None
    m = re.search(r"```json\s*(\{.*?\})\s*```", text, flags=re.S | re.I)
    blob = m.group(1) if m else None
    if not blob:
        # Fallback: scan for the first balanced {...} block.
        s = text
        start = s.find("{")
        while start != -1:
            depth = 0
            for j in range(start, len(s)):
                ch = s[j]
                if ch == "{":
                    depth += 1
                elif ch == "}":
                    depth -= 1
                    if depth == 0:
                        blob = s[start:j + 1]
                        break
            if blob:
                break
            start = s.find("{", start + 1)
    if not blob:
        return None
    try:
        return json.loads(blob)
    except Exception:
        return None
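# Illustrative behaviour (hypothetical model replies): a reply that wraps the JSON in a
# fenced json code block is handled by the regex; a reply like
#   'Here you go: {"candidates": []} plus trailing prose'
# is handled by the brace-matching fallback. Both return {"candidates": []}.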
# ---------- LLM caller ----------
def call_llm_cloud(system: str, user: str) -> str:
    """Send a system + user prompt to the configured provider and return the raw text reply."""
    prov = LLM_PROVIDER
    model = LLM_MODEL
    if prov in ("openai", "oai"):
        from openai import OpenAI
        client = OpenAI(api_key=LLM_API_KEY, base_url=OPENAI_BASE_URL or None)
        r = client.chat.completions.create(
            model=model,
            temperature=0.2,
            max_tokens=1200,
            messages=[{"role": "system", "content": system},
                      {"role": "user", "content": user}],
        )
        return r.choices[0].message.content
    elif prov in ("together", "tg"):
        from together import Together
        client = Together(api_key=LLM_API_KEY)
        r = client.chat.completions.create(
            model=model, temperature=0.2, max_tokens=1200,
            messages=[{"role": "system", "content": system}, {"role": "user", "content": user}],
        )
        return r.choices[0].message.content
    else:
        # Default: Hugging Face Inference API. Try chat first, then plain text generation.
        from huggingface_hub import InferenceClient
        hf_token = LLM_API_KEY or os.getenv("HUGGINGFACEHUB_API_TOKEN")
        client = InferenceClient(model=model, token=hf_token)
        try:
            out = client.chat_completion(
                messages=[{"role": "system", "content": system}, {"role": "user", "content": user}],
                max_tokens=1200, temperature=0.2,
            )
            return out.choices[0].message["content"]
        except Exception:
            return client.text_generation(
                prompt=f"{system}\n\n{user}\n", max_new_tokens=1200, temperature=0.2
            )
# ---------- routes ----------
@app.get("/healthz")
def healthz():
return {
"ok": True,
"provider": LLM_PROVIDER,
"model": LLM_MODEL,
"has_api_key": bool(LLM_API_KEY),
}, 200
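# Quick check (illustrative; values depend on your env):
#   curl -s http://localhost:7860/healthz
#   -> {"ok": true, "provider": "openai", "model": "gpt-4o-mini", "has_api_key": true}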
@app.get("/debug/rag")
def debug_rag():
return rag_debug_info(), 200
@app.get("/")
def index():
return render_template("index.html", default_k=DEFAULT_TOPK)
@app.post("/recommend")
def recommend():
try:
environment = request.form.get("environment","").strip() or "seawater"
temperature = request.form.get("temperature","").strip() or "20–25 °C"
min_uts = request.form.get("min_uts","").strip() or "0"
max_density = request.form.get("max_density","").strip() or "100"
budget = request.form.get("budget","").strip() or "open"
process = request.form.get("process","").strip() or "any"
w_perf = to_dec(request.form.get("w_perf"), 75)
w_stab = to_dec(request.form.get("w_stab"), 100)
w_cost = to_dec(request.form.get("w_cost"), 75)
w_avail = to_dec(request.form.get("w_avail"), 75)
try: k = int(request.form.get("k", DEFAULT_TOPK))
except: k = DEFAULT_TOPK
question = (f"For {environment} at {temperature}, shortlist materials that meet "
f"UTS ≥ {min_uts} MPa and density ≤ {max_density} g/cm^3. "
f"Consider budget={budget} and process={process}. "
f"Rank by performance, stability, cost, and availability.")
# RAG search (never crash UI)
try:
hits = search(question, k=k)
rag_error = ""
except Exception as e:
app.logger.exception("RAG search failed")
hits = []
rag_error = f"RAG error: {type(e).__name__}: {e}"
ctx, cites = format_context(hits)
user_prompt = ANSWER_TEMPLATE.format(
environment=environment, temperature=temperature,
min_uts=min_uts, max_density=max_density, budget=budget, process=process,
w_perf=str(int(w_perf)), w_stab=str(int(w_stab)),
w_cost=str(int(w_cost)), w_avail=str(int(w_avail)),
context=ctx, citations=cites
)
# LLM call (never crash UI)
raw = ""
try:
raw = call_llm_cloud(SYSTEM_RULES, user_prompt)
except Exception as e:
app.logger.exception("LLM call failed")
raw = f"ERROR calling LLM ({LLM_PROVIDER}:{LLM_MODEL}): {type(e).__name__}: {e}"
parsed = extract_json_block(raw) if raw else None
candidates = (parsed or {}).get("candidates", []) if parsed else []
if rag_error:
raw = f"{rag_error}\n\n{raw}"
return render_template(
"results.html",
candidates=candidates,
citations=(cites.splitlines() if cites else []),
environment=environment,
temperature=temperature,
raw_output=raw or "",
)
except Exception as e:
app.logger.exception("recommend() hard failure")
tb = traceback.format_exc()
return render_template(
"results.html",
candidates=[],
citations=[],
environment="(unknown)",
temperature="(unknown)",
raw_output=f"FATAL: {type(e).__name__}: {e}\n\n{tb}",
), 200
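# Illustrative form POST (field names match the handler above; values are made up):
#   curl -s -X POST http://localhost:7860/recommend \
#     -d environment=seawater -d temperature="20-25 C" -d min_uts=500 -d max_density=5 \
#     -d budget=open -d process=any \
#     -d w_perf=75 -d w_stab=100 -d w_cost=75 -d w_avail=75 -d k=5
# Returns the rendered results.html page (JSON shortlist plus raw model output).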
if __name__ == "__main__":
ensure_ready()
port = int(os.getenv("PORT", "7860"))
app.run(host="0.0.0.0", port=port, debug=False)