Spaces:

Playingyoyo
/

Monomerizer

Running

App Files Files Community

Monomerizer / app.py

Playingyoyo

Update app.py

aff43ab verified 26 days ago

raw

history blame contribute delete

20 kB

	import os

	# Request Gradio's MCP server through the environment; this assignment sits
	# ahead of the `import gradio` statement further down the file.
	os.environ["GRADIO_MCP_SERVER"] = "True"

	# Monkey-patch gradio_client schema bug (safe for Gradio 5 - no-op if fixed)
	# Both wrappers return str(schema) when the schema is not a dict and
	# otherwise delegate to the original gradio_client implementation.
	try:
	    import gradio_client.utils as _gc_utils

	    _orig_gt = _gc_utils.get_type

	    def _p_gt(s):
	        """Wrap ``get_type``: stringify non-dict schemas, else delegate."""
	        return str(s) if not isinstance(s, dict) else _orig_gt(s)

	    _gc_utils.get_type = _p_gt

	    _orig_js = _gc_utils._json_schema_to_python_type

	    def _p_js(s, d=None):
	        """Wrap ``_json_schema_to_python_type`` with the same non-dict guard."""
	        return str(s) if not isinstance(s, dict) else _orig_js(s, d)

	    _gc_utils._json_schema_to_python_type = _p_js
	except Exception:
	    # Deliberate best effort: if gradio_client is missing or its private
	    # helpers have changed, run unpatched instead of failing at import.
	    pass

	import gradio as gr
	import subprocess, tempfile, os, glob, shutil, base64, json, re, csv, ast
	from PIL import Image
	from css import FULL_CSS, BUBBLES_HTML
	from theme import create_theme

	# Try importing cairosvg for SVG→PNG conversion
	try:
	    import cairosvg
	    HAS_CAIROSVG = True
	except (ImportError, OSError):
	    # ImportError: the package is not installed.
	    # OSError: NOTE(review) the package can be installed while its native
	    # cairo library fails to load; treat that as "unavailable" too.
	    HAS_CAIROSVG = False


	# ── Helpers ──

	def load_legend_html():
	    """Return an HTML snippet that inlines ``legend.png`` as a data URI.

	    The image is looked up in the directory containing this source file.

	    Returns:
	        A flex ``<div>`` wrapping an ``<img>`` whose source is the
	        base64-encoded PNG, or ``""`` when ``legend.png`` does not exist.
	    """
	    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "legend.png")
	    if not os.path.exists(path):
	        return ""
	    with open(path, "rb") as fh:
	        encoded = base64.b64encode(fh.read()).decode()
	    return f'<div style="display:flex;align-items:flex-end;justify-content:center;height:100%;"><img src="data:image/png;base64,{encoded}" style="max-width:100%;"></div>'


	def load_background_css():
	    """Return CSS that sets ``background.png`` as the page background.

	    The image is looked up in the directory containing this source file and
	    embedded as a base64 data URI.

	    Returns:
	        A CSS rule for ``body, .gradio-container``, or ``""`` when
	        ``background.png`` does not exist.
	    """
	    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "background.png")
	    if not os.path.exists(path):
	        return ""
	    with open(path, "rb") as fh:
	        encoded = base64.b64encode(fh.read()).decode()
	    # Doubled braces are literal braces inside the f-string.
	    return f"""
	body, .gradio-container {{
	background-image: url('data:image/png;base64,{encoded}') !important;
	background-size: cover !important;
	background-position: center !important;
	background-attachment: fixed !important;
	}}
	"""


	def load_example_image():
	    """Return the HTML shown in the visualization pane before any run.

	    Returns:
	        A centered ``<div>`` inlining ``example.png`` (looked up next to this
	        source file) as a base64 data URI, or a grey "Run to see
	        visualizations" paragraph when the image does not exist.
	    """
	    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "example.png")
	    if not os.path.exists(path):
	        return "<p style='color:#bbb;text-align:center;padding:12px;font-size:12px;'>Run to see visualizations</p>"
	    with open(path, "rb") as fh:
	        encoded = base64.b64encode(fh.read()).decode()
	    return f'<div style="text-align:center;padding:4px;"><img src="data:image/png;base64,{encoded}" style="max-width:100%;height:auto;"></div>'


	def load_dictionary(path=None):
	    """Load the monomer ID → display-name table from a tab-separated file.

	    For each row the name is chosen in this order: the ``Pubchem`` cell, the
	    first usable entry of the ``ChEMBL`` cell (parsed as a Python list
	    literal), the ``SMILES`` cell, and finally the ID itself. The values
	    ``NULL`` and ``['NULL']`` are treated as missing.

	    Args:
	        path: Optional path of the table. Defaults to ``dictionary.txt`` in
	            the directory containing this source file.

	    Returns:
	        Dict mapping every non-empty ``ID`` to its chosen name; an empty dict
	        when the file does not exist.
	    """
	    missing = ("NULL", "['NULL']")
	    if path is None:
	        path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "dictionary.txt")
	    names = {}
	    if not os.path.exists(path):
	        return names

	    def cell(row, column):
	        # DictReader fills absent trailing cells with None rather than "",
	        # so a bare .get(column, "").strip() would crash on short rows.
	        return (row.get(column) or "").strip()

	    with open(path, "r", newline="") as fh:
	        for row in csv.DictReader(fh, delimiter="\t"):
	            aid = cell(row, "ID")
	            if not aid:
	                continue
	            fallback = cell(row, "SMILES") or aid

	            pub = cell(row, "Pubchem")
	            if pub and pub not in missing:
	                names[aid] = pub
	                continue

	            chosen = fallback
	            chm = cell(row, "ChEMBL")
	            if chm and chm not in missing:
	                try:
	                    candidates = ast.literal_eval(chm)
	                except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
	                    candidates = None
	                if isinstance(candidates, list):
	                    # First non-empty, non-NULL entry wins; otherwise fall back.
	                    chosen = next(
	                        (c for c in candidates if c and c not in missing),
	                        fallback,
	                    )
	            names[aid] = chosen
	    return names


	# A token is one of: a non-canonical ID ("X" or "Z" followed by digits, with
	# an optional "ter" suffix), the literal "[UNK]", or a single character from
	# A-W, "Y" or "?". Anything else in the input is skipped.
	_TOKEN_RE = re.compile(r'[XZ]\d+(?:ter)?|\[UNK\]|[A-WY?]')


	def tokenize_sequence(seq):
	    """Split a sequence string into monomer tokens.

	    Args:
	        seq: Sequence string to scan.

	    Returns:
	        List of matched tokens in order of appearance.
	    """
	    return _TOKEN_RE.findall(seq)


	def _read_tsv_pairs(path, key_col, val_col):
	    """Return ``(key, value)`` pairs from two columns of a tab-separated file.

	    Cells are stripped; rows where either cell is empty or absent are
	    skipped. A missing file yields an empty list.
	    """
	    if not os.path.exists(path):
	        return []
	    pairs = []
	    with open(path, "r", newline="") as fh:
	        for row in csv.DictReader(fh, delimiter="\t"):
	            # DictReader yields None for cells missing from short rows.
	            key = (row.get(key_col) or "").strip()
	            val = (row.get(val_col) or "").strip()
	            if key and val:
	                pairs.append((key, val))
	    return pairs


	def format_results(output_dir, id_to_name):
	    """Build the two text panels from the pipeline's output files.

	    Reads ``nc_raw2standard.txt``, ``standard/sequences_standardized.txt``
	    and ``standard/nc_standardized.txt`` under ``output_dir``; any of them
	    may be absent.

	    Args:
	        output_dir: Directory the pipeline wrote its results to.
	        id_to_name: Dict mapping monomer IDs to display names.

	    Returns:
	        Tuple ``(mapping_text, sequences_text)``. ``mapping_text`` has one
	        ``"<id> → <name>"`` line per non-canonical monomer and
	        ``sequences_text`` one ``"<ID>: a --- b --- c"`` line per sequence;
	        each falls back to a fixed "nothing found" message when empty.
	    """
	    raw2std = dict(_read_tsv_pairs(
	        os.path.join(output_dir, "nc_raw2standard.txt"), "raw_ID", "standard_ID"))
	    sequences = _read_tsv_pairs(
	        os.path.join(output_dir, "standard", "sequences_standardized.txt"), "ID", "SEQUENCE")
	    std_smi = dict(_read_tsv_pairs(
	        os.path.join(output_dir, "standard", "nc_standardized.txt"), "ID", "SMILES"))

	    def resolve(sid):
	        # Preference order: dictionary name, standardized SMILES, raw ID.
	        if sid in id_to_name:
	            return id_to_name[sid]
	        return std_smi.get(sid, sid)

	    def is_noncanonical(token):
	        return token[0] in "XZ"

	    # Tokenize each sequence once; used by both panels below.
	    tokenized = [(mol_id, tokenize_sequence(seq)) for mol_id, seq in sequences]

	    # Mapping
	    lines, seen = [], set()
	    for raw_id, std_id in sorted(raw2std.items()):
	        if std_id != "[UNK]":
	            lines.append(f"{raw_id} → {resolve(std_id)}")
	            seen.add(std_id)
	    # IDs that occur in sequences but were not listed in the raw→standard table.
	    in_sequences = {t for _, tokens in tokenized for t in tokens if is_noncanonical(t)}
	    lines.extend(f"{sid} → {resolve(sid)}" for sid in sorted(in_sequences - seen))
	    mapping_text = "\n".join(lines) if lines else "No non-canonical amino acids found."

	    # Sequences
	    def display(token):
	        if is_noncanonical(token):
	            return resolve(token)
	        if token in ("?", "[UNK]"):
	            return "Unknown"
	        return id_to_name.get(token, token)

	    seq_lines = [
	        f"{mol_id}: {' --- '.join(display(t) for t in tokens)}"
	        for mol_id, tokens in tokenized
	    ]
	    sequences_text = "\n".join(seq_lines) if seq_lines else "No sequences generated."
	    return mapping_text, sequences_text


	# ── Image helpers ──

	def svg_to_png(svg_path, png_path, scale=2):
	    """Convert an SVG file to PNG. Returns True on success.

	    Tries cairosvg first (when ``HAS_CAIROSVG`` is set), then the Inkscape
	    CLI, then ``rsvg-convert``. Each converter is attempted only if the
	    previous one was unavailable or failed.

	    Args:
	        svg_path: Path of the SVG to read.
	        png_path: Path the PNG is written to.
	        scale: Scale factor; passed to cairosvg and ``rsvg-convert -z`` and
	            multiplied by 96 for Inkscape's export DPI.

	    Returns:
	        True if a converter succeeded, otherwise False.
	    """
	    if HAS_CAIROSVG:
	        try:
	            cairosvg.svg2png(url=svg_path, write_to=png_path, scale=scale)
	            return True
	        except Exception:
	            # Best effort: fall through to the command-line converters.
	            pass

	    cli_fallbacks = (
	        # Fallback: try Inkscape CLI
	        ["inkscape", svg_path, "--export-type=png", f"--export-filename={png_path}",
	         f"--export-dpi={96 * scale}"],
	        # Fallback: try rsvg-convert
	        ["rsvg-convert", "-z", str(scale), "-o", png_path, svg_path],
	    )
	    for cmd in cli_fallbacks:
	        try:
	            done = subprocess.run(cmd, capture_output=True, timeout=30)
	        except (OSError, subprocess.SubprocessError):
	            # Tool not installed, not executable, or timed out.
	            continue
	        # Require a clean exit as well as the file: a stale PNG left at
	        # png_path by an earlier call must not be reported as success, and a
	        # failed tool must not stop the next fallback from being tried.
	        if done.returncode == 0 and os.path.exists(png_path):
	            return True
	    return False


	def stitch_images(image_paths, padding=20, bg_color="white"):
	    """Stitch multiple images vertically into one composite PNG.

	    Images are stacked top to bottom in the given order, each centered
	    horizontally, with ``padding`` pixels between neighbours.

	    Args:
	        image_paths: Iterable of image file paths. Unreadable entries are
	            skipped.
	        padding: Vertical gap in pixels between consecutive images.
	        bg_color: Fill colour for the canvas and for transparent areas.

	    Returns:
	        Path of a newly created PNG in the temp directory, or None if no
	        valid images.
	    """
	    tiles = []
	    for path in image_paths:
	        try:
	            # convert() returns an independent image, so the source file
	            # handle can be closed straight away.
	            with Image.open(path) as src:
	                tiles.append(src.convert("RGBA"))
	        except Exception:
	            # Pillow raises a variety of error types for bad input; any
	            # file that cannot be loaded is simply left out.
	            continue
	    if not tiles:
	        return None

	    max_w = max(tile.width for tile in tiles)
	    total_h = sum(tile.height for tile in tiles) + padding * (len(tiles) - 1)

	    composite = Image.new("RGBA", (max_w, total_h), bg_color)
	    y = 0
	    for tile in tiles:
	        # Center horizontally
	        x = (max_w - tile.width) // 2
	        composite.paste(tile, (x, y), tile)
	        y += tile.height + padding

	    # Convert to RGB for PNG output (no alpha)
	    flattened = Image.new("RGB", composite.size, bg_color)
	    flattened.paste(composite, mask=composite.split()[3])

	    # Unique file per call so concurrent requests cannot overwrite each
	    # other's composite.
	    fd, out = tempfile.mkstemp(prefix="monomerizer_composite_", suffix=".png")
	    os.close(fd)
	    flattened.save(out, "PNG")
	    return out


	def collect_and_stitch(output_dir):
	    """Collect all molecule images from output_dir, convert SVGs to PNGs,
	    stitch into one composite image. Returns path to composite PNG or None.

	    SVGs whose file name contains "legend" (case-insensitive) are ignored.
	    Converted SVGs come first, followed by PNGs the pipeline already wrote;
	    both groups are sorted by path.

	    Args:
	        output_dir: Directory searched recursively for ``*.svg``/``*.png``.

	    Returns:
	        Path returned by ``stitch_images``, or None when no image is found
	        or none could be converted.
	    """
	    # "**" plus recursive=True matches output_dir itself and every
	    # subdirectory beneath it.
	    svg_files = sorted(
	        f for f in glob.glob(os.path.join(output_dir, "**", "*.svg"), recursive=True)
	        if "legend" not in os.path.basename(f).lower()
	    )
	    # Also collect any existing PNGs from pipeline
	    png_files = sorted(glob.glob(os.path.join(output_dir, "**", "*.png"), recursive=True))
	    if not svg_files and not png_files:
	        return None

	    # Per-call scratch directory: fixed names under /tmp would collide
	    # between concurrent requests and between PNGs sharing a base name.
	    work_dir = tempfile.mkdtemp(prefix="mono_png_")
	    try:
	        tiles = []
	        # Convert SVGs to PNGs
	        for i, svg in enumerate(svg_files, 1):
	            target = os.path.join(work_dir, f"mono_converted_{i}.png")
	            if svg_to_png(svg, target):
	                tiles.append(target)
	        for i, png in enumerate(png_files, 1):
	            target = os.path.join(work_dir, f"mono_existing_{i}_{os.path.basename(png)}")
	            shutil.copy2(png, target)
	            tiles.append(target)

	        if not tiles:
	            return None
	        return stitch_images(tiles)
	    finally:
	        # The composite is written outside work_dir, so the intermediate
	        # files can be discarded.
	        shutil.rmtree(work_dir, ignore_errors=True)


	# ── Core pipeline runner (shared by UI and MCP) ──

	def _run_pipeline(smiles_text):
	"""Run the Monomerizer pipeline on SMILES input.
	Returns (mapping, sequences, output_dir) or raises."""
	if not smiles_text or not smiles_text.strip():
	raise ValueError("No input provided.")

	lines = [l.strip() for l in smiles_text.strip().split("\n") if l.strip()]
	if lines and lines[0].upper() == "SMILES":
	lines = lines[1:]
	if not lines:
	raise ValueError("No SMILES found in input.")

	tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".txt", delete=False, dir="/tmp")
	tmp.write("SMILES\n")
	for l in lines:
	tmp.write(l + "\n")
	tmp.close()
	od = tempfile.mkdtemp(prefix="mono_", dir="/tmp")

	res = subprocess.run(
	["python3", "run_pipeline.py", "--input_file", tmp.name, "--output_dir", od, "-draw"],
	capture_output=True, text=True, timeout=600,
	cwd=os.path.dirname(os.path.abspath(__file__)),
	)

	idn = load_dictionary()
	mt, st = format_results(od, idn)

	if res.returncode != 0 and res.stderr:
	mt = f"Pipeline warning:\n{res.stderr}\n\n{mt}"

	return mt, st, od


	# ══════════════════════════════════════════════════════
	# MCP API function — returns text + single composite image
	# ══════════════════════════════════════════════════════

	def analyze_smiles(smiles_text: str) -> tuple[str, str | None]:
	    """Decompose peptide/peptidomimetic SMILES into amino acid sequences using Monomerizer.

	    Takes one or more SMILES strings (one per line) representing peptides or
	    peptidomimetics (including those with non-canonical amino acids), and returns:
	    - A mapping of non-canonical amino acid IDs to their chemical names
	    - Named amino acid sequences for each input molecule
	    - A composite image of all molecular structure visualizations

	    Args:
	        smiles_text: One or more SMILES strings separated by newlines.

	    Returns:
	        A tuple of (formatted text results, path to composite visualization image).
	        The text contains the non-canonical amino acid mapping and sequences.
	        The image shows all molecular structures stitched vertically.
	    """
	    # Failures are reported as text with no image rather than raised.
	    try:
	        mt, st, od = _run_pipeline(smiles_text)
	    except ValueError as e:
	        return str(e), None
	    except subprocess.TimeoutExpired:
	        return "Pipeline timed out after 10 minutes.", None
	    except Exception as e:
	        return f"Error: {e}", None

	    # Build formatted text response
	    parts = [
	        "═══ Non-canonical Amino Acid Mapping ═══",
	        mt,
	        "",
	        "═══ Sequences ═══",
	        st,
	    ]

	    # Build composite image from all molecule visualizations
	    try:
	        composite_path = collect_and_stitch(od)
	    finally:
	        # The composite is written outside the pipeline's output directory,
	        # which is no longer needed once stitching has finished.
	        shutil.rmtree(od, ignore_errors=True)
	    if composite_path:
	        parts.append("\n═══ Molecular visualization returned as image ═══")

	    return "\n".join(parts), composite_path


	# ── UI pipeline function ──

	def run_monomerizer(smiles_text):
	    """Run the pipeline for the UI and prepare the image carousel.

	    Args:
	        smiles_text: Contents of the SMILES textbox.

	    Returns:
	        Six values matching the Run button's outputs: mapping text,
	        sequences text, HTML of the first visualization, the ``"i / n"``
	        counter, a JSON list of every visualization's HTML, and a JSON list
	        of downloadable SVG paths. On failure the first value carries the
	        error message and the rest are empty defaults.
	    """
	    empty_tail = ("", "", "0 / 0", "[]", "[]")
	    try:
	        mt, st, od = _run_pipeline(smiles_text)
	    except ValueError as e:
	        return (str(e), *empty_tail)
	    except subprocess.TimeoutExpired:
	        return ("⚠️ Timed out.", *empty_tail)
	    except Exception as e:
	        return (f"❌ {e}", *empty_tail)

	    # "**" plus recursive=True searches od and all of its subdirectories.
	    svgs = sorted(
	        f for f in glob.glob(os.path.join(od, "**", "*.svg"), recursive=True)
	        if "legend" not in os.path.basename(f).lower()
	    )
	    pngs = sorted(glob.glob(os.path.join(od, "**", "*.png"), recursive=True))

	    # Per-run download directory so concurrent users do not overwrite each
	    # other's mono_svg_<i>.svg files.
	    download_dir = tempfile.mkdtemp(prefix="mono_svg_", dir="/tmp") if svgs else None

	    imgs, spaths = [], []
	    for i, svg_file in enumerate(svgs, 1):
	        try:
	            with open(svg_file, "r") as f:
	                markup = f.read()
	            saved = os.path.join(download_dir, f"mono_svg_{i}.svg")
	            shutil.copy2(svg_file, saved)
	        except (OSError, UnicodeDecodeError):
	            # Skip unreadable files; nothing is appended, so imgs and spaths
	            # stay index-aligned for the SVG entries.
	            continue
	        imgs.append(f'<div style="text-align:center;padding:6px;"><p style="font-size:13px;font-weight:bold;color:#555;margin-bottom:4px;">{i}</p><div>{markup}</div></div>')
	        spaths.append(saved)
	    # PNGs are numbered after the SVGs and have no downloadable SVG path.
	    for i, png_file in enumerate(pngs, len(svgs) + 1):
	        try:
	            with open(png_file, "rb") as f:
	                b = base64.b64encode(f.read()).decode()
	        except OSError:
	            continue
	        imgs.append(f'<div style="text-align:center;padding:6px;"><p style="font-size:13px;font-weight:bold;color:#555;margin-bottom:4px;">{i}</p><img src="data:image/png;base64,{b}" style="max-width:100%;"></div>')

	    # Everything needed has been inlined or copied out of the pipeline's
	    # output directory by now.
	    shutil.rmtree(od, ignore_errors=True)

	    ij, sj = json.dumps(imgs), json.dumps(spaths)
	    if imgs:
	        return mt, st, imgs[0], f"1 / {len(imgs)}", ij, sj
	    return mt, st, "<p style='color:#bbb;text-align:center;'>No visualizations.</p>", "0 / 0", ij, sj


	def navigate(direction, ij, sj, ctr):
	    """Move the visualization carousel one step, wrapping at both ends.

	    Args:
	        direction: ``"next"`` steps forward; any other value steps back.
	        ij: JSON list of visualization HTML snippets.
	        sj: JSON list of SVG file paths, index-aligned with ``ij``.
	        ctr: Current counter text in the form ``"i / n"`` (1-based).

	    Returns:
	        Tuple ``(html, counter_text, svg_path_or_None)``. Invalid JSON gives
	        ``("", "0 / 0", None)``; an empty list gives a placeholder paragraph.
	        An unparsable counter is treated as position 1.
	    """
	    try:
	        imgs = json.loads(ij) if ij else []
	        paths = json.loads(sj) if sj else []
	    except (TypeError, ValueError):
	        return "", "0 / 0", None
	    if not isinstance(imgs, list) or not imgs:
	        return "<p style='color:#bbb;text-align:center;'>No visualizations.</p>", "0 / 0", None
	    if not isinstance(paths, list):
	        paths = []

	    try:
	        cur = int(ctr.split("/")[0].strip()) - 1
	    except (AttributeError, TypeError, ValueError):
	        cur = 0

	    total = len(imgs)
	    step = 1 if direction == "next" else -1
	    # Modulo wraps in both directions (Python's % is non-negative here).
	    cur = (cur + step) % total
	    return imgs[cur], f"{cur + 1} / {total}", paths[cur] if cur < len(paths) else None


	def get_svg(sj, ctr):
	    """Return the SVG path for the visualization currently shown.

	    Args:
	        sj: JSON list of SVG file paths.
	        ctr: Counter text in the form ``"i / n"`` (1-based).

	    Returns:
	        The path at the counter's position, or None when the JSON or the
	        counter cannot be parsed or the position has no SVG.
	    """
	    try:
	        paths = json.loads(sj) if sj else []
	        cur = int(ctr.split("/")[0].strip()) - 1
	    except (AttributeError, TypeError, ValueError):
	        return None
	    if isinstance(paths, list) and 0 <= cur < len(paths):
	        return paths[cur]
	    return None


	# ── Defaults ──

	# HTML shown in the visualization pane until the first run replaces it.
	example_svg_html = load_example_image()

	# Example input pre-filled in the SMILES textbox, one molecule per line.
	# Built from separate literals so no source indentation leaks into the
	# SMILES lines.
	example_smiles = "\n".join([
	    "NC(=O)[C@@H]1C[C@H](NC(=O)C(F)(F)F)CN1C(=O)[C@H](N)CCC/N=C(\\N)N[N+](=O)[O-]",
	    "CC[C@H](C)[C@H](NC(=O)[C@H](C)NC(=O)[C@H](CCCNC(=N)N)NC(=O)OCc1ccccc1)[C@@H](O)CC(=O)NC1CCCCC1",
	    "CC(C)C[C@H](NC(=O)[C@@H]1CCCN1C(=O)[C@H](N)Cc1ccccc1)C(=O)NCC(=O)N1CCC[C@H]1C(=O)NCC(=O)Nc1ccc(N(CCCl)CCCl)cc1",
	])


	# ── UI ──

	# NOTE(review): the container nesting below was reconstructed from a
	# flattened listing; confirm that output_html and svg_download belong under
	# the scale=4 column, below the navigation row.
	with gr.Blocks(title="Monomerizer", theme=create_theme(), css=FULL_CSS + load_background_css()) as demo:
	    # Carousel state: JSON-encoded lists written by run_monomerizer and read
	    # by navigate / get_svg.
	    images_state = gr.State("")
	    svg_paths_state = gr.State("")

	    # Header
	    gr.HTML("""
	<div style="text-align:center;padding:10px 0 4px 0;">
	<h1 style="margin:0;font-size:36px;color:#fff;text-shadow:0 3px 12px rgba(0,0,0,0.4);">🧬 Monomerizer</h1>
	<p style="margin:4px 0 0 0;font-size:15px;color:rgba(255,255,255,0.95);text-shadow:0 2px 8px rgba(0,0,0,0.4);">
	Convert SMILES to amino acid sequences
	<a href="https://chemrxiv.org/engage/chemrxiv/article-details/67ecf39181d2151a02aad52a" target="_blank" style="color:#fff;font-weight:700;">📄 Paper</a> ·
	<a href="https://github.com/tsudalab/Monomerizer" target="_blank" style="color:#fff;font-weight:700;">💻 GitHub</a> ·
	<a href="https://huggingface.co/Playingyoyo/GPepT" target="_blank" style="color:#fff;font-weight:700;">🤖 GPepT</a>
	</p>
	<p style="margin:3px 0 0 0;font-size:13px;color:rgba(255,255,255,0.85);text-shadow:0 1px 6px rgba(0,0,0,0.4);font-style:italic;">
	Also works with peptidomimetics containing non-canonical amino acids
	</p>
	</div>
	""")

	    # Input
	    gr.HTML('<p class="section-label">⬇ Input</p>')
	    with gr.Group(elem_classes=["input-card"]):
	        with gr.Row():
	            smiles_input = gr.Textbox(
	                label="SMILES (one per line. Delete examples to enter yours)", lines=2, max_lines=4,
	                placeholder="Paste SMILES here, one per line...",
	                value=example_smiles, elem_classes=["smiles-input"], scale=4,
	            )
	            run_btn = gr.Button("🚀 Run", variant="primary", size="lg", elem_classes=["run-btn"], scale=1)

	    # Output
	    gr.HTML('<p class="section-label">⬆ Output</p>')

	    with gr.Group(elem_classes=["viz-card"]):
	        with gr.Row():
	            with gr.Column(scale=4):
	                # Carousel controls: previous / counter / next / save.
	                with gr.Row():
	                    prev_btn = gr.Button("◀", size="sm", scale=1, min_width=40, elem_classes=["nav-btn"])
	                    counter_display = gr.Textbox(value="0 / 0", show_label=False, interactive=False, scale=1, container=False, elem_classes=["compact-text"])
	                    next_btn = gr.Button("▶", size="sm", scale=1, min_width=40, elem_classes=["nav-btn"])
	                    save_svg_btn = gr.Button("💾 Save", size="sm", scale=1, min_width=70, elem_classes=["save-btn"])
	                output_html = gr.HTML(value=example_svg_html)
	                # Hidden until the Save button is used.
	                svg_download = gr.File(label="Download SVG", visible=False)
	            with gr.Column(scale=1):
	                # The legend is optional: only rendered when legend.png exists.
	                lh = load_legend_html()
	                if lh:
	                    gr.HTML(lh)

	    with gr.Group(elem_classes=["output-card"]):
	        with gr.Row():
	            output_mapping = gr.Textbox(label="Non-canonical Amino Acid Mapping", lines=4, show_copy_button=True, elem_classes=["compact-text"])
	            output_sequences = gr.Textbox(label="Sequences", lines=4, show_copy_button=True, elem_classes=["compact-text"])

	    # Footer
	    gr.HTML("""<div class="footer-note">
	⚠️ Labels are specific to this batch. Run on
	<a href="https://github.com/tsudalab/Monomerizer" target="_blank">GitHub</a>
	to make IDs compatible with our foundation model
	<a href="https://huggingface.co/Playingyoyo/GPepT" target="_blank">GPepT</a>.
	</div>""")

	    # ══════════════════════════════════════════════════
	    # MCP API ENDPOINT — returns text + composite image
	    # gr.Image serializes as a native image for MCP clients
	    # ══════════════════════════════════════════════════

	    # Invisible components that exist only to give analyze_smiles a named
	    # API endpoint with a text input and text + image outputs.
	    with gr.Row(visible=False):
	        api_input = gr.Textbox()
	        api_output = gr.Textbox()
	        api_image_output = gr.Image(type="filepath")
	        api_btn = gr.Button()

	    api_btn.click(
	        fn=analyze_smiles,
	        inputs=[api_input],
	        outputs=[api_output, api_image_output],
	        api_name="analyze_smiles",
	    )

	    # ── UI Events (all hidden from MCP) ──
	    run_btn.click(
	        fn=run_monomerizer,
	        inputs=[smiles_input],
	        outputs=[output_mapping, output_sequences, output_html, counter_display, images_state, svg_paths_state],
	        api_name=False,
	    )
	    prev_btn.click(
	        fn=lambda i, s, c: navigate("prev", i, s, c),
	        inputs=[images_state, svg_paths_state, counter_display],
	        outputs=[output_html, counter_display, svg_download],
	        api_name=False,
	    )
	    next_btn.click(
	        fn=lambda i, s, c: navigate("next", i, s, c),
	        inputs=[images_state, svg_paths_state, counter_display],
	        outputs=[output_html, counter_display, svg_download],
	        api_name=False,
	    )
	    # Save: load the current SVG into the file component, then reveal it.
	    save_svg_btn.click(
	        fn=get_svg,
	        inputs=[svg_paths_state, counter_display],
	        outputs=[svg_download],
	        api_name=False,
	    ).then(
	        fn=lambda: gr.update(visible=True),
	        outputs=[svg_download],
	        api_name=False,
	    )

	if __name__ == "__main__":
	    # Listen on all interfaces at port 7860 with the MCP server enabled.
	    demo.launch(server_name="0.0.0.0", server_port=7860, mcp_server=True)