Spaces:

PickyBinders
/

stoic-space

Sleeping

App Files Files Community

stoic-space / app.py

Ninjani

add jsons

d04dc1f 2 months ago

raw

history blame contribute delete

7.62 kB

	import functools
	import json
	import os
	import tempfile
	import time

	import torch
	import gradio as gr
	import pandas as pd
	from loguru import logger

	from stoic.model import Stoic
	from stoic.predict_stoichiometry import _build_af3_input_json

	MAX_CHAINS = 26


	@functools.lru_cache(maxsize=1)
	def get_model():
	    """Load the pretrained Stoic model once and return it in eval mode.

	    The model is placed on CUDA when ``torch.cuda.is_available()`` reports a
	    GPU and on the CPU otherwise. The ``lru_cache`` decorator means the load
	    happens on the first call only; later calls get the same object back.
	    """
	    if torch.cuda.is_available():
	        target = torch.device("cuda")
	    else:
	        target = torch.device("cpu")
	    logger.info(f"Loading model on {target}")

	    stoic = Stoic.from_pretrained("PickyBinders/stoic")
	    on_device = stoic.to(target)
	    ready = on_device.eval()

	    logger.info("Model loaded")
	    return ready


	def predict(sequences_text: str, top_n: int, return_weights: bool):
	    """Run a stoichiometry prediction and format every output for the UI.

	    Args:
	        sequences_text: Raw textbox contents, one protein sequence per line.
	            Surrounding whitespace and blank lines are ignored.
	        top_n: Number of candidates requested from the model.
	        return_weights: Whether to also request residue-level weights and
	            fill the per-chain plots and the weights CSV download.

	    Returns:
	        A tuple of: the Markdown results text, the runtime as a string,
	        updates for the stoichiometry CSV, AF3 JSON and weights CSV
	        downloads, followed by ``MAX_CHAINS`` plot updates.

	    Raises:
	        gr.Error: If no sequence was entered, or more than ``MAX_CHAINS``.
	    """
	    sequences = [s.strip() for s in sequences_text.strip().split("\n") if s.strip()]
	    if not sequences:
	        raise gr.Error("Please enter at least one protein sequence.")
	    if len(sequences) > MAX_CHAINS:
	        raise gr.Error(f"Maximum {MAX_CHAINS} unique chains supported.")

	    model = get_model()
	    # perf_counter is monotonic, so the reported duration cannot be skewed
	    # by wall-clock adjustments the way time.time() can.
	    start = time.perf_counter()
	    with torch.no_grad():
	        raw = model.predict_stoichiometry(
	            sequences, top_n=top_n, return_residue_weights=return_weights
	        )
	    elapsed = time.perf_counter() - start

	    # With residue weights requested the model returns a pair.
	    residue_predictions = None
	    if return_weights:
	        results, residue_predictions = raw
	    else:
	        results = raw

	    chain_labels = [chr(ord("A") + i) for i in range(len(sequences))]

	    # Plain "|" delimiters: a backslash-escaped pipe is a literal character
	    # in Markdown, so the table would not render (and "\|" is an invalid
	    # escape sequence in a Python string literal).
	    header = (
	        "| Rank | "
	        + " | ".join(f"Chain {label}" for label in chain_labels)
	        + " | Stoichiometry | Score | Probability |"
	    )
	    separator = (
	        "|------|"
	        + "|".join("-----" for _ in chain_labels)
	        + "|---------------|-------|-------------|"
	    )

	    stoich_csv_rows = []
	    rows = []
	    for rank, candidate in enumerate(results, 1):
	        # Each candidate is looked up by sequence string; a missing
	        # sequence counts as zero copies.
	        copies = [candidate.get(seq, 0) for seq in sequences]
	        stoich = "".join(
	            f"{label}<sub>{count}</sub>" for label, count in zip(chain_labels, copies)
	        )
	        # NOTE(review): the "Score" column is read from the "rank" key of the
	        # candidate -- confirm that this is the intended field.
	        score = candidate.get("rank", 0)
	        prob = candidate.get("probability", 0)
	        rows.append(
	            f"| {rank} | "
	            + " | ".join(str(count) for count in copies)
	            + f" | {stoich} | {score:.2f} | {prob:.2e} |"
	        )
	        stoich_csv_rows.append({
	            "Rank": rank,
	            **{f"Chain {label}": count for label, count in zip(chain_labels, copies)},
	            "Score": score,
	            "Probability": prob,
	        })

	    table = "\n".join([header, separator] + rows)

	    # The first legend entry starts with a blank line so the list is
	    # separated from the table above it.
	    legend_lines = ["\n\nSequences:"]
	    for label, seq in zip(chain_labels, sequences):
	        preview = seq[:50] + "..." if len(seq) > 50 else seq
	        legend_lines.append(f"- Chain {label}: `{preview}`")

	    stoich_md = table + "\n".join(legend_lines)
	    stoich_csv_path = _save_csv(pd.DataFrame(stoich_csv_rows), "stoichiometry_results.csv")

	    af3_json_paths = _save_af3_jsons(results)

	    # Default: every plot and the weights download stay hidden.
	    plot_updates = [gr.update(value=None, visible=False) for _ in range(MAX_CHAINS)]
	    weights_csv_update = gr.update(value=None, visible=False)

	    if return_weights:
	        chain_dfs = build_chain_dfs(residue_predictions, chain_labels)
	        for i, df in enumerate(chain_dfs.values()):
	            plot_updates[i] = gr.update(value=df, visible=True)
	        all_weights_df = pd.concat(chain_dfs.values(), ignore_index=True)
	        # The "Threshold" rows only exist to draw the reference line in the
	        # plots; they are dropped from the downloadable CSV.
	        all_weights_df = all_weights_df[all_weights_df["Type"] == "Prediction"].drop(columns=["Type"])
	        all_weights_df = all_weights_df[["Chain", "Position", "Weight"]]
	        weights_csv_path = _save_csv(all_weights_df, "residue_weights.csv")
	        weights_csv_update = gr.update(value=weights_csv_path, visible=True)

	    return (
	        stoich_md,
	        f"{elapsed:.2f}s",
	        gr.update(value=stoich_csv_path, visible=True),
	        gr.update(value=af3_json_paths, visible=True),
	        weights_csv_update,
	        *plot_updates,
	    )


	def _save_csv(df: pd.DataFrame, filename: str) -> str:
	    """Write ``df`` as CSV (without the index) and return the file path.

	    Each call writes into its own freshly created temporary directory, so
	    two requests using the same ``filename`` never overwrite each other's
	    file, while the file keeps the requested name.

	    Args:
	        df: Table to serialise.
	        filename: Base name of the CSV file, e.g. ``"results.csv"``.

	    Returns:
	        Path of the written file.
	    """
	    # A fixed path in the shared temp dir would be clobbered by concurrent
	    # requests; mkdtemp gives every call a private directory.
	    out_dir = tempfile.mkdtemp(prefix="stoic_")
	    path = os.path.join(out_dir, filename)
	    df.to_csv(path, index=False)
	    return path


	def _save_af3_jsons(results: list[dict]) -> list[str]:
	    """Generate AF3-style JSON files for each stoichiometry candidate.

	    One file named ``stoic_rank<N>_af3.json`` is written per candidate, with
	    ``N`` counting from 1 in the order of ``results``. All files of a call
	    go into one freshly created temporary directory so concurrent requests
	    cannot overwrite each other's files.

	    Args:
	        results: Candidate dicts as returned by the model; each one is
	            passed on its own to ``_build_af3_input_json``.

	    Returns:
	        Paths of the written files, in rank order.
	    """
	    # A fixed path in the shared temp dir would be clobbered by concurrent
	    # requests; mkdtemp gives every call a private directory.
	    out_dir = tempfile.mkdtemp(prefix="stoic_af3_")
	    paths = []
	    for rank, candidate in enumerate(results, 1):
	        job_name = f"stoic_rank{rank}"
	        af3_json = _build_af3_input_json(job_name, [candidate])
	        path = os.path.join(out_dir, f"{job_name}_af3.json")
	        with open(path, "w", encoding="utf-8") as f:
	            json.dump(af3_json, f, indent=2)
	        paths.append(path)
	    return paths


	def build_chain_dfs(residue_predictions, chain_labels):
	    """Build one plotting DataFrame per chain from residue-level predictions.

	    For every entry of ``residue_predictions["sequences"]`` the weights at
	    the positions where the boolean-cast ``attention_mask`` is False are
	    kept and numbered from 1 as "Prediction" rows. Two "Threshold" rows at
	    weight 0.5 (first and last position) are appended after them.

	    Args:
	        residue_predictions: Mapping with the keys ``"pred_residues"``,
	            ``"attention_mask"`` and ``"sequences"``. The mask rows must
	            support ``.astype(bool)`` (presumably NumPy arrays -- confirm
	            against the model code).
	        chain_labels: Label per chain, indexed like the sequences.

	    Returns:
	        Dict mapping ``"Chain <label>"`` to a DataFrame with the columns
	        ``Position``, ``Weight``, ``Type`` and ``Chain``.
	    """
	    all_weights = residue_predictions["pred_residues"]
	    all_masks = residue_predictions["attention_mask"]
	    sequence_list = residue_predictions["sequences"]

	    frames = {}
	    for idx, _ in enumerate(sequence_list):
	        keep = ~(all_masks[idx].astype(bool))
	        selected = all_weights[idx][keep]

	        rows = []
	        for position, weight in enumerate(selected, start=1):
	            rows.append({"Position": position, "Weight": float(weight), "Type": "Prediction"})
	        # Two end points at 0.5 give the plot a horizontal reference line.
	        for endpoint in (1, len(selected)):
	            rows.append({"Position": endpoint, "Weight": 0.5, "Type": "Threshold"})

	        title = f"Chain {chain_labels[idx]}"
	        frames[title] = pd.DataFrame(rows).assign(Chain=title)
	    return frames


	# ---------------------------------------------------------------------------
	# Gradio UI definition and application start-up.
	#
	# NOTE(review): the indentation of this section is flattened in this copy of
	# the file. The nesting of the `with gr.Blocks()/gr.Row()/gr.Column()`
	# contexts decides the page layout, so it has to be restored from the
	# original file -- confirm the layout before running.
	# ---------------------------------------------------------------------------
	with gr.Blocks(title="Stoic - Protein Stoichiometry Prediction") as app:
	# Page heading and short usage instructions.
	gr.Markdown(
	"# Stoic\n"
	"Fast and accurate protein stoichiometry prediction\n\n"
	"Enter one protein sequence per line (one per unique chain type). "
	"Stoic predicts how many copies of each chain are present in the assembled complex."
	)

	with gr.Row():
	with gr.Column():
	# Inputs, passed to predict() in this order: sequences, top_n, return_weights.
	sequences_input = gr.Textbox(
	label="Protein Sequences (one per line)",
	placeholder="MKTLLILTLFLAIAASSASA...\nMGSSHHHHHHSSGLVPR...",
	lines=6,
	)
	top_n = gr.Slider(
	minimum=1, maximum=10, value=3, step=1,
	label="Number of candidates to return",
	)
	return_weights = gr.Checkbox(
	label="Return residue-level interface prediction weights",
	value=False,
	)
	btn = gr.Button("Predict Stoichiometry", variant="primary")

	with gr.Column():
	# Markdown results table and the measured prediction time.
	results_output = gr.Markdown(value="Results will appear here.")
	run_time = gr.Textbox(label="Runtime")

	with gr.Row():
	# Download widgets start hidden; predict() makes them visible once it
	# has written the corresponding files.
	stoich_csv_download = gr.File(
	label="Download Stoichiometry Results (CSV)",
	visible=False,
	)
	af3_json_download = gr.File(
	label="Download AF3 Input JSON(s)",
	file_count="multiple",
	visible=False,
	)
	weights_csv_download = gr.File(
	label="Download Residue Weights (CSV)",
	visible=False,
	)

	# One hidden LinePlot per possible chain (MAX_CHAINS in total); predict()
	# reveals only the plots it fills with data. The "Type" column separates
	# the prediction curve from the threshold line.
	chain_plots = []
	for i in range(MAX_CHAINS):
	chain_plots.append(
	gr.LinePlot(
	x="Position",
	y="Weight",
	color="Type",
	color_map={"Prediction": "#636EFA", "Threshold": "#BBBBBB"},
	x_title="Residue Position",
	y_title="Weight",
	y_lim=[0, 1],
	label=f"Chain {chr(ord('A') + i)} Interface Weights",
	visible=False,
	)
	)

	# The order of `outputs` must match the tuple returned by predict().
	btn.click(
	predict,
	inputs=[sequences_input, top_n, return_weights],
	outputs=[
	results_output,
	run_time,
	stoich_csv_download,
	af3_json_download,
	weights_csv_download,
	*chain_plots,
	],
	)

	# Load the model before serving: get_model() is lru_cached, so the calls
	# made later from predict() reuse the instance created here.
	get_model()
	app.launch()