Spaces:

Ym420
/

peptide-function-classification

Running

App Files Files Community

peptide-function-classification / app.py

Ym420

Update app.py

fe30bd1 verified 5 months ago

raw

history blame

5.14 kB

	import gradio as gr
	import joblib
	from huggingface_hub import hf_hub_download
	import pandas as pd
	import numpy as np
	from collections import Counter

	# --- Download model from HF Hub ---
	# Everything in this section runs at import time: the serialized model bundle
	# is fetched from the Hub repo below and deserialized before the UI is built.
	repo_id = "Ym420/Peptide-Function"
	model_filename = "xgb_multilabel_model_full.pkl"

	model_path = hf_hub_download(repo_id=repo_id, filename=model_filename)
	# SECURITY NOTE(review): joblib.load unpickles the file, and unpickling can
	# execute arbitrary code. This is only safe while the repo above is trusted
	# and under our control. Do not point repo_id/model_filename at third-party
	# or user-supplied artifacts without switching to a safe serialization format.
	model_package = joblib.load(model_path)

	# --- Unwrap model dict ---
	# 'model' and 'feature_columns' are required keys: a bundle without them
	# fails here with KeyError instead of failing later during prediction.
	model_dict = model_package['model'] # dict: {'Gram+': XGBClassifier, ...}
	feature_columns = model_package['feature_columns']

	# --- Metadata (all restored) ---
	# Optional lookup data used by extract_features_app. A key missing from the
	# bundle falls back to an empty container, in which case the corresponding
	# features silently evaluate to 0.
	aa_list = model_package.get('aa_list', [])
	dipeptides = model_package.get('dipeptides', [])
	hydrophobicity_scale = model_package.get('hydrophobicity_scale', {})
	eisenberg_scale = model_package.get('eisenberg_scale', {})
	aa_mass = model_package.get('aa_mass', {})
	aa_charge = model_package.get('aa_charge', {})
	aa_boman = model_package.get('aa_boman', {})
	aa_flexibility = model_package.get('aa_flexibility', {})
	aa_polarizability = model_package.get('aa_polarizability', {})
	aa_aliphatic = model_package.get('aa_aliphatic', {})
	aa_deltaG = model_package.get('aa_deltaG', {})

	# --- Dynamic TARGET_CELLS ---
	# One prediction target per key of the model dict, in the dict's own order.
	TARGET_CELLS = list(model_dict.keys()) # automatically detects all targets

	# --- Feature extraction (future-proof) ---
	def extract_features_app(seq: str) -> pd.DataFrame:
	    """Build the single-row feature frame that is passed to the classifiers.

	    The sequence is upper-cased and split into overlapping residue pairs
	    (dipeptides). Two feature groups are derived from those pairs:

	    * composition: for every entry of the module-level ``dipeptides`` list,
	      the fraction of the sequence's pairs that equal it;
	    * physicochemical summaries: pair-averaged lookups into the module-level
	      per-residue tables (``aa_mass``, ``aa_charge``, ...). A residue that is
	      missing from a table contributes 0. A sequence shorter than two
	      residues has no pairs, so every summary is 0.

	    Args:
	        seq: Peptide sequence in one-letter residue codes; case-insensitive.

	    Returns:
	        A one-row ``float32`` DataFrame whose columns are exactly
	        ``feature_columns``, in that order. A column that matches no computed
	        feature is filled with 0.
	    """
	    seq = seq.upper()

	    # Overlapping residue pairs; built once and shared by both feature groups.
	    pairs = [seq[i:i + 2] for i in range(len(seq) - 1)]

	    # --- 1. Dipeptide composition ---
	    pair_counts = Counter(pairs)
	    total = max(len(pairs), 1)  # avoids dividing by zero when there are no pairs
	    dipep_features = {dp: pair_counts.get(dp, 0) / total for dp in dipeptides}

	    # --- 2. Physicochemical features ---
	    def residue_value(aa, table):
	        # Unknown residues count as 0 rather than raising.
	        return table.get(aa, 0)

	    def pair_value(dp, table):
	        # Value of a pair = mean of its two residues' table values.
	        return (residue_value(dp[0], table) + residue_value(dp[1], table)) / 2.0

	    def mean_over_pairs(table):
	        return np.mean([pair_value(dp, table) for dp in pairs])

	    if not pairs:
	        physchem_features = {
	            'mw': 0, 'charge': 0, 'hydro': 0, 'aromatic': 0, 'pI': 0,
	            'instability': 0, 'hydro_moment': 0, 'aliphatic': 0,
	            'boman': 0, 'flexibility': 0, 'polarizability': 0, 'deltag': 0,
	        }
	    else:
	        # Per-residue pI stand-in: 7, plus 1 for K/R/H, minus 1 for D/E.
	        # Built once here instead of once per pair; residues that are not in
	        # aa_list still look up as 0, exactly as before.
	        pi_table = {aa: 7 + (int(aa in 'KRH') - int(aa in 'DE')) for aa in aa_list}

	        physchem_features = {
	            'mw': mean_over_pairs(aa_mass),
	            'charge': mean_over_pairs(aa_charge),
	            'hydro': mean_over_pairs(hydrophobicity_scale),
	            # Fraction of residues (counted per pair) that are F, W or Y.
	            'aromatic': np.mean([(dp[0] in 'FWY') + (dp[1] in 'FWY') for dp in pairs]) / 2.0,
	            'pI': mean_over_pairs(pi_table),
	            # Fraction of residues (counted per pair) that are D, E, K or R.
	            'instability': np.mean([((dp[0] in 'DEKR') + (dp[1] in 'DEKR')) / 2.0 for dp in pairs]),
	            # Root-mean-square of the pair values on the Eisenberg scale.
	            'hydro_moment': np.sqrt(np.mean([pair_value(dp, eisenberg_scale) ** 2 for dp in pairs])),
	            'aliphatic': mean_over_pairs(aa_aliphatic),
	            'boman': mean_over_pairs(aa_boman),
	            'flexibility': mean_over_pairs(aa_flexibility),
	            'polarizability': mean_over_pairs(aa_polarizability),
	            'deltag': mean_over_pairs(aa_deltaG),
	        }

	    # --- Combine features ---
	    # Merge the two mappings into one dict. The previous `{a, b}` form was a
	    # set literal of dicts and raised TypeError (dicts are unhashable).
	    all_features = {**dipep_features, **physchem_features}

	    # --- Align with feature_columns ---
	    # Emit exactly the expected columns, in order; anything not computed is 0.
	    row = [all_features.get(col, 0) for col in feature_columns]
	    df = pd.DataFrame([row], columns=feature_columns)
	    return df.astype('float32')

	# --- Prediction function ---
	def predict_peptide(sequence: str):
	    """Score one peptide sequence against every target in TARGET_CELLS.

	    All whitespace is removed from the input and the rest is upper-cased
	    before features are extracted.

	    Args:
	        sequence: Raw sequence text as entered by the caller.

	    Returns:
	        A list of ``[target, probability]`` rows, one per target, where the
	        probability is the classifier's positive-class output rounded to four
	        decimals, or ``None`` when no classifier is stored for that target.
	        An input that is empty after whitespace removal yields ``[]``.
	    """
	    cleaned = "".join(sequence.split()).upper()
	    if not cleaned:
	        return []

	    features = extract_features_app(cleaned)

	    def score_for(target):
	        estimator = model_dict.get(target)
	        if estimator is None:
	            return None
	        # Row 0 is the only sample; column 1 is the positive class.
	        positive = estimator.predict_proba(features)[0][1]
	        return round(float(positive), 4)

	    return [[target, score_for(target)] for target in TARGET_CELLS]

	# --- Gradio Interface ---
	# Stylesheet passed to the Blocks app; its one rule hides footer elements.
	custom_css = """
	footer, .footer {display:none !important;}
	"""

	with gr.Blocks(theme="default", css=custom_css) as demo:
	    gr.Markdown(
	        "## Peptide Antimicrobial Predictor\nEnter a peptide sequence to predict efficacy/toxicity."
	    )

	    # Input: a single free-text box holding the sequence.
	    seq_input = gr.Textbox(label="Enter Peptide Sequence")

	    # The two action buttons share one row.
	    with gr.Row():
	        predict_btn = gr.Button("Predict", variant="primary")
	        clear_btn = gr.Button("Clear")

	    # Output: read-only table with one row per target.
	    table_output = gr.Dataframe(
	        interactive=False,
	        headers=["Target Cell", "Probability of Efficacy/Toxicity"],
	        datatype=["str", "number"],
	    )

	    # Predict fills the table from the textbox content.
	    predict_btn.click(
	        fn=predict_peptide,
	        inputs=seq_input,
	        outputs=table_output,
	    )
	    # Clear resets the textbox to "" and the table to no rows.
	    clear_btn.click(
	        fn=lambda: ("", []),
	        outputs=[seq_input, table_output],
	    )

	    # API endpoint for iOS app
	    gr.api(predict_peptide, api_name="predict_peptide")

	# Start the server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch(show_error=True)