Spaces:

Fredaaaaaa
/

newww

Runtime error

App Files Files Community

newww / app.py

Fredaaaaaa

Update app.py

a05f3a1 verified 8 months ago

raw

history blame contribute delete

4.95 kB

	import joblib
	import numpy as np
	from pubchempy import get_compounds
	from rdkit import Chem
	from rdkit.Chem import AllChem
	import gradio as gr

	print("Loading model and preprocessors...")
	try:
	    # Deserialize the three artifacts in a fixed order: the classifier,
	    # the feature scaler, and the label encoder used to decode predictions.
	    model, scaler, le = (
	        joblib.load(artifact_path)
	        for artifact_path in (
	            'random_forest_model.joblib',
	            'standard_scaler.joblib',
	            'label_encoder.joblib',
	        )
	    )
	    print(f"Model loaded successfully. Type: {type(model).__name__}")
	except Exception as e:
	    # Log the cause, then re-raise: the app cannot serve predictions
	    # without all three artifacts.
	    print(f"Error loading model: {e}")
	    raise

	# Numerical descriptor columns used at training time (order must match
	# exactly). Each base descriptor is immediately followed by its "_2"
	# counterpart for the second drug, i.e. the columns are interleaved.
	numerical_cols = [
	    column
	    for base in (
	        'molecular_weight',
	        'xlogp',
	        'tpsa',
	        'rotatable_bond_count',
	        'h_bond_donor_count',
	        'h_bond_acceptor_count',
	        'complexity',
	        'charge',
	        'exact_mass',
	    )
	    for column in (base, f'{base}_2')
	]

	# Preprocessing function
	def get_morgan_fingerprint(smiles, radius=2, n_bits=512):
	    """Return a Morgan fingerprint bit vector for a SMILES string.

	    Args:
	        smiles: SMILES string of the molecule; may be None or empty.
	        radius: Radius passed to the RDKit Morgan fingerprint routine.
	        n_bits: Length of the fingerprint (and of the fallback vector).

	    Returns:
	        A NumPy array built from the RDKit bit vector, or ``np.zeros(n_bits)``
	        when the SMILES is missing, cannot be parsed, or fingerprinting fails.
	    """
	    # A missing SMILES cannot be parsed; return the all-zero fallback
	    # explicitly instead of relying on the exception handler below.
	    if not smiles:
	        return np.zeros(n_bits)
	    try:
	        mol = Chem.MolFromSmiles(smiles)
	        if mol is None:
	            # MolFromSmiles signals an unparsable SMILES by returning None.
	            return np.zeros(n_bits)
	        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
	        return np.array(fp)
	    except Exception:
	        # Best-effort: any fingerprinting failure degrades to zeros, but
	        # KeyboardInterrupt/SystemExit are no longer swallowed.
	        return np.zeros(n_bits)

	# Function to extract features from PubChem
	# PubChem compound attributes read for each drug. 'charge' is handled
	# separately because the app always feeds 0 for it.
	_PUBCHEM_PROPERTIES = (
	    'molecular_weight',
	    'xlogp',
	    'tpsa',
	    'rotatable_bond_count',
	    'h_bond_donor_count',
	    'h_bond_acceptor_count',
	    'complexity',
	    'exact_mass',
	)


	def _to_float(value):
	    """Coerce a PubChem property to float; missing or unparsable values become 0.0."""
	    if not value:
	        return 0.0
	    try:
	        # Some PubChemPy versions return numeric properties as strings.
	        return float(value)
	    except (TypeError, ValueError):
	        return 0.0


	def _compound_properties(compound, suffix=""):
	    """Build the descriptor dict for one compound, with keys suffixed by *suffix*."""
	    props = {
	        f"{name}{suffix}": _to_float(getattr(compound, name))
	        for name in _PUBCHEM_PROPERTIES
	    }
	    # Charge is not read from PubChem; the app always uses 0 for it.
	    # NOTE(review): confirm this matches how 'charge' was built for training.
	    props[f"charge{suffix}"] = 0
	    return props


	def extract_features(drug1_name, drug2_name):
	    """Look up two drugs on PubChem and build the model input row.

	    Args:
	        drug1_name: Name of the first drug, searched by name on PubChem.
	        drug2_name: Name of the second drug, searched by name on PubChem.

	    Returns:
	        A 4-tuple ``(X, error, smiles1, smiles2)``. On success ``X`` is a
	        ``(1, n_features)`` array and ``error`` is None. If either drug is
	        not found, ``X`` and both SMILES are None and ``error`` is a message.
	        Exceptions raised by the PubChem lookup itself are not caught here.
	    """
	    # Fetch compounds from PubChem
	    compounds1 = get_compounds(drug1_name, 'name')
	    compounds2 = get_compounds(drug2_name, 'name')
	    if not compounds1 or not compounds2:
	        # Always return four values so callers can unpack uniformly.
	        return None, f"One or both drugs not found: {drug1_name}, {drug2_name}", None, None

	    compound1 = compounds1[0]  # Take the first match
	    compound2 = compounds2[0]

	    # Merge both drugs' descriptors and read them out in numerical_cols
	    # order. That list interleaves drug-1 and drug-2 columns, so slicing it
	    # in half would zero out every other descriptor.
	    props = {
	        **_compound_properties(compound1),
	        **_compound_properties(compound2, suffix="_2"),
	    }
	    features = [props.get(col, 0) for col in numerical_cols]

	    # Get SMILES for fingerprints
	    smiles1 = compound1.canonical_smiles
	    smiles2 = compound2.canonical_smiles
	    fp1 = get_morgan_fingerprint(smiles1)
	    fp2 = get_morgan_fingerprint(smiles2)

	    # Descriptors + both fingerprints + 768 zeros standing in for the
	    # BioBERT embedding dimensions.
	    X = np.hstack([
	        np.array(features, dtype=float).reshape(1, -1),
	        fp1.reshape(1, -1),
	        fp2.reshape(1, -1),
	        np.zeros((1, 768)),
	    ])

	    return X, None, smiles1, smiles2

	# Function to predict severity
	def predict_severity(drug1, drug2):
	    """Predict the interaction severity for two drug names.

	    Args:
	        drug1: Name of the first drug as typed by the user.
	        drug2: Name of the second drug as typed by the user.

	    Returns:
	        A text report with the predicted severity, both SMILES strings and
	        the per-class probabilities, or a plain error/prompt message when the
	        inputs are blank, a drug is not found, or the lookup fails.
	    """
	    # Blank fields are expected while the user is still typing; answer with
	    # a prompt instead of sending an empty name to PubChem.
	    names = [(name or "").strip() for name in (drug1, drug2)]
	    if not all(names):
	        return "Please enter both drug names."

	    # Fetch drug features from PubChem. This is the UI boundary, so lookup
	    # failures are reported as text rather than raised.
	    try:
	        outcome = extract_features(names[0], names[1])
	    except Exception as exc:
	        return f"Error fetching drug data: {exc}"

	    # Read the result by position so both a 2-element error return and a
	    # 4-element return are handled.
	    X, error = outcome[0], outcome[1]
	    if error:
	        return error
	    smiles1, smiles2 = outcome[2], outcome[3]

	    # Scale and predict
	    X_scaled = scaler.transform(X)
	    prediction = model.predict(X_scaled)
	    severity = le.inverse_transform(prediction)[0]
	    probabilities = model.predict_proba(X_scaled)[0]

	    # predict_proba columns follow model.classes_, so decode those labels
	    # instead of indexing by the encoder's class list.
	    labels = le.inverse_transform(model.classes_)

	    # Format output with SMILES
	    lines = [
	        f"Predicted Severity: {severity}",
	        f"Drug 1 SMILES: {smiles1}",
	        f"Drug 2 SMILES: {smiles2}",
	        "Prediction Probabilities:",
	    ]
	    lines.extend(f" {label}: {prob:.2%}" for label, prob in zip(labels, probabilities))
	    return "\n".join(lines) + "\n"

	# Gradio Interface
	# Two free-text fields, passed positionally to predict_severity(drug1, drug2).
	drug_inputs = [
	    gr.Textbox(label="Drug 1"),
	    gr.Textbox(label="Drug 2"),
	]

	# NOTE(review): live=True presumably re-runs the prediction on every input
	# change, which means a PubChem lookup per change — confirm this is intended.
	interface = gr.Interface(
	    title="Drug Interaction Severity Predictor",
	    description="Enter two drug names to predict the severity of their interaction.",
	    fn=predict_severity,
	    inputs=drug_inputs,
	    outputs="text",
	    live=True,
	)

	# Start serving the app.
	interface.launch()