Spaces:

shubham142000
/

UbiquitinAI

Sleeping

App Files Files Community

UbiquitinAI / prediction.py

shubham142000

Update prediction.py

3d44518 verified almost 2 years ago

raw

history blame contribute delete

1.86 kB

	from collections import Counter

	import joblib
	import pandas as pd

	def _compute_aac_features(sequence):
	    """
	    Compute the Amino Acid Composition (AAC) features for a given sequence.

	    Parameters:
	    sequence (str): A non-empty string of one-letter amino acid codes.

	    Returns:
	    pd.DataFrame: Single-row DataFrame with one column per standard amino
	    acid, named ``AAC_<letter>`` in the order ``ACDEFGHIKLMNPQRSTVWY``.
	    Each value is that residue's count divided by the sequence length.

	    Raises:
	    ValueError: If the sequence is empty (the frequencies are undefined).
	    """
	    # The 20 standard amino acids; the order fixes the feature column order.
	    amino_acids = 'ACDEFGHIKLMNPQRSTVWY'

	    seq_length = len(sequence)
	    if seq_length == 0:
	        raise ValueError("sequence must contain at least one residue")

	    residue_counts = Counter(sequence)

	    # Symbols outside the 20 standard residues get no column of their own
	    # but still contribute to the denominator (the full sequence length).
	    aac_features = {
	        f"AAC_{aa}": residue_counts[aa] / seq_length for aa in amino_acids
	    }

	    return pd.DataFrame([aac_features])


	def predict_sequence_label(sequence, model_path='model.joblib', model=None):
	    """
	    Predict the label for a given amino acid sequence using the saved model.

	    String input is normalised before feature extraction: all whitespace is
	    removed and letters are upper-cased, so pasted multi-line or lowercase
	    sequences yield the same features as their canonical form.

	    Parameters:
	    sequence (str): A string representing the amino acid sequence.
	    model_path (str): Path of the joblib file to load when ``model`` is not
	    given. Defaults to 'model.joblib'.
	    model: Optional already-loaded estimator exposing ``predict``. Pass it
	    to avoid reloading the model from disk on every call.

	    Returns:
	    The first element of the model's ``predict`` output for this sequence
	    (described by the original author as an int label, 0 or 1).

	    Raises:
	    ValueError: If the sequence contains no residues after normalisation.
	    """
	    if isinstance(sequence, str):
	        sequence = "".join(sequence.split()).upper()

	    # Build the features first so invalid input fails before any model I/O.
	    aac_features_df = _compute_aac_features(sequence)

	    if model is None:
	        # NOTE: joblib.load unpickles the file; only load trusted model files.
	        model = joblib.load(model_path)

	    prediction = model.predict(aac_features_df)

	    return prediction[0]

	# Example usage:
	# sequence = "YOUR_AMINO_ACID_SEQUENCE_HERE"
	# print(predict_sequence_label(sequence))