Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import joblib | |
def predict_sequence_label(sequence):
    """
    Predict the label for a given amino acid sequence using the saved Random Forest model.

    The sequence is turned into Amino Acid Composition (AAC) features, the
    model stored in 'model.joblib' is loaded, and the prediction for that
    single feature row is returned.

    Parameters:
    sequence (str): A string representing the amino acid sequence. Matching is
        case-sensitive: only the upper-case letters 'ACDEFGHIKLMNPQRSTVWY'
        are counted; any other character contributes to the length only.

    Returns:
    int: The predicted label (0 or 1) -- i.e. the first element of whatever
        the loaded model's ``predict`` returns for the one-row feature frame.

    Raises:
    ValueError: If the sequence is empty, because frequencies are undefined
        for a zero-length sequence.
    """
    # Validate before any work: an empty sequence would otherwise surface as
    # an opaque ZeroDivisionError while computing frequencies.
    seq_length = len(sequence)
    if seq_length == 0:
        raise ValueError("sequence must be a non-empty amino acid string")

    def compute_aac_features(sequence):
        """
        Compute the Amino Acid Composition (AAC) features for a given sequence.

        Parameters:
        sequence (str): A non-empty string representing the amino acid sequence.

        Returns:
        pd.DataFrame: One-row DataFrame with columns 'AAC_A' ... 'AAC_Y'
            (in the order of the 20 standard amino acids below), each holding
            the count of that residue divided by the full sequence length.
        """
        # The 20 standard amino acids; this order fixes the column order.
        amino_acids = 'ACDEFGHIKLMNPQRSTVWY'
        # The denominator is the full sequence length, so characters outside
        # the alphabet lower every frequency rather than being dropped.
        aac_features = {
            f"AAC_{aa}": sequence.count(aa) / seq_length for aa in amino_acids
        }
        return pd.DataFrame([aac_features])

    # Compute AAC features from the sequence
    aac_features_df = compute_aac_features(sequence)

    # Load the saved Random Forest model. The file is re-read on every call.
    # NOTE: joblib.load unpickles the file, so 'model.joblib' must come from
    # a trusted source.
    saved_model = joblib.load('model.joblib')

    # Predict using the loaded model; a single row in gives a single label out.
    prediction = saved_model.predict(aac_features_df)
    return prediction[0]
| # Example usage: | |
| # sequence = "YOUR_AMINO_ACID_SEQUENCE_HERE" | |
| # print(predict_sequence_label(sequence)) | |