Spaces:

PiKaHa
/

Rubisco_Kinetics-Prediction

Sleeping

App Files Files Community

PiKaHa committed on Nov 18, 2024

Commit

e372ce4

1 Parent(s): b3237f2

Update app.py with transformer embeddings and prediction pipeline

Browse files

Files changed (1) hide show

app.py +32 -24

app.py CHANGED Viewed

@@ -29,14 +29,9 @@ torch.backends.cudnn.benchmark = False
 def load_model(model_path):
     print(f"Loading model from {model_path}...")
-    #print(f"Loading model from {model_path} using TFSMLayer...")
-    #return TFSMLayer(model_path, call_endpoint="serving_default")
-    #return tf.keras.models.load_model(model_path)
     return tf.saved_model.load(model_path)
-# Load Random Forest models and configurations
 print("Loading models...")
 plant_models = {
     "Specificity": {"model": joblib.load("Specificity.pkl"), "esm_model": "facebook/esm1b_t33_650M_UR50S", "layer": 6},
@@ -66,7 +61,11 @@ def get_embedding(sequence, esm_model_name, layer):
         hidden_states = outputs.hidden_states  # Retrieve all hidden states
         embedding = hidden_states[layer].mean(dim=1).numpy()  # Average pooling
-    return embedding
 def predict_with_gpflow(model, X):
@@ -79,27 +78,35 @@ def predict_with_gpflow(model, X):
     # Return mean and variance as numpy arrays
     return mean.numpy().flatten(), variance.numpy().flatten()
-# Function to predict based on user choice
-def predict(sequence, prediction_type):
-    # Select the appropriate model set
-    selected_models = plant_models if prediction_type == "Plant-Specific" else general_models
-    def process_target(target):
-        esm_model_name = selected_models[target]["esm_model"]
-        layer = selected_models[target]["layer"]
-        model = selected_models[target]["model"]
-        # Generate embedding
-        embedding = get_embedding(sequence, esm_model_name, layer)
-        if prediction_type == "Plant-Specific":
-            # Random Forest prediction
-            prediction = model.predict(embedding)[0]
-            return target, round(prediction, 2)
-        else:
-            # GPflow prediction
-            mean, variance = predict_with_gpflow(model, embedding)
-            return target, round(mean[0], 2), round(variance[0], 2)
     # Predict for all targets in parallel
     with ThreadPoolExecutor() as executor:
@@ -121,6 +128,7 @@ def predict(sequence, prediction_type):
     return formatted_results
 # Define Gradio interface
 print("Creating Gradio interface...")
 interface = gr.Interface(

 def load_model(model_path):
     """Load a TensorFlow SavedModel from ``model_path`` and return it.

     A progress message naming the path is printed before loading.
     """
     print(f"Loading model from {model_path}...")
     loaded = tf.saved_model.load(model_path)
     return loaded
 print("Loading models...")
 plant_models = {
     "Specificity": {"model": joblib.load("Specificity.pkl"), "esm_model": "facebook/esm1b_t33_650M_UR50S", "layer": 6},
         hidden_states = outputs.hidden_states  # Retrieve all hidden states
         embedding = hidden_states[layer].mean(dim=1).numpy()  # Average pooling
+    # Convert to DataFrame with named columns
+    feature_columns = {f"D{i+1}": embedding[0, i] for i in range(embedding.shape[1])}
+    embedding_df = pd.DataFrame([feature_columns])
+    return embedding_df.values, embedding_df
 def predict_with_gpflow(model, X):
     # Return mean and variance as numpy arrays
     return mean.numpy().flatten(), variance.numpy().flatten()
+def process_target(target):
+    """
+    Process a single target for prediction using transformer embeddings and the specified model.
+
+    Looks up the ESM model name, hidden-state layer and fitted model for
+    `target` in `selected_models`, embeds `sequence` via `get_embedding`,
+    and returns `(target, prediction)` when `prediction_type` is
+    "Plant-Specific", or `(target, mean, variance)` otherwise. Numeric
+    values are rounded to 2 decimal places.
+
+    NOTE(review): `selected_models`, `sequence` and `prediction_type` are
+    neither parameters of this function nor assigned inside it, so they
+    are resolved as module-level names. In the visible code they exist
+    only as parameters/locals of `predict`, which would make this call
+    raise NameError -- confirm they are defined globally elsewhere, or
+    pass them in explicitly from `predict`.
+    """
+    # Per-target configuration: ESM model name, hidden-state layer index, fitted model
+    esm_model_name = selected_models[target]["esm_model"]
+    layer = selected_models[target]["layer"]
+    model = selected_models[target]["model"]
+    # get_embedding returns (array, DataFrame); only the array is used here
+    embedding, _ = get_embedding(sequence, esm_model_name, layer)
+    if prediction_type == "Plant-Specific":
+        # Random Forest prediction: first (only) row of the model output
+        y_pred = model.predict(embedding)[0]
+        return target, round(y_pred, 2)
+    else:
+        # GPflow prediction: flattened mean and variance from predict_with_gpflow
+        y_pred, y_uncertainty = predict_with_gpflow(model, embedding)
+        return target, round(y_pred[0], 2), round(y_uncertainty[0], 2)
+def predict(sequence, prediction_type):
+    """
+    Predicts Specificity, kcatC, and KC for the given sequence and prediction type.
+    """
+    # Select the appropriate model set
+    selected_models = plant_models if prediction_type == "Plant-Specific" else general_models
     # Predict for all targets in parallel
     with ThreadPoolExecutor() as executor:
     return formatted_results
 # Define Gradio interface
 print("Creating Gradio interface...")
 interface = gr.Interface(