Spaces:

hiyata
/

HostClassifier

Running

App Files Community

hiyata committed on Feb 27, 2025

Commit

287ec7d

verified ·

1 Parent(s): 0e88365

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -46

app.py CHANGED Viewed

@@ -85,61 +85,77 @@ def sequence_to_kmer_vector(sequence: str, k: int = 4) -> np.ndarray:
 # 3. SHAP-VALUE (ABLATION) CALCULATION
 ###############################################################################
-def calculate_shap_values(model, x_tensor, baseline=None, steps=50):
     """
-    Calculate feature attributions using Integrated Gradients.
     Args:
         model: A PyTorch model.
         x_tensor: Input tensor of shape (1, num_features).
-        baseline: Tensor of the same shape as x_tensor to use as the reference.
-                  If None, defaults to a tensor of zeros.
-        steps: Number of steps in the Riemann approximation of the integral.
     Returns:
-        attributions: A numpy array of shape (num_features,) with feature attributions.
-        full_prob: The model's predicted probability for the target class (human)
-                   when using the actual input.
     """
     model.eval()
-    if baseline is None:
-        baseline = torch.zeros_like(x_tensor)
-    # Compute the model's prediction for the full input.
-    with torch.no_grad():
-        full_output = model(x_tensor)
-        full_probs = torch.softmax(full_output, dim=1)
-        full_prob = full_probs[0, 1].item()  # Probability for 'human'
-    # Generate interpolated inputs between the baseline and the actual input.
-    scaled_inputs = [
-        baseline + (float(i) / steps) * (x_tensor - baseline)
-        for i in range(steps + 1)
-    ]
-    scaled_inputs = torch.cat(scaled_inputs, dim=0)  # Shape: (steps+1, num_features)
-    scaled_inputs.requires_grad = True
-    # Forward pass: compute model outputs for all interpolated inputs.
-    outputs = model(scaled_inputs)  # Shape: (steps+1, num_classes)
-    probs = torch.softmax(outputs, dim=1)[:, 1]  # Probability for 'human'
-    # Backward pass: compute gradients of the probability with respect to inputs.
-    grads = torch.autograd.grad(
-        outputs=probs,
-        inputs=scaled_inputs,
-        grad_outputs=torch.ones_like(probs),
-        create_graph=False,
-        retain_graph=False
-    )[0]  # Shape: (steps+1, num_features)
-    # Approximate the integral using the trapezoidal rule.
-    avg_grads = (grads[:-1] + grads[1:]) / 2.0  # Average gradient between steps.
-    integrated_grad = avg_grads.mean(dim=0, keepdim=True)  # Mean over all steps.
-    # Scale the integrated gradients by the difference between the input and baseline.
-    attributions = (x_tensor - baseline) * integrated_grad
-    return attributions.squeeze().cpu().numpy(), full_prob

 # 3. SHAP-VALUE (ABLATION) CALCULATION
 ###############################################################################
+def calculate_shap_values(model, x_tensor, baselines=None, steps=100, n_baselines=5):
     """
+    Calculate feature attributions using Integrated Gradients with multiple baselines.
     Args:
         model: A PyTorch model.
         x_tensor: Input tensor of shape (1, num_features).
+        baselines: A list of baseline tensors, each of shape (1, num_features).
+                   If None, defaults to n_baselines copies of the zero vector.
+        steps: Number of interpolation steps between the baseline and the input.
+        n_baselines: Number of baselines to use if baselines is None.
     Returns:
+        avg_attributions: A numpy array of shape (num_features,) with averaged feature attributions.
+        avg_full_prob: The model's predicted probability for the target class ('human')
+                       computed on the full input, averaged over baselines.
     """
     model.eval()
+    # If no baselines are provided, generate a list of zero-vectors.
+    if baselines is None:
+        baselines = [torch.zeros_like(x_tensor) for _ in range(n_baselines)]
+    elif not isinstance(baselines, list):
+        baselines = [baselines]
+    all_attributions = []
+    full_probs = []
+    # For each baseline, compute integrated gradients.
+    for baseline in baselines:
+        # Compute the model's full prediction using the actual input.
+        with torch.no_grad():
+            full_output = model(x_tensor)
+            full_prob = torch.softmax(full_output, dim=1)[0, 1].item()
+            full_probs.append(full_prob)
+        # Create interpolated inputs from baseline to x_tensor.
+        scaled_inputs = [
+            baseline + (float(i) / steps) * (x_tensor - baseline)
+            for i in range(steps + 1)
+        ]
+        scaled_inputs = torch.cat(scaled_inputs, dim=0)  # Shape: (steps+1, num_features)
+        scaled_inputs.requires_grad = True
+        # Forward pass: compute outputs and target class probabilities for all interpolated inputs.
+        outputs = model(scaled_inputs)
+        probs = torch.softmax(outputs, dim=1)[:, 1]  # Probabilities for 'human' class
+        # Backward pass: compute gradients of the probabilities with respect to inputs.
+        grads = torch.autograd.grad(
+            outputs=probs,
+            inputs=scaled_inputs,
+            grad_outputs=torch.ones_like(probs),
+            create_graph=False,
+            retain_graph=False
+        )[0]  # Shape: (steps+1, num_features)
+        # Approximate the integral using the trapezoidal rule.
+        avg_grads = (grads[:-1] + grads[1:]) / 2.0  # Average gradients between successive steps.
+        integrated_grad = avg_grads.mean(dim=0, keepdim=True)  # Mean over all steps.
+        # Multiply by the input difference to get attributions.
+        attributions = (x_tensor - baseline) * integrated_grad  # Shape: (1, num_features)
+        all_attributions.append(attributions)
+    # Average attributions over all baselines.
+    avg_attributions = torch.stack(all_attributions, dim=0).mean(dim=0)
+    avg_full_prob = np.mean(full_probs)
+    return avg_attributions.squeeze().cpu().detach().numpy(), avg_full_prob