Spaces:
Running
Running
| import torch | |
| import torch.nn as nn | |
| from transformers import AutoModel | |
| from peft import get_peft_model, LoraConfig, TaskType | |
class VanillaBaselineModel(nn.Module):
    """Baseline trajectory-level hallucination classifier.

    Every step of a trajectory is encoded with a LoRA-adapted transformer
    encoder; the per-step first-token ([CLS]) vectors are mean-pooled into a
    single trajectory representation, which a small MLP head maps to one raw
    logit. It does NOT use the AgentSight Context Encoder or Dual Heads.

    Args:
        encoder_name: Model id or path handed to ``AutoModel.from_pretrained``.
        lora_r: LoRA rank.
        lora_alpha: LoRA scaling factor.
        lora_dropout: Dropout applied inside the LoRA adapters.
        lora_target_modules: Names of the sub-modules that receive LoRA
            adapters. ``None`` selects ``DEFAULT_LORA_TARGETS``, which are
            the projection names used by the default DeBERTa encoder; pass
            explicit names when ``encoder_name`` points at a different
            architecture.
        head_hidden_dim: Width of the hidden layer of the classifier head.
        head_dropout: Dropout applied inside the classifier head.
    """

    # Attention projections adapted by default (DeBERTa naming).
    DEFAULT_LORA_TARGETS = ("query_proj", "value_proj")

    def __init__(
        self,
        encoder_name: str = "microsoft/deberta-v3-base",
        *,
        lora_r: int = 8,
        lora_alpha: int = 32,
        lora_dropout: float = 0.1,
        lora_target_modules=None,
        head_hidden_dim: int = 64,
        head_dropout: float = 0.1,
    ) -> None:
        super().__init__()

        if lora_target_modules is None:
            lora_target_modules = self.DEFAULT_LORA_TARGETS

        base_encoder = AutoModel.from_pretrained(
            encoder_name, torch_dtype=torch.float32
        )
        # Trade compute for memory: activations are recomputed in backward.
        base_encoder.gradient_checkpointing_enable()

        # Apply LoRA to the base encoder so only the adapters are trained.
        peft_config = LoraConfig(
            task_type=TaskType.FEATURE_EXTRACTION,
            r=lora_r,
            lora_alpha=lora_alpha,
            lora_dropout=lora_dropout,
            target_modules=list(lora_target_modules),
        )
        self.encoder = get_peft_model(base_encoder, peft_config)
        self.encoder.print_trainable_parameters()

        enc_dim = self.encoder.config.hidden_size

        # Simple binary classification head.
        self.classifier = nn.Sequential(
            nn.Linear(enc_dim, head_hidden_dim),
            nn.ReLU(),
            nn.Dropout(head_dropout),
            nn.Linear(head_hidden_dim, 1),  # Raw logits for BCEWithLogitsLoss
        )

    def forward(
        self, input_ids: torch.Tensor, attention_mask: torch.Tensor
    ) -> torch.Tensor:
        """Score one whole trajectory.

        Args:
            input_ids: Token ids for the steps of a single trajectory; the
                first dimension indexes steps (callers are expected to pass
                shape ``(N_steps, max_len)``).
            attention_mask: Attention mask matching ``input_ids``.

        Returns:
            Tensor of shape ``(1,)`` holding the raw (pre-sigmoid)
            hallucination logit for the trajectory.

        Raises:
            ValueError: If the trajectory contains no steps. Averaging over
                zero steps would otherwise yield a silent NaN logit.
        """
        if input_ids.shape[0] == 0:
            raise ValueError(
                "VanillaBaselineModel.forward requires at least one step; "
                f"got input_ids with shape {tuple(input_ids.shape)}"
            )

        # Encode every step independently.
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)

        # First-token ([CLS]) representation of each step: (N_steps, enc_dim).
        cls_repr = outputs.last_hidden_state[:, 0, :]

        # Mean-pool across steps into one trajectory vector: (1, enc_dim).
        # Unlike max pooling, the mean gives every step a share of the
        # gradient, so all steps contribute to training.
        traj_repr = cls_repr.mean(dim=0).unsqueeze(0)

        # One logit for the entire sequence: (1, 1) -> (1,).
        logits = self.classifier(traj_repr).squeeze(-1)
        return logits