Vera-ZWY
/

my_hydra_model

+{
+  "architectures": [
+    "HydraModel"
+  ],
+  "backbone_model_name": "answerdotai/ModernBERT-base",
+  "model_type": "hydra",
+  "num_of_head": 7,
+  "output_size": 1,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.3"
+}

hydra_model.py ADDED Viewed

	@@ -0,0 +1,632 @@

+import torch
+import torch.nn as nn
+from transformers import PreTrainedModel, PretrainedConfig, AutoModel, AutoTokenizer
+from transformers import AutoModelForSequenceClassification
+from transformers.modeling_outputs import SequenceClassifierOutput
+from typing import Optional, List, Dict, Union, Tuple
+from huggingface_hub import HfApi
+import os
+import json
class HydraConfig(PretrainedConfig):
    """Settings for a Hydra (multi-head) classifier.

    Attributes stored on the instance:
        backbone_model_name: Hub id of the pretrained encoder to load.
        num_of_heads: Number of classification heads.
        hidden_size: Width of the pooled embedding fed to every head.
        output_size: Number of outputs produced by each head.
        label_dict: Mapping from label name to logit index (``{}`` if omitted).
        threshold: Probability cut-off used when decoding predictions.
    """

    model_type = "hydra"

    def __init__(
        self,
        backbone_model_name: str = "answerdotai/ModernBERT-base",
        num_of_heads: int = 7,
        hidden_size: int = 768,
        output_size: int = 1,
        label_dict: Dict[str, int] = None,
        threshold: float = 0.5,
        **kwargs
    ):
        # Let the base class consume the generic transformers options first.
        super().__init__(**kwargs)

        # Architecture of the model.
        self.backbone_model_name = backbone_model_name
        self.num_of_heads = num_of_heads
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Decoding of logits into label names; a missing or empty mapping is
        # normalised to a fresh empty dict.
        self.label_dict = label_dict or {}
        self.threshold = threshold
+# We'll use the standard SequenceClassifierOutput instead of a custom output class
class HydraForSequenceClassification(PreTrainedModel):
    """
    Hydra model for sequence classification with multiple heads.

    The backbone's token embeddings are mean-pooled over the attention mask and
    passed to ``config.num_of_heads`` independent heads; the head outputs are
    concatenated along the last dimension to form the logits.

    This model can be loaded with the `AutoModelForSequenceClassification` class.
    """

    config_class = HydraConfig
    _auto_class = "AutoModelForSequenceClassification"

    # Label names that stand for "nothing detected" in the supported label sets.
    _NONE_LABELS = ("Emotionless", "Not Anxiety", "No Anger", "Not Anger")

    def __init__(self, config):
        """Load the backbone named in ``config`` and build one head per label."""
        super().__init__(config)
        # Load backbone
        self.backbone = AutoModel.from_pretrained(config.backbone_model_name)
        # Initialize the heads
        self.heads = nn.ModuleList([
            self.get_classifier(config.hidden_size, config.output_size)
            for _ in range(config.num_of_heads)
        ])
        # Initialize weights
        self.post_init()

    def weights_init(self, m):
        """Apply Kaiming-uniform initialisation to the weight of a linear layer."""
        if isinstance(m, nn.Linear):
            nn.init.kaiming_uniform_(m.weight.data)

    def get_classifier(self, input_size, output_size):
        """Build one classification head.

        Args:
            input_size: Width of the pooled embedding.
            output_size: Number of outputs of the head.

        Returns:
            nn.Sequential: Two stacked linear layers with initialised weights.
        """
        # NOTE(review): there is no activation between the two linear layers.
        # The layout is kept unchanged so existing checkpoints still load.
        mlp = nn.Sequential(
            nn.Linear(in_features=input_size, out_features=input_size, bias=True),
            nn.Linear(in_features=input_size, out_features=output_size, bias=True),
        )
        # Apply weight initialization to every linear layer of the head.
        mlp.apply(self.weights_init)
        return mlp

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        position_ids=None,
        head_mask=None,
        inputs_embeds=None,
        labels=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):
        """Encode the inputs, mean-pool them and run every head.

        Args:
            input_ids, attention_mask, token_type_ids, position_ids, head_mask,
            inputs_embeds, output_attentions, output_hidden_states: Forwarded
                to the backbone when they are not ``None``.
            labels: Optional targets with the same shape as the logits; they
                are cast to the logits' dtype and scored with
                ``BCEWithLogitsLoss`` (assumes multi-hot targets, matching the
                sigmoid/threshold decoding in ``get_labels_from_logits``).
            return_dict: Whether to return a ``SequenceClassifierOutput``;
                defaults to ``config.use_return_dict``.

        Returns:
            ``SequenceClassifierOutput`` or, when ``return_dict`` is false, the
            tuple ``([loss,] logits, hidden_states_or_None)``.

        Raises:
            ValueError: If neither ``input_ids`` nor ``inputs_embeds`` is given.
        """
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # Pooling needs a mask, so build an all-ones mask when none is supplied.
        if attention_mask is None:
            if input_ids is not None:
                attention_mask = torch.ones(
                    input_ids.shape, dtype=torch.long, device=input_ids.device
                )
            elif inputs_embeds is not None:
                attention_mask = torch.ones(
                    inputs_embeds.shape[:2], dtype=torch.long, device=inputs_embeds.device
                )
            else:
                raise ValueError("You have to specify either input_ids or inputs_embeds")

        # Only forward arguments that were actually provided: not every
        # backbone accepts every BERT-style keyword (e.g. token_type_ids or
        # head_mask), and passing them as None would raise a TypeError there.
        backbone_inputs = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "token_type_ids": token_type_ids,
            "position_ids": position_ids,
            "head_mask": head_mask,
            "inputs_embeds": inputs_embeds,
            "output_attentions": output_attentions,
            "output_hidden_states": output_hidden_states,
        }
        backbone_inputs = {
            name: value for name, value in backbone_inputs.items() if value is not None
        }
        outputs = self.backbone(**backbone_inputs, return_dict=return_dict)

        # Mean pooling over the non-masked tokens.
        token_embeddings = outputs[0]
        mask = attention_mask.unsqueeze(-1).to(token_embeddings.dtype)
        sum_embeddings = torch.sum(token_embeddings * mask, 1)
        # Clamp so that a fully masked row yields zeros instead of NaN (0 / 0).
        token_counts = torch.sum(mask, 1).clamp(min=1e-9)
        mean_embeddings = sum_embeddings / token_counts

        # Apply each head and concatenate their outputs.
        head_outputs = [head(mean_embeddings) for head in self.heads]
        logits = torch.cat(head_outputs, dim=-1)

        # Each logit is decoded independently through a sigmoid, so the
        # matching training objective is binary cross-entropy on the logits.
        loss = None
        if labels is not None:
            targets = labels.to(device=logits.device, dtype=logits.dtype)
            loss = nn.BCEWithLogitsLoss()(logits, targets)

        # Handle return format
        if not return_dict:
            output = (logits,)
            if loss is not None:
                output = (loss,) + output
            return output + (getattr(outputs, "hidden_states", None),)
        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=getattr(outputs, "hidden_states", None),
            attentions=getattr(outputs, "attentions", None),
        )

    @classmethod
    def convert_checkpoint_to_hf_model(cls,
                                   checkpoint_path,
                                   backbone_model_name="answerdotai/ModernBERT-base",
                                   label_dict=None,
                                   threshold=0.5,
                                   save_directory=None):
        """
        Convert a checkpoint to a Hugging Face model.

        Args:
            checkpoint_path: Path to the checkpoint file (a state dict that is
                passed to ``load_state_dict``)
            backbone_model_name: Name of the backbone model
            label_dict: Dictionary mapping labels to indices; one head is
                created per entry (a single head when it is empty or None)
            threshold: Threshold for classification
            save_directory: Directory to save the model, tokenizer and
                ``label_dict.json`` into; nothing is written when it is falsy

        Returns:
            HydraForSequenceClassification: The converted model
        """
        # Imported locally so this method does not rely on a module-level import.
        from transformers import AutoConfig

        # Load the checkpoint. SECURITY: torch.load unpickles the file;
        # weights_only=True restricts it to tensors and plain containers,
        # which is all a state dict needs.
        checkpoint = torch.load(
            checkpoint_path,
            map_location=torch.device('cpu'),
            weights_only=True,
        )

        # Only the hidden size is needed here, so read the backbone's config
        # rather than loading its weights (the model constructor loads them).
        hidden_size = AutoConfig.from_pretrained(backbone_model_name).hidden_size

        # Create config
        config = HydraConfig(
            backbone_model_name=backbone_model_name,
            num_of_heads=len(label_dict) if label_dict else 1,
            hidden_size=hidden_size,
            output_size=1,
            label_dict=label_dict,
            threshold=threshold
        )

        # Create model with this config and load the trained weights.
        model = cls(config)
        model.load_state_dict(checkpoint)

        # Save if directory provided
        if save_directory:
            model.save_pretrained(save_directory)
            # Save the backbone's tokenizer next to the model.
            tokenizer = AutoTokenizer.from_pretrained(backbone_model_name)
            tokenizer.save_pretrained(save_directory)
            # Save label dictionary in a special file
            if label_dict:
                label_path = os.path.join(save_directory, "label_dict.json")
                with open(label_path, "w", encoding="utf-8") as f:
                    json.dump(label_dict, f)
        return model

    def get_labels_from_logits(self, logits):
        """
        Convert logits to labels based on threshold.

        A label is selected when the sigmoid of its logit reaches
        ``config.threshold``. If nothing is selected, the first "none" label
        present in ``config.label_dict`` is used; if several labels are
        selected, one "none" label is dropped from the selection.

        Args:
            logits: Tensor of shape (batch_size, num_labels)

        Returns:
            list: List of predicted labels for each sample
        """
        # Convert logits to probabilities and threshold them.
        probabilities = torch.sigmoid(logits)
        selected = (probabilities >= self.config.threshold).tolist()

        label_dict = self.config.label_dict
        predicted_labels = []
        for row in selected:
            sample_labels = [label for label, idx in label_dict.items() if row[idx]]
            if not sample_labels:
                # Nothing fired: fall back to the label set's "none" label.
                fallback = next(
                    (label for label in self._NONE_LABELS if label in label_dict),
                    None,
                )
                if fallback is not None:
                    sample_labels.append(fallback)
            elif len(sample_labels) > 1:
                # Remove the "none" label if multiple labels are predicted.
                for none_label in self._NONE_LABELS:
                    if none_label in sample_labels:
                        sample_labels.remove(none_label)
                        break
            predicted_labels.append(sample_labels)
        return predicted_labels
# Make the "hydra" model type known to the transformers Auto* machinery by
# adding the config and model classes to the corresponding auto mappings.
from transformers.models.auto.configuration_auto import CONFIG_MAPPING
from transformers.models.auto.modeling_auto import (
    MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING,
)

# The config must be registered under its model_type string ...
CONFIG_MAPPING.register("hydra", HydraConfig)
# ... and the model class is then keyed by that config class.
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING.register(
    HydraConfig,
    HydraForSequenceClassification,
)
# Skeleton shared by every generated README.md model card. It is filled with
# str.format, so text substituted into it (including the usage snippet with
# its own braces) is inserted verbatim and never re-evaluated.
_MODEL_CARD_TEMPLATE = """# {title}
{summary}
## Model Details
- **Model Type:** Hydra (Multi-headed classification model)
- **Backbone:** ModernBERT
- **Labels:** {labels}
- **Threshold:** {threshold}
## Usage
```python
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained("{repo}")
model = AutoModelForSequenceClassification.from_pretrained("{repo}")
{usage}
```
## License
This model is available for research and commercial use.
"""

# Full usage example shown on the Ekman model card.
_EKMAN_USAGE_LINES = (
    "# Preprocess text",
    'text = "I\'m feeling really happy today!"',
    'inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)',
    "# Get predictions",
    "outputs = model(**inputs)",
    "logits = outputs.logits",
    "# Get labels (using the helper function)",
    "predicted_labels = model.get_labels_from_logits(logits)",
    "print(f\"Predicted emotions: {', '.join(predicted_labels[0])}\")",
)

# Abbreviated usage section shown on the other model cards.
_PLACEHOLDER_USAGE_LINES = (
    "# Example usage code",
    "# ...",
)


def _write_model_card(directory, title, summary, label_dict, threshold, repo, usage_lines):
    """Render the model card for one model and write it to ``directory/README.md``."""
    card = _MODEL_CARD_TEMPLATE.format(
        title=title,
        summary=summary,
        labels=list(label_dict.keys()),
        threshold=threshold,
        repo=repo,
        usage="\n".join(usage_lines),
    )
    with open(os.path.join(directory, "README.md"), "w", encoding="utf-8") as f:
        f.write(card)


def convert_and_push_models_to_hub(
    repo_id,
    ekman_filename,
    anxiety_filename,
    staxi_filename,
    anger_filename,
    access_token
):
    """
    Convert all checkpoint models and push them to the Hub.

    All four checkpoints are downloaded from ``repo_id`` first. Each one is
    then converted with
    ``HydraForSequenceClassification.convert_checkpoint_to_hf_model``, given a
    README.md model card and uploaded to its own repository under the owner of
    ``repo_id``.

    Args:
        repo_id: Source repository holding the checkpoints, in the form
            "username/repo-name"; the part before the first "/" is used as the
            owner of the target repositories.
        ekman_filename: Checkpoint file name of the Ekman emotions model.
        anxiety_filename: Checkpoint file name of the anxiety disorders model.
        staxi_filename: Checkpoint file name of the STAXI anger model.
        anger_filename: Checkpoint file name of the anger types model.
        access_token: Hub token used for the downloads, for creating the
            target repositories and for the uploads.

    Returns:
        dict: Target repository ids under the keys "ekman_model",
        "anxiety_model", "staxi_model" and "anger_model".
    """
    import tempfile

    from huggingface_hub import hf_hub_download

    # Assuming repo_id is in format "username/repo-name"
    username = repo_id.split('/')[0]

    # One entry per model: where it comes from, where it goes, how its logits
    # are decoded and what its model card says.
    model_specs = [
        {
            "result_key": "ekman_model",
            "filename": ekman_filename,
            "repo": f"{username}/hydra-ekman-emotions",
            "title": "Hydra Ekman Emotions Model",
            "summary": "This model identifies Ekman's 6 basic emotions plus \"Emotionless\" in text.",
            "label_dict": {
                "Anger": 0, "Disgust": 1, "Fear": 2, "Happiness": 3,
                "Sadness": 4, "Surprise": 5, "Emotionless": 6,
            },
            "threshold": 0.5,
            "usage_lines": _EKMAN_USAGE_LINES,
        },
        {
            "result_key": "anxiety_model",
            "filename": anxiety_filename,
            "repo": f"{username}/hydra-anxiety-disorders",
            "title": "Hydra Anxiety Disorders Model",
            "summary": "This model identifies different types of anxiety disorders in text.",
            "label_dict": {
                "GAD": 0, "Panic Disorder": 1, "Social Anxiety Disorder": 2,
                "Specific Phobias": 3, "Agoraphobia": 4, "Separation Anxiety Disorder": 5,
                "Selective Mutism": 6, "Not Anxiety": 7,
            },
            "threshold": 0.4,
            "usage_lines": _PLACEHOLDER_USAGE_LINES,
        },
        {
            "result_key": "staxi_model",
            "filename": staxi_filename,
            "repo": f"{username}/hydra-staxi-anger",
            "title": "Hydra STAXI Anger Model",
            "summary": "This model identifies different types of anger based on the STAXI framework.",
            "label_dict": {
                "State Anger": 0, "Trait Anger": 1, "Anger Expression-Out": 2,
                "Anger Expression-In": 3, "Anger Control-Out": 4, "Anger Control-In": 5,
                "No Anger": 6,
            },
            "threshold": 0.4,
            "usage_lines": _PLACEHOLDER_USAGE_LINES,
        },
        {
            "result_key": "anger_model",
            "filename": anger_filename,
            "repo": f"{username}/hydra-anger-types",
            "title": "Hydra Anger Types Model",
            "summary": "This model identifies different types of anger expressions in text.",
            "label_dict": {
                "Passive Anger": 0, "Volatile Anger": 1, "Fear-Based Anger": 2,
                "Frustration-Based Anger": 3, "Pain-Based Anger": 4, "Chronic Anger": 5,
                "Manipulative Anger": 6, "Overwhelmed Anger": 7, "Physiological Anger": 8,
                "Righteous Anger": 9, "Not Anger": 10,
            },
            "threshold": 0.4,
            "usage_lines": _PLACEHOLDER_USAGE_LINES,
        },
    ]

    # Download every checkpoint up front so that a missing file is reported
    # before anything has been pushed.
    checkpoint_paths = [
        hf_hub_download(repo_id=repo_id, filename=spec["filename"], token=access_token)
        for spec in model_specs
    ]

    api = HfApi()
    for spec, checkpoint_path in zip(model_specs, checkpoint_paths):
        # The context manager removes the working directory even when the
        # conversion or the upload fails.
        with tempfile.TemporaryDirectory() as model_dir:
            HydraForSequenceClassification.convert_checkpoint_to_hf_model(
                checkpoint_path,
                label_dict=spec["label_dict"],
                threshold=spec["threshold"],
                save_directory=model_dir
            )
            _write_model_card(
                model_dir,
                spec["title"],
                spec["summary"],
                spec["label_dict"],
                spec["threshold"],
                spec["repo"],
                spec["usage_lines"],
            )
            # Use the caller's token for repository creation as well, not
            # only for the upload.
            api.create_repo(spec["repo"], token=access_token, exist_ok=True)
            api.upload_folder(
                folder_path=model_dir,
                repo_id=spec["repo"],
                token=access_token
            )

    # Return the repo names for reference
    return {spec["result_key"]: spec["repo"] for spec in model_specs}
+# Example helper function to use with the standard Hugging Face models
def classify_text(model_name, text):
    """
    Predict the labels of one text with a model loaded through the Auto classes.

    Args:
        model_name: Name of the model on Hugging Face
        text: Text to classify

    Returns:
        list: Predicted labels of the first (only) sample
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    # Inference only: put the model in evaluation mode.
    model.eval()

    # Tokenize to a fixed-length PyTorch batch.
    tokenizer_options = {
        "padding": "max_length",
        "truncation": True,
        "max_length": 1024,
        "return_tensors": "pt",
    }
    batch = tokenizer(text, **tokenizer_options)

    # No gradients are needed for prediction.
    with torch.no_grad():
        logits = model(**batch).logits
        labels_per_sample = model.get_labels_from_logits(logits)

    return labels_per_sample[0]
+# Example of how to process a batch using standard HF patterns
def process_dataframe(df, model_name, text_column1, text_column2=None):
    """
    Process a DataFrame with a standard Hugging Face model.

    Each row is classified on its own. When ``text_column2`` is given, the two
    texts are joined as ``"<text1> [SEP] <text2>"``.

    Args:
        df: DataFrame to process
        model_name: Name of the model on Hugging Face
        text_column1: Name of the first text column
        text_column2: Name of the second text column (optional)

    Returns:
        list: One entry per row: the predicted labels joined with ", ", or
        None when a text is missing or the second text is "[removed]" or
        "[deleted]"
    """
    # Imported locally: the module's import header does not bring `pd` into
    # scope, so the null checks below would otherwise raise NameError.
    import pandas as pd

    # Load model and tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    # Evaluation mode only needs to be set once, not once per row.
    model.eval()

    placeholder_texts = ("[removed]", "[deleted]")
    results = []
    for _, row in df.iterrows():
        first_text = row[text_column1]
        second_text = row[text_column2] if text_column2 else None

        # Skip rows with missing values
        if pd.isnull(first_text) or (text_column2 and pd.isnull(second_text)):
            results.append(None)
            continue
        # Skip rows whose second text is only a placeholder marker.
        if text_column2 and second_text in placeholder_texts:
            results.append(None)
            continue

        # Prepare text input
        if text_column2:
            text = f"{first_text} [SEP] {second_text}"
        else:
            text = first_text

        encoded_input = tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=1024,
            return_tensors="pt"
        )
        # Run inference
        with torch.no_grad():
            logits = model(**encoded_input).logits
            predicted_labels = model.get_labels_from_logits(logits)
        results.append(", ".join(predicted_labels[0]))
    return results