Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

README.md +84 -0
config.json +26 -0
demo.py +56 -0
model.safetensors +3 -0
special_tokens_map.json +7 -0
tokenizer.json +0 -0
tokenizer_config.json +56 -0
vocab.txt +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,84 @@

+---
+language:
+- en
+tags:
+- text-classification
+- edtech
+- feedback-validation
+- bert
+- pytorch
+license: mit
+datasets:
+- custom-edtech-feedback
+metrics:
+- accuracy
+- precision
+- recall
+- f1
+---
+# EdTech Feedback Validation Model
+## Model Description
+This model is designed to validate user feedback in EdTech applications by determining whether a given feedback text aligns with a selected reason. It uses a BERT-based architecture for text pair classification.
+## Intended Uses & Limitations
+### Primary Use Case
+- Validating user feedback in educational technology applications
+- Ensuring feedback text aligns with predefined reason categories
+- Improving user experience by providing accurate feedback categorization
+### Limitations
+- Trained on English text only
+- Requires both feedback text and reason text as input
+- Binary classification (aligned/not aligned)
+## Training and Evaluation Data
+The model was trained on a custom dataset containing:
+- Training samples: 2,061 feedback-reason pairs
+- Evaluation samples: 9,000 feedback-reason pairs
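+- All training samples were positive (aligned) examples, i.e. the training set contained no negative (not aligned) pairs; validate predictions on your own data before relying on the "not aligned" class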
+- Evaluation set contains both positive and negative examples
+## Usage
+```python
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import torch
+# Load model and tokenizer
+model_name = "your-username/edtech-feedback-validation"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSequenceClassification.from_pretrained(model_name)
+# Example usage
+text = "this is an amazing app for online classes!"
+reason = "good app for conducting online classes"
+# Tokenize inputs
+inputs = tokenizer(text, reason, return_tensors="pt", padding=True, truncation=True)
+# Get prediction
+with torch.no_grad():
+    outputs = model(**inputs)
+    probabilities = torch.softmax(outputs.logits, dim=1)
+    prediction = torch.argmax(probabilities, dim=1).item()
+    confidence = probabilities[0][prediction].item()
+print(f"Prediction: {prediction} (Aligned: {prediction == 1})")
+print(f"Confidence: {confidence:.3f}")
+```
+## Model Architecture
+- Base Model: BERT (bert-base-uncased)
+- Task: Text Pair Classification
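+- Output: Binary classification (0: Not Aligned, 1: Aligned). Note that `config.json` does not define `id2label`, so `transformers` pipelines report these classes under the default names `LABEL_0` and `LABEL_1`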
+- Training Framework: PyTorch
+## License
+This model is released under the MIT License.

config.json ADDED Viewed

	@@ -0,0 +1,26 @@

+{
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "dtype": "float32",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "transformers_version": "4.56.0",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

demo.py ADDED Viewed

	@@ -0,0 +1,56 @@

+#!/usr/bin/env python3
+"""
+Demo script for EdTech Feedback Validation Model
+"""
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+def load_model(model_name):
+    """Load the model and tokenizer"""
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSequenceClassification.from_pretrained(model_name)
+    return tokenizer, model
+def predict_alignment(text, reason, tokenizer, model):
+    """Predict whether text aligns with reason"""
+    # Tokenize inputs
+    inputs = tokenizer(
+        text,
+        reason,
+        return_tensors="pt",
+        padding=True,
+        truncation=True,
+        max_length=512
+    )
+    # Get prediction
+    with torch.no_grad():
+        outputs = model(**inputs)
+        probabilities = torch.softmax(outputs.logits, dim=1)
+        prediction = torch.argmax(probabilities, dim=1).item()
+        confidence = probabilities[0][prediction].item()
+    return prediction, confidence
+if __name__ == "__main__":
+    # Example usage
+    model_name = "your-username/edtech-feedback-validation"
+    # Load model
+    tokenizer, model = load_model(model_name)
+    # Test examples
+    test_cases = [
+        ("this is an amazing app for online classes!", "good app for conducting online classes"),
+        ("i cannot login to zoom", "help"),
+        ("very practical and easy to use", "app is user-friendly")
+    ]
+    for text, reason in test_cases:
+        prediction, confidence = predict_alignment(text, reason, tokenizer, model)
+        result = "ALIGNED" if prediction == 1 else "NOT ALIGNED"
+        print(f"Text: {text}")
+        print(f"Reason: {reason}")
+        print(f"Result: {result} (Confidence: {confidence:.3f})")
+        print("-" * 50)

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ddf5c176d5141cee3262e645655e2ee5a7653e71a8f522b80e3c6703233b048f
+size 437958648

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff