Initial upload

by marcovise - opened Nov 4, 2025

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+61416

-0

Files changed (8) hide show

__pycache__/labels_mapping.cpython-310.pyc +0 -0
config.json +54 -0
labels_mapping.py +70 -0
pytorch_model_qint8.bin +3 -0
special_tokens_map.json +37 -0
tokenizer.json +0 -0
tokenizer_config.json +58 -0
vocab.txt +0 -0

__pycache__/labels_mapping.cpython-310.pyc ADDED Viewed

Binary file (3.2 kB). View file

config.json ADDED Viewed

	@@ -0,0 +1,54 @@

+{
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "dtype": "float32",
+  "hidden_dim": 3072,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2",
+    "3": "LABEL_3",
+    "4": "LABEL_4",
+    "5": "LABEL_5",
+    "6": "LABEL_6",
+    "7": "LABEL_7",
+    "8": "LABEL_8",
+    "9": "LABEL_9",
+    "10": "LABEL_10",
+    "11": "LABEL_11",
+    "12": "LABEL_12"
+  },
+  "initializer_range": 0.02,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_10": 10,
+    "LABEL_11": 11,
+    "LABEL_12": 12,
+    "LABEL_2": 2,
+    "LABEL_3": 3,
+    "LABEL_4": 4,
+    "LABEL_5": 5,
+    "LABEL_6": 6,
+    "LABEL_7": 7,
+    "LABEL_8": 8,
+    "LABEL_9": 9
+  },
+  "max_position_embeddings": 1024,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.53.2",
+  "vocab_size": 30522
+}

labels_mapping.py ADDED Viewed

	@@ -0,0 +1,70 @@

# Intent taxonomy: the category names and their human-readable descriptions.
#
# INTENT_CATEGORIES_MAPPING is the single source of truth. It maps a
# one-letter code ("A".."M") to a dict with two keys:
#   "name"        - the snake_case category identifier
#   "description" - free-text guidance on when the category applies
#
# NOTE(review): the descriptions are reproduced verbatim. The entry for "D"
# contains "context.If" (no space after the period); it is left untouched in
# case this text is consumed verbatim elsewhere - confirm before editing.
INTENT_CATEGORIES_MAPPING = {
    "A": {
        "name": "academic_help",
        "description": "Students getting help with homework, assignments, tests, or studying. Key indicators: multiple problems/questions in a row, test/quiz format (multiple choice, true/false, select correct answer), textbook-style questions, requests for step-by-step solutions or translations, academic subject matter (math, science, world languages, history, etc.) in a learning context, asking for explanations of academic concepts. Use this even if not explicitly stated as homework",
    },
    "B": {
        "name": "personal_writing_or_communication",
        "description": "Draft, edit, or improve personal/professional emails, messages, social media posts, letters, or workplace communications. The focus is on REAL correspondence to actual people (boss, colleague, client, friend)",
    },
    "C": {
        "name": "writing_and_editing",
        "description": "Create, edit, or improve nonfiction or instructional writing: essays, reports, arguments, articles, blog posts, or educational materials (lesson plans, assignments, summaries). If the focus is logic, structure, or conveying factual information, consider using this category.",
    },
    "D": {
        "name": "creative_writing_and_role_play",
        "description": "Create poems, stories, fictional narratives, scripts, dialogues, or character-based roleplays. Look for tone, emotion, or imaginative context.If the writing involves characters, world-building, roleplay, sci-fi or fantasy, or other storytelling, consider using this category.",
    },
    "E": {
        "name": "general_guidance_and_info",
        "description": "Provide step-by-step guidance, practical advice, or factual information about how or why something works. Combines procedural 'how-to' help with general knowledge or curiosity.",
    },
    "F": {
        "name": "programming_and_data_analysis",
        "description": "Write or debug code or work with data/programming tools. Covers technical problem solving in computing, IT, or analytics contexts.",
    },
    "G": {
        "name": "creative_ideation",
        "description": "Generate new ideas, brainstorm concepts, discover new topics or related resources, or create names/slogans.",
    },
    "H": {
        "name": "purchasable_products",
        "description": "Ask about products, services, or prices.",
    },
    "I": {
        "name": "greetings_and_chitchat",
        "description": "Small talk or casual chat, asking about the assistant's day.",
    },
    "J": {
        "name": "relationships_and_personal_reflection",
        "description": "Discuss emotions, relationships, or introspection. Typically but not strictly non-sexual content.",
    },
    "K": {
        "name": "media_generation_or_analysis",
        "description": "Create, edit, analyze, or retrieve visual/audio/media content (images, photos, videos).",
    },
    "L": {
        "name": "other",
        "description": "If there is no indication of what the user wants or if there is an intent that is not listed above; should be rare. e.g. suspicious requests, attempts to extract sensitive information.",
    },
    "M": {
        "name": "other_obscene_or_illegal",
        "description": "If the user is making obscene or illegal requests (including violence, drugs, bigotry, hate speech, etc); should be rare.",
    },
}

# Ordered list of category names, derived from the mapping so the two
# constants cannot drift apart. Dicts preserve insertion order, so the order
# here is the "A".."M" order above.
# NOTE(review): presumably position i corresponds to class id i ("LABEL_i")
# of the accompanying classifier config - confirm against the training code.
INTENT_CATEGORIES_LIST = [
    category["name"] for category in INTENT_CATEGORIES_MAPPING.values()
]

pytorch_model_qint8.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f694b8711d28c1725c5e0ea2e691e93791d5a1c5ae72e82227f54b20cd45a8ce
+size 140296810

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff