AcBART2 / config.json
IsField's picture
Upload folder using huggingface_hub
cea672f verified
{
"_name_or_path": "/mnt/data4/luyiheng/BARTfinetune/lr_5e-05/checkpoint-195",
"_num_labels": 70,
"activation_dropout": 0.0,
"activation_function": "gelu",
"add_final_layer_norm": false,
"architectures": [
"BartForSequenceClassification"
],
"attention_dropout": 0.0,
"bos_token_id": 0,
"classif_dropout": 0.0,
"classifier_dropout": 0.0,
"d_model": 1024,
"decoder_attention_heads": 16,
"decoder_ffn_dim": 4096,
"decoder_layerdrop": 0.0,
"decoder_layers": 12,
"decoder_start_token_id": 2,
"dropout": 0.1,
"encoder_attention_heads": 16,
"encoder_ffn_dim": 4096,
"encoder_layerdrop": 0.0,
"encoder_layers": 12,
"eos_token_id": 2,
"forced_eos_token_id": 2,
"gradient_checkpointing": false,
"id2label": {
"0": "Accessibility",
"1": "Architectures",
"2": "Artificial intelligence",
"3": "Arts and humanities",
"4": "Collaborative and social computing",
"5": "Communication hardware, interfaces and storage",
"6": "Computational complexity and cryptography",
"7": "Computers in other domains",
"8": "Continuous mathematics",
"9": "Cross-computing tools and techniques",
"10": "Cryptography",
"11": "Data management systems",
"12": "Database and storage security",
"13": "Dependable and fault-tolerant systems and networks",
"14": "Design and analysis of algorithms",
"15": "Discrete mathematics",
"16": "Distributed computing methodologies",
"17": "Document management and text processing",
"18": "Document types",
"19": "Education",
"20": "Electronic commerce",
"21": "Electronic design automation",
"22": "Embedded and cyber-physical systems",
"23": "Emerging technologies",
"24": "Enterprise computing",
"25": "Formal languages and automata theory",
"26": "Formal methods and theory of security",
"27": "Hardware test",
"28": "Hardware validation",
"29": "Human and societal aspects of security and privacy",
"30": "Human computer interaction (HCI)",
"31": "Information retrieval",
"32": "Information storage systems",
"33": "Information systems applications",
"34": "Information theory",
"35": "Integrated circuits",
"36": "Interaction design",
"37": "Intrusion/anomaly detection and malware mitigation",
"38": "Logic",
"39": "Machine learning",
"40": "Mathematical analysis",
"41": "Mathematical software",
"42": "Modeling and simulation",
"43": "Models of computation",
"44": "Network algorithms",
"45": "Network architectures",
"46": "Network components",
"47": "Network performance evaluation",
"48": "Network properties",
"49": "Network protocols",
"50": "Network security",
"51": "Network services",
"52": "Network types",
"53": "Parallel computing methodologies",
"54": "Power and energy",
"55": "Probability and statistics",
"56": "Randomness, geometry and discrete structures",
"57": "Real-time systems",
"58": "Robustness",
"59": "Security in hardware",
"60": "Security services",
"61": "Semantics and reasoning",
"62": "Software and application security",
"63": "Symbolic and algebraic manipulation",
"64": "Systems security",
"65": "Theory and algorithms for application domains",
"66": "Ubiquitous and mobile computing",
"67": "Very large scale integration design",
"68": "Visualization",
"69": "World Wide Web"
},
"init_std": 0.02,
"is_encoder_decoder": true,
"label2id": {
"Accessibility": 0,
"Architectures": 1,
"Artificial intelligence": 2,
"Arts and humanities": 3,
"Collaborative and social computing": 4,
"Communication hardware, interfaces and storage": 5,
"Computational complexity and cryptography": 6,
"Computers in other domains": 7,
"Continuous mathematics": 8,
"Cross-computing tools and techniques": 9,
"Cryptography": 10,
"Data management systems": 11,
"Database and storage security": 12,
"Dependable and fault-tolerant systems and networks": 13,
"Design and analysis of algorithms": 14,
"Discrete mathematics": 15,
"Distributed computing methodologies": 16,
"Document management and text processing": 17,
"Document types": 18,
"Education": 19,
"Electronic commerce": 20,
"Electronic design automation": 21,
"Embedded and cyber-physical systems": 22,
"Emerging technologies": 23,
"Enterprise computing": 24,
"Formal languages and automata theory": 25,
"Formal methods and theory of security": 26,
"Hardware test": 27,
"Hardware validation": 28,
"Human and societal aspects of security and privacy": 29,
"Human computer interaction (HCI)": 30,
"Information retrieval": 31,
"Information storage systems": 32,
"Information systems applications": 33,
"Information theory": 34,
"Integrated circuits": 35,
"Interaction design": 36,
"Intrusion/anomaly detection and malware mitigation": 37,
"Logic": 38,
"Machine learning": 39,
"Mathematical analysis": 40,
"Mathematical software": 41,
"Modeling and simulation": 42,
"Models of computation": 43,
"Network algorithms": 44,
"Network architectures": 45,
"Network components": 46,
"Network performance evaluation": 47,
"Network properties": 48,
"Network protocols": 49,
"Network security": 50,
"Network services": 51,
"Network types": 52,
"Parallel computing methodologies": 53,
"Power and energy": 54,
"Probability and statistics": 55,
"Randomness, geometry and discrete structures": 56,
"Real-time systems": 57,
"Robustness": 58,
"Security in hardware": 59,
"Security services": 60,
"Semantics and reasoning": 61,
"Software and application security": 62,
"Symbolic and algebraic manipulation": 63,
"Systems security": 64,
"Theory and algorithms for application domains": 65,
"Ubiquitous and mobile computing": 66,
"Very large scale integration design": 67,
"Visualization": 68,
"World Wide Web": 69
},
"max_position_embeddings": 1024,
"model_type": "bart",
"normalize_before": false,
"num_hidden_layers": 12,
"output_past": false,
"pad_token_id": 1,
"problem_type": "single_label_classification",
"scale_embedding": false,
"torch_dtype": "float32",
"transformers_version": "4.49.0",
"use_cache": true,
"vocab_size": 50265
}