Christopher Denq committed
Commit b64620a · 1 Parent(s): 0986517

model push
.gitattributes CHANGED
@@ -25,7 +25,6 @@
  *.safetensors filter=lfs diff=lfs merge=lfs -text
  saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
  *.tflite filter=lfs diff=lfs merge=lfs -text
  *.tgz filter=lfs diff=lfs merge=lfs -text
  *.wasm filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,45 @@
+ # IDE / editor
+ .vs/
+ .vscode/
+ .idea/
+ *.suo
+ *.user
+
+ # Jupyter
+ .ipynb_checkpoints/
+ *.ipynb
+
+ # Python
+ __pycache__/
+ *.py[cod]
+ *.pyo
+ *.pyd
+ .Python
+ *.egg-info/
+ dist/
+ build/
+ *.egg
+
+ # Env
+ .env
+ .venv/
+ venv/
+ env/
+
+ # OS
+ .DS_Store
+ Thumbs.db
+
+ # Model artifacts (large files — use Git LFS or HF upload tools)
+ *.bin
+ *.pt
+ *.pth
+ *.ckpt
+ *.h5
+ *.onnx
+ *.safetensors
+
+ # Logs and outputs
+ *.log
+ Outputs/
+ Saved/
README.md CHANGED
@@ -1,3 +1,100 @@
- ---
- license: mit
- ---
+ ---
+ license: mit
+ language:
+ - en
+ tags:
+ - text-classification
+ - deception-detection
+ - few-shot-learning
+ - retrieval-augmented-generation
+ - in-context-learning
+ - mistral
+ - 4-bit
+ - bitsandbytes
+ - quantized
+ pipeline_tag: text-generation
+ base_model: Intel/neural-chat-7b-v3-3
+ ---
+
+ # RADDICL 2.0 — Quantized LLM
+
+ This is the 4-bit NF4 quantized LLM component of **RADDICL 2.0** (**R**etrieval **A**ugmented **D**eception **D**etection through **I**n-**C**ontext **L**earning), a domain-agnostic deception detection system.
+
+ - **Base model**: [`Intel/neural-chat-7b-v3-3`](https://huggingface.co/Intel/neural-chat-7b-v3-3) (Mistral 7B architecture)
+ - **Quantization**: 4-bit NF4 via [BitsAndBytes](https://github.com/TimDettmers/bitsandbytes), double quantization enabled
+ - **Compute dtype**: `float16`
+ - **Total parameters**: 3.75B (~3.74 GB estimated memory footprint)
+
+ For the full RAG pipeline and demo, see [`cdenq/raddicl2-demo`](https://huggingface.co/spaces/cdenq/raddicl2-demo).
+
+ ---
+
+ ## Model Details
+
+ | Property | Value |
+ |---|---|
+ | Architecture | `MistralForCausalLM` |
+ | Base model | `Intel/neural-chat-7b-v3-3` |
+ | Quantization method | BitsAndBytes `nf4`, double quant |
+ | Compute dtype | `float16` |
+ | Max position embeddings | 32768 |
+ | Sliding window | 4096 |
+ | Vocab size | 32000 |
+ | Attention | SDPA |
+
+ ---
+
+ ## How to Load
+
+ ```python
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from huggingface_hub import snapshot_download
+
+ # Download model files
+ model_path = snapshot_download(repo_id="cdenq/raddicl2-demo-model")
+
+ # Load tokenizer
+ tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True)
+
+ # Load quantized model (quantization config is embedded in config.json)
+ model = AutoModelForCausalLM.from_pretrained(
+     model_path,
+     device_map="auto",
+     trust_remote_code=True,
+ )
+ ```
+
+ The `quantization_config` is already embedded in `config.json`, so no extra `BitsAndBytesConfig` is needed when loading.
+
+ ---
+
+ ## Intended Use
+
+ This model is the generation component of the RADDICL 2.0 deception detection pipeline. Given a structured few-shot prompt (constructed by the RADDICL 2.0 RAG pipeline), it produces a classification label (`deceptive` / `non-deceptive`) and step-by-step reasoning.
+
+ It is not intended to be used as a standalone general-purpose chat model.
+
+ ---
+
+ ## Citation
+
+ RADDICL 2.0 extends the original RADDICL system. If you use this work, please cite:
+
+ ```bibtex
+ @inproceedings{boumber2024raddicl,
+   title = {LLMs for Explainable Few-shot Deception Detection},
+   author = {Boumber, Dayne and Denq, Christopher and Verma, Rakesh},
+   booktitle = {Proceedings of the ACM Web Conference},
+   year = {2024},
+   doi = {10.1145/3643651.3659898}
+ }
+ ```
+
+ *(Citation for RADDICL 2.0 will be updated upon publication.)*
+
+ ---
+
+ ## Acknowledgments
+
+ Developed by Christopher Denq and Dr. Rakesh Verma at the [ReDAS Lab](https://github.com/ReDASers/), University of Houston.
+ Supported in part by the NSF REU CS grant and the University of Houston.
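The README's Intended Use section says the model consumes a structured few-shot prompt built by the RAG pipeline. The actual prompt format lives in the demo Space and is not documented here, so the field names below are illustrative assumptions; this is only a sketch of the retrieve-then-prompt pattern:

```python
# Illustrative only: the real few-shot prompt is assembled by the
# RADDICL 2.0 RAG pipeline; "Statement:" / "Label:" fields here are
# hypothetical stand-ins for demonstration.
def build_prompt(retrieved, query):
    """Assemble a few-shot deception-classification prompt from
    retrieved (text, label) examples plus the query to classify."""
    lines = ["Classify each statement as deceptive or non-deceptive.", ""]
    for text, label in retrieved:
        lines += [f"Statement: {text}", f"Label: {label}", ""]
    lines += [f"Statement: {query}", "Label:"]
    return "\n".join(lines)

examples = [
    ("Best hotel ever, absolutely perfect in every way!!!", "deceptive"),
    ("Check-in was slow, but the room was clean and quiet.", "non-deceptive"),
]
prompt = build_prompt(examples, "This product changed my life overnight.")
print(prompt)
```

The prompt ends at an open `Label:` field so the model's first generated tokens are the classification, which the pipeline can then parse.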
chat_template.jinja ADDED
@@ -0,0 +1,29 @@
+ {%- if messages[0]['role'] == 'system' %}
+ {%- set system_message = messages[0]['content'] %}
+ {%- set loop_messages = messages[1:] %}
+ {%- else %}
+ {%- set loop_messages = messages %}
+ {%- endif %}
+
+ {%- for message in loop_messages %}
+ {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}
+ {{- raise_exception('After the optional system message, conversation roles must alternate user/assistant/user/assistant/...') }}
+ {%- endif %}
+ {%- if loop.first and system_message is defined %}
+ {{- '### System:
+ ' + system_message + '
+ ' }}
+ {%- endif %}
+ {%- if message['role'] == 'user' %}
+ {{- '### User:
+ ' + message['content'] + '
+ ' }}
+ {%- elif message['role'] == 'assistant' %}
+ {{- '### Assistant:
+ ' + message['content'] + eos_token + '
+ '}}
+ {%- else %}
+ {{- raise_exception('Only user and assistant roles are supported, with the exception of an initial optional system message!') }}
+ {%- endif %}
+ {%- endfor %}{% if add_generation_prompt %}{{ '### Assistant:
+ ' }}{% endif %}
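The template follows neural-chat's `### System:` / `### User:` / `### Assistant:` layout. For readers who want to inspect the rendered prompt without loading the tokenizer, its happy path (the role-alternation and unknown-role `raise_exception` checks omitted) is equivalent to this pure-Python sketch:

```python
def render_neural_chat(messages, eos_token="</s>", add_generation_prompt=False):
    """Pure-Python equivalent of the chat template above; the
    role-alternation / unknown-role error checks are omitted."""
    out = []
    # Optional leading system message
    if messages and messages[0]["role"] == "system":
        out.append("### System:\n" + messages[0]["content"] + "\n")
        messages = messages[1:]
    for msg in messages:
        if msg["role"] == "user":
            out.append("### User:\n" + msg["content"] + "\n")
        elif msg["role"] == "assistant":
            # Assistant turns are terminated with the EOS token
            out.append("### Assistant:\n" + msg["content"] + eos_token + "\n")
    if add_generation_prompt:
        out.append("### Assistant:\n")
    return "".join(out)

prompt = render_neural_chat(
    [{"role": "system", "content": "You are a deception analyst."},
     {"role": "user", "content": "Classify: 'Best purchase ever!!!'"}],
    add_generation_prompt=True,
)
print(prompt)
```

In practice you would call `tokenizer.apply_chat_template(...)` instead; this sketch only documents the format the template produces.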
config.json ADDED
@@ -0,0 +1,42 @@
+ {
+ "architectures": [
+ "MistralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "head_dim": null,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mistral",
+ "num_attention_heads": 32,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "pad_token_id": 0,
+ "quantization_config": {
+ "_load_in_4bit": true,
+ "_load_in_8bit": false,
+ "bnb_4bit_compute_dtype": "float16",
+ "bnb_4bit_quant_storage": "uint8",
+ "bnb_4bit_quant_type": "nf4",
+ "bnb_4bit_use_double_quant": true,
+ "llm_int8_enable_fp32_cpu_offload": false,
+ "llm_int8_has_fp16_weight": false,
+ "llm_int8_skip_modules": null,
+ "llm_int8_threshold": 6.0,
+ "load_in_4bit": true,
+ "load_in_8bit": false,
+ "quant_method": "bitsandbytes"
+ },
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 10000.0,
+ "sliding_window": 4096,
+ "tie_word_embeddings": false,
+ "torch_dtype": "float16",
+ "transformers_version": "4.53.3",
+ "use_cache": true,
+ "vocab_size": 32000
+ }
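The architecture fields above pin down the full-precision size of the base model. A quick sanity check, assuming `head_dim = hidden_size / num_attention_heads = 128` (the config leaves `head_dim` null, so this is the standard Mistral default) and untied embeddings per `tie_word_embeddings: false`:

```python
# Full-precision parameter count implied by config.json's fields.
hidden, inter, vocab = 4096, 14336, 32000
layers, kv_heads, head_dim = 32, 8, 128  # head_dim = 4096 / 32 heads

attn = 2 * hidden * hidden + 2 * hidden * kv_heads * head_dim  # q,o + k,v (GQA)
mlp = 3 * hidden * inter                                       # gate, up, down
norms = 2 * hidden                                             # two RMSNorms per layer
embeddings = 2 * vocab * hidden          # input embeddings + untied lm_head
total = layers * (attn + mlp + norms) + embeddings + hidden    # + final norm

print(total)  # 7241732096, i.e. the ~7.24B of the base Mistral-7B model
```

This is the pre-quantization count; the smaller figure reported for this checkpoint is explained under `model_4bit_config.json` below.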
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.53.3"
+ }
model_4bit_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+ "model_name": "Intel/neural-chat-7b-v3-3",
+ "quantization_config": {
+ "use_quantization": true,
+ "quantization_mode": "4bit"
+ },
+ "device_config": {
+ "device_type": "cpu",
+ "max_memory": {
+ "0": "15GB",
+ "cpu": "9GB"
+ },
+ "low_cpu_mem_usage": false
+ },
+ "model_params": {
+ "attn_implementation": "sdpa",
+ "pad_token_id": 0,
+ "trust_remote_code": true
+ },
+ "model_info": {
+ "total_params": 3752071168,
+ "trainable_params": 262410240,
+ "dtype": "torch.float16",
+ "estimated_memory_gb": 3.7387773990631104,
+ "quantization_mode": "4bit",
+ "dtype_variants": [
+ "torch.float16",
+ "torch.uint8"
+ ],
+ "gpu_memory_allocated_gb": 0.1322178840637207,
+ "gpu_memory_reserved_gb": 0.158203125
+ }
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,29 @@
+ {
+ "additional_special_tokens": [
+ "<unk>",
+ "<s>",
+ "</s>"
+ ],
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "</s>",
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,48 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "add_prefix_space": null,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [
+ "<unk>",
+ "<s>",
+ "</s>"
+ ],
+ "bos_token": "<s>",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "extra_special_tokens": {},
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": true
+ }