Initial clean commit: all files at root with proper LFS tracking

Files changed (12) hide show

.gitattributes +5 -0
README.md +68 -0
adapter_config.json +3 -0
adapter_model.safetensors +3 -0
added_tokens.json +3 -0
chat_template.jinja +89 -0
merges.txt +3 -0
special_tokens_map.json +3 -0
tokenizer.json +3 -0
tokenizer_config.json +3 -0
training_args.bin +3 -0
vocab.json +3 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,5 @@

+*.safetensors filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
+*.txt filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,68 @@

+---
+license: apache-2.0
+base_model: Qwen/Qwen3-0.6B
+tags:
+- legal
+- sft
+- lora
+- trl
+- eula
+datasets:
+- AxelDlv00/EULAI
+language:
+- fr
+pipeline_tag: text-generation
+---
+# EULAI - Fine-tuned Qwen3-0.6B for EULA Analysis
+This model is a fine-tuned version of **Qwen/Qwen3-0.6B** using the **LoRA (Low-Rank Adaptation)** method. It specializes in analyzing contracts (EULAs) and privacy policies to extract structured risk-based summaries.
+## Training Configuration
+Details extracted from the training configuration:
+- **Base Model**: `Qwen/Qwen3-0.6B`
+- **Method**: PEFT / LoRA
+- **LoRA Hyperparameters**:
+  - `r`: 16
+  - `alpha`: 32
+  - `dropout`: 0.05
+  - `target_modules`: q_proj, k_proj, v_proj, o_proj, gate_proj, up_proj, down_proj
+- **Training**:
+  - `epochs`: 3
+  - `learning_rate`: 1e-4
+  - `max_length`: 2048
+  - `packing`: True
+## Usage (Inference)
+To use this model, you need to load the adapter on top of the original base model.
+```python
+from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+model_id = "Qwen/Qwen3-0.6B"
+adapter_id = "AxelDlv00/EULAI-Qwen3-0.6B-SFT"
+tokenizer = AutoTokenizer.from_pretrained(adapter_id)
+model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map="auto")
+model = PeftModel.from_pretrained(model, adapter_id)
+prompt = "We collect your GPS data continuously even when the application is closed."
+messages = [{"role": "user", "content": prompt}]
+inputs = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to("cuda")
+outputs = model.generate(**inputs, max_new_tokens=512, temperature=0.1)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+```
+## Output Format
+The model generates structured bullet points in this format:
+```
+- [BLOCKER/BAD/GOOD/NEUTRAL] : Title : Explanation
+```

adapter_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:572abc485612fde6b0515edd5e8a9925d34cc5ce22993710a2a5bc34a4ddf231
+size 1046

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3965bd58e5653f8e6dccd0a2be42b0d4c50d6534e11d8bad27899f21c8e49121
+size 40422168

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c0284b582e14987fbd3d5a2cb2bd139084371ed9acbae488829a1c900833c680
+size 707

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,89 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0].role == 'system' %}
+        {{- messages[0].content + '\n\n' }}
+    {%- endif %}
+    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0].role == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
+{%- for message in messages[::-1] %}
+    {%- set index = (messages|length - 1) - loop.index0 %}
+    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
+        {%- set ns.multi_step_tool = false %}
+        {%- set ns.last_query_index = index %}
+    {%- endif %}
+{%- endfor %}
+{%- for message in messages %}
+    {%- if message.content is string %}
+        {%- set content = message.content %}
+    {%- else %}
+        {%- set content = '' %}
+    {%- endif %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
+        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {%- set reasoning_content = '' %}
+        {%- if message.reasoning_content is string %}
+            {%- set reasoning_content = message.reasoning_content %}
+        {%- else %}
+            {%- if '</think>' in content %}
+                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
+                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
+            {%- endif %}
+        {%- endif %}
+        {%- if loop.index0 > ns.last_query_index %}
+            {%- if loop.last or (not loop.last and reasoning_content) %}
+                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
+            {%- else %}
+                {{- '<|im_start|>' + message.role + '\n' + content }}
+            {%- endif %}
+        {%- else %}
+            {{- '<|im_start|>' + message.role + '\n' + content }}
+        {%- endif %}
+        {%- if message.tool_calls %}
+            {%- for tool_call in message.tool_calls %}
+                {%- if (loop.first and content) or (not loop.first) %}
+                    {{- '\n' }}
+                {%- endif %}
+                {%- if tool_call.function %}
+                    {%- set tool_call = tool_call.function %}
+                {%- endif %}
+                {{- '<tool_call>\n{"name": "' }}
+                {{- tool_call.name }}
+                {{- '", "arguments": ' }}
+                {%- if tool_call.arguments is string %}
+                    {{- tool_call.arguments }}
+                {%- else %}
+                    {{- tool_call.arguments | tojson }}
+                {%- endif %}
+                {{- '}\n</tool_call>' }}
+            {%- endfor %}
+        {%- endif %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+    {%- if enable_thinking is defined and enable_thinking is false %}
+        {{- '<think>\n\n</think>\n\n' }}
+    {%- endif %}
+{%- endif %}

merges.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8831e4f1a044471340f7c0a83d7bd71306a5b867e95fd870f74d0c5308a904d5
+size 1671853

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76862e765266b85aa9459767e33cbaf13970f327a0e88d1c65846c2ddd3a1ecd
+size 613

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
+size 11422654

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:74daa41f95613a4fb144c7183ba826bbdc36bec1f39a36a57422598e62fe3a31
+size 5406

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ddd5f31b76b93f0f5dec227353a1dd1f51750bfa7c80909a4eff935ad05158e
+size 6353

vocab.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
+size 2776833