heuristixai committed on
Commit
8640c33
·
verified ·
1 Parent(s): 06c5040

Upload 7 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
adapter_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 16,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": null,
25
+ "peft_type": "LORA",
26
+ "peft_version": "0.18.1",
27
+ "qalora_group_size": 16,
28
+ "r": 8,
29
+ "rank_pattern": {},
30
+ "revision": null,
31
+ "target_modules": [
32
+ "q_proj",
33
+ "v_proj"
34
+ ],
35
+ "target_parameters": null,
36
+ "task_type": "CAUSAL_LM",
37
+ "trainable_token_indices": null,
38
+ "use_dora": false,
39
+ "use_qalora": false,
40
+ "use_rslora": false
41
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86fdf271def69f020af33f1172225c9d62855e617a11dd7607265c23282d347e
3
+ size 2175168
final_metrics.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Final Training Metrics — Heuristix Reflection LoRA (v1)
2
+
3
+ ## Base Model
4
+ - **Model:** Qwen2.5-0.5B-Instruct
5
+ - **Quantization:** 4-bit (bitsandbytes NF4)
6
+
7
+ ## Dataset
8
+ - **Reflection training samples:** 120 examples
9
+ - **Format:** Question → Initial Answer → Self-Critique → Revised Answer
10
+
11
+ ## Training Configuration
12
+ - **Epochs:** 3
13
+ - **LoRA Rank (r):** 8
14
+ - **LoRA Alpha:** 16
15
+ - **LoRA Dropout:** 0.05
16
+
17
+ ## Resource Usage
18
+ - **Peak VRAM usage:** ~2.8 GB
19
+ - **Total training time:** ~20.44 minutes
20
+
21
+ ## Final Training Result
22
+ - **Final training loss:** 2.278
23
+
24
+ ## Notes
25
+ This LoRA adapter was trained to induce **self-reflection behavior** in a compact
26
+ language model. The training demonstrates that reflection-formatted supervision
27
+ can be learned with **low VRAM usage and short training time**, making the
28
+ approach feasible on consumer-grade GPUs.
29
+
30
+ ---
31
+ **Status:** Day 5 complete ✔
32
+ **Project:** Heuristix Self-Reflective LoRA (Research Preview)
readme.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HeuristixAI Self-Reflect Qwen 0.5B (v1)
2
+ This repository contains LoRA adapters trained to induce
3
+ self-reflective reasoning behavior in a compact language model.
4
+ ## Base Model
5
+ Qwen/Qwen2.5-0.5B-Instruct
6
+ ## Method
7
+ Parameter-efficient fine-tuning (LoRA) on reflection-formatted data:
8
+ Prompt → Initial Answer → Self-Critique → Revised Answer
9
+ ## Capabilities
10
+ - Structured reasoning
11
+ - Self-critique behavior
12
+ - Reduced hallucination
13
+ - Improved logical consistency
14
+ ## Training Setup
15
+ - LoRA r=8, alpha=16, dropout=0.05
16
+ - 4-bit NF4 quantization
17
+ - Dataset size: 120 reflection examples
18
+ - Peak VRAM: ~2.8 GB
19
+ - Training time: ~20 minutes (GTX 1650)
20
+ ## Usage
21
+ See reflection_lora_v1_demo.py for example inference.
22
+ ## License
23
+ Adapters released for research use.
24
+ ---
25
+ Developed by HeuristixAI.
reflection_lora_v1_demo.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Interactive demo: load the Heuristix reflection LoRA on top of Qwen2.5-0.5B."""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

# Base checkpoint on the Hub and the local directory with the trained adapter.
BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
LORA_PATH = "heuristix_reflection_lora_v1"

# 4-bit NF4 quantization (double-quant, fp16 compute) keeps VRAM usage low
# enough for consumer GPUs — the setup the adapter was trained with.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
)

print("Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

print("Loading base model...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

print("Loading reflection LoRA adapter...")
model = PeftModel.from_pretrained(base_model, LORA_PATH)

print("Model ready!\n")
def generate(prompt):
    """Run one sampled generation pass for a reflection-formatted prompt.

    Args:
        prompt: The fully formatted reflection prompt string.

    Returns:
        The decoded model output (prompt plus continuation) with special
        tokens stripped.
    """
    # Move inputs to wherever device_map="auto" actually placed the model
    # instead of hard-coding "cuda" — this also lets the demo run on
    # CPU-only machines, and is identical behavior on a single-GPU setup.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    outputs = model.generate(
        **inputs,
        max_new_tokens=350,     # allow the full three-part reflection
        temperature=0.7,        # smoother text
        top_p=0.9,              # better continuation
        do_sample=True,         # prevents early cutoff
        repetition_penalty=1.1,
    )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# ---- Interactive reflection demo ----
# Keep prompting until the user types "exit" (case-insensitive).
while (user_q := input("\nEnter a question (or type 'exit'): ")).lower() != "exit":
    reflection_prompt = f"""
Answer the question, then critique your answer, then give a revised answer.

Question: {user_q}

Format:
Initial Answer:
Self-Critique:
Revised Answer:
"""

    result = generate(reflection_prompt)

    print("\n=== REFLECTION OUTPUT ===")
    print(result)
    print("=" * 40)
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d429fe753aea0ff87a94e86396d5508abb0d1d0e1f7a0d47c787ff72e0bf2691
3
+ size 11422170
tokenizer_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|object_ref_start|>",
12
+ "<|object_ref_end|>",
13
+ "<|box_start|>",
14
+ "<|box_end|>",
15
+ "<|quad_start|>",
16
+ "<|quad_end|>",
17
+ "<|vision_start|>",
18
+ "<|vision_end|>",
19
+ "<|vision_pad|>",
20
+ "<|image_pad|>",
21
+ "<|video_pad|>"
22
+ ],
23
+ "is_local": false,
24
+ "model_max_length": 131072,
25
+ "pad_token": "<|endoftext|>",
26
+ "split_special_tokens": false,
27
+ "tokenizer_class": "Qwen2Tokenizer",
28
+ "unk_token": null
29
+ }