LbbbbbY committed on
Commit
0408017
·
verified ·
1 Parent(s): 540a57b

Upload 80 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. finlora_hf_submission/.DS_Store +0 -0
  3. finlora_hf_submission/README——finlora.md +651 -0
  4. finlora_hf_submission/SUBMISSION_SUMMARY.md +171 -0
  5. finlora_hf_submission/__pycache__/comprehensive_evaluation.cpython-313.pyc +0 -0
  6. finlora_hf_submission/__pycache__/incremental_evaluation.cpython-313.pyc +0 -0
  7. finlora_hf_submission/__pycache__/inference.cpython-313.pyc +0 -0
  8. finlora_hf_submission/__pycache__/missing_tests.cpython-313.pyc +0 -0
  9. finlora_hf_submission/__pycache__/robust_incremental.cpython-313.pyc +0 -0
  10. finlora_hf_submission/inference.py +294 -0
  11. finlora_hf_submission/models/.DS_Store +0 -0
  12. finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/README.md +136 -0
  13. finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
  14. finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
  15. finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/README.md +135 -0
  16. finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
  17. finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
  18. finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/README.md +124 -0
  19. finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
  20. finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
  21. finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/README.md +198 -0
  22. finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/adapter_config.json +30 -0
  23. finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
  24. finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/README.md +198 -0
  25. finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/adapter_config.json +30 -0
  26. finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
  27. finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/README.md +198 -0
  28. finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
  29. finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
  30. finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/README.md +124 -0
  31. finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
  32. finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
  33. finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/README.md +123 -0
  34. finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
  35. finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
  36. finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/README.md +198 -0
  37. finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/adapter_config.json +30 -0
  38. finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/adapter_model.safetensors +3 -0
  39. finlora_hf_submission/models_4bit/.DS_Store +0 -0
  40. finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/README.md +136 -0
  41. finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/adapter_config.json +35 -0
  42. finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/adapter_model.safetensors +3 -0
  43. finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/README.md +198 -0
  44. finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/adapter_config.json +35 -0
  45. finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/adapter_model.safetensors +3 -0
  46. finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/README.md +124 -0
  47. finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/adapter_config.json +35 -0
  48. finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/adapter_model.safetensors +3 -0
  49. finlora_hf_submission/models_4bit/headline_llama_3_1_8b_4bits_r4/README.md +198 -0
  50. finlora_hf_submission/models_4bit/headline_llama_3_1_8b_4bits_r4/adapter_config.json +30 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ finlora_hf_submission/rag_system/cfa_complete_rag.faiss filter=lfs diff=lfs merge=lfs -text
finlora_hf_submission/.DS_Store ADDED
Binary file (6.15 kB). View file
 
finlora_hf_submission/README——finlora.md ADDED
@@ -0,0 +1,651 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FinLoRA: Financial Large Language Models with LoRA Adaptation
2
+
3
+ ## Overview
4
+
5
+ FinLoRA is a comprehensive framework for fine-tuning large language models on financial tasks using Low-Rank Adaptation (LoRA). This project provides trained LoRA adapters for various financial NLP tasks including sentiment analysis, named entity recognition, headline classification, XBRL processing, and CFA knowledge integration.
6
+
7
+ ## Model Architecture
8
+
9
+ - **Base Model**: Meta-Llama-3.1-8B-Instruct
10
+ - **Adaptation Method**: LoRA (Low-Rank Adaptation)
11
+ - **Quantization**: 8-bit and 4-bit quantization support
12
+ - **Tasks**: Financial sentiment analysis, NER, classification, XBRL processing, CFA knowledge integration
13
+
14
+ ## Available Models
15
+
16
+ ### Core Financial Models
17
+ - `sentiment_llama_3_1_8b_8bits_r8` - Financial sentiment analysis
18
+ - `ner_llama_3_1_8b_8bits_r8` - Named entity recognition
19
+ - `headline_llama_3_1_8b_8bits_r8` - Financial headline classification
20
+ - `xbrl_extract_llama_3_1_8b_8bits_r8` - XBRL tag extraction
21
+ - `xbrl_term_llama_3_1_8b_8bits_r8` - XBRL terminology processing
22
+
23
+ ### Advanced Models
24
+ - `financebench_llama_3_1_8b_8bits_r8` - Comprehensive financial benchmark
25
+ - `finer_llama_3_1_8b_8bits_r8` - Financial NER
26
+ - `formula_llama_3_1_8b_8bits_r8` - Financial formula processing
27
+
28
+ ### RAG Knowledge Base
29
+ - CFA RAG knowledge base (FAISS index + JSONL data)
30
+ - FinTagging RAG knowledge base (FAISS index + JSONL data)
31
+ - RAG system scripts and configuration files
32
+
33
+ ## Quick Start (5 minutes)
34
+
35
+ ### 1. Environment Setup
36
+ ```bash
37
+ # Clone the repository
38
+ git clone <repository-url>
39
+ cd FinLora——RAG
40
+
41
+ # Create and activate environment
42
+ conda env create -f FinLoRA/environment.yml
43
+ conda activate finenv
44
+ ```
45
+
46
+ ### 2. Test a Single Model
47
+ ```python
48
+ # Quick test script
49
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
50
+ from peft import PeftModel
51
+ import torch
52
+
53
+ # Check if CUDA is available
54
+ device = "cuda" if torch.cuda.is_available() else "cpu"
55
+ print(f"Using device: {device}")
56
+
57
+ # Load model (replace with your model path)
58
+ model_path = "FinLoRA/lora_adapters/8bits_r8/sentiment_llama_3_1_8b_8bits_r8"
59
+ base_model = "meta-llama/Llama-3.1-8B-Instruct"
60
+
61
+ # Load tokenizer
62
+ tokenizer = AutoTokenizer.from_pretrained(base_model)
63
+ if tokenizer.pad_token is None:
64
+ tokenizer.pad_token = tokenizer.eos_token
65
+
66
+ # Configure quantization based on device
67
+ if device == "cuda":
68
+ bnb_config = BitsAndBytesConfig(load_in_8bit=True)
69
+ base_model = AutoModelForCausalLM.from_pretrained(
70
+ base_model, quantization_config=bnb_config, device_map="auto"
71
+ )
72
+ else:
73
+ # CPU mode - no quantization
74
+ base_model = AutoModelForCausalLM.from_pretrained(
75
+ base_model, device_map="cpu", torch_dtype=torch.float32
76
+ )
77
+
78
+ # Load LoRA adapter
79
+ model = PeftModel.from_pretrained(base_model, model_path)
80
+
81
+ # Test inference
82
+ def quick_test(text):
83
+ inputs = tokenizer(text, return_tensors="pt")
84
+ with torch.no_grad():
85
+ outputs = model.generate(**inputs, max_new_tokens=50, temperature=0.7)
86
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
87
+
88
+ # Test
89
+ result = quick_test("Classify sentiment: 'The stock market is performing well today.'")
90
+ print(result)
91
+ ```
92
+
93
+ ### 3. Run Full Evaluation
94
+ ```bash
95
+ cd testdata
96
+ python comprehensive_evaluation.py
97
+ ```
98
+
99
+ ## Environment Setup
100
+
101
+ ### Quest Cluster Environment (Original Development)
102
+
103
+ The original development was done on Northwestern University's Quest cluster with:
104
+ - **OS**: Linux 4.18.0-553.64.1.el8_10.x86_64
105
+ - **GPU**: NVIDIA H100 80GB HBM3
106
+ - **CUDA**: Version 12.8
107
+ - **Environment**: `finenv` conda environment
108
+
109
+ ### Option 1: Using Conda (Recommended)
110
+
111
+ ```bash
112
+ # Create environment from provided environment.yml
113
+ conda env create -f FinLoRA/environment.yml
114
+
115
+ # Activate environment
116
+ conda activate finenv
117
+
118
+ # Install additional requirements
119
+ pip install -r FinLoRA/requirements.txt
120
+ ```
121
+
122
+ ### Option 2: Manual Installation
123
+
124
+ #### For GPU Users:
125
+ ```bash
126
+ # Create new conda environment
127
+ conda create -n finlora python=3.11
128
+
129
+ # Activate environment
130
+ conda activate finlora
131
+
132
+ # Install PyTorch with CUDA support
133
+ conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
134
+
135
+ # Install core dependencies
136
+ pip install transformers==4.45.2
137
+ pip install datasets==2.19.1
138
+ pip install peft==0.13.2
139
+ pip install bitsandbytes==0.44.1
140
+ pip install accelerate==1.0.0
141
+ pip install deepspeed==0.15.2
142
+ pip install sentence-transformers
143
+ pip install faiss-cpu
144
+ pip install scikit-learn
145
+ pip install pandas numpy
146
+ ```
147
+
148
+ #### For CPU-Only Users:
149
+ ```bash
150
+ # Create new conda environment
151
+ conda create -n finlora python=3.11
152
+
153
+ # Activate environment
154
+ conda activate finlora
155
+
156
+ # Install PyTorch CPU version
157
+ conda install pytorch torchvision torchaudio cpuonly -c pytorch
158
+
159
+ # Install core dependencies (CPU-compatible versions)
160
+ pip install transformers==4.45.2
161
+ pip install datasets==2.19.1
162
+ pip install peft==0.13.2
163
+ pip install accelerate==1.0.0
164
+ pip install sentence-transformers
165
+ pip install faiss-cpu
166
+ pip install scikit-learn
167
+ pip install pandas numpy
168
+ ```
169
+
170
+ ### Option 3: Alternative Platforms
171
+
172
+ #### Google Colab
173
+ ```python
174
+ # Install dependencies
175
+ !pip install transformers==4.45.2
176
+ !pip install datasets==2.19.1
177
+ !pip install peft==0.13.2
178
+ !pip install bitsandbytes==0.44.1
179
+ !pip install accelerate==1.0.0
180
+ !pip install sentence-transformers
181
+ !pip install faiss-cpu
182
+ !pip install scikit-learn
183
+
184
+ # Check GPU availability
185
+ import torch
186
+ print(f"CUDA available: {torch.cuda.is_available()}")
187
+ if torch.cuda.is_available():
188
+ print(f"GPU: {torch.cuda.get_device_name(0)}")
189
+ print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
190
+ ```
191
+
192
+ #### AWS EC2 / Azure / Local GPU
193
+ ```bash
194
+ # Install NVIDIA drivers and CUDA toolkit
195
+ # Then follow Option 1 or 2 above
196
+ ```
197
+
198
+ #### CPU-Only Mode
199
+ ```python
200
+ # Complete CPU-only model loading example
201
+ from transformers import AutoTokenizer, AutoModelForCausalLM
202
+ from peft import PeftModel
203
+ import torch
204
+
205
+ # Force CPU usage
206
+ device = "cpu"
207
+ torch.set_default_device(device)
208
+
209
+ # Load tokenizer
210
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
211
+ if tokenizer.pad_token is None:
212
+ tokenizer.pad_token = tokenizer.eos_token
213
+
214
+ # Load base model for CPU (no quantization)
215
+ base_model = AutoModelForCausalLM.from_pretrained(
216
+ "meta-llama/Llama-3.1-8B-Instruct",
217
+ device_map="cpu",
218
+ torch_dtype=torch.float32,
219
+ low_cpu_mem_usage=True
220
+ )
221
+
222
+ # Load LoRA adapter
223
+ model = PeftModel.from_pretrained(base_model, "path/to/lora/adapter")
224
+
225
+ # Test inference
226
+ def cpu_predict(text):
227
+ inputs = tokenizer(text, return_tensors="pt")
228
+ with torch.no_grad():
229
+ outputs = model.generate(**inputs, max_new_tokens=50, temperature=0.7)
230
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
231
+
232
+ # Test
233
+ result = cpu_predict("Classify sentiment: 'The market is performing well.'")
234
+ print(result)
235
+ ```
236
+
237
+ ## Usage Instructions
238
+
239
+ ### 1. Basic Model Loading and Inference
240
+
241
+ ```python
242
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
243
+ from peft import PeftModel
244
+ import torch
245
+
246
+ # Check device availability
247
+ device = "cuda" if torch.cuda.is_available() else "cpu"
248
+ print(f"Using device: {device}")
249
+
250
+ # Load tokenizer
251
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
252
+ if tokenizer.pad_token is None:
253
+ tokenizer.pad_token = tokenizer.eos_token
254
+
255
+ # Configure model loading based on device
256
+ if device == "cuda":
257
+ # GPU mode with quantization
258
+ bnb_config = BitsAndBytesConfig(
259
+ load_in_8bit=True,
260
+ llm_int8_threshold=6.0
261
+ )
262
+ base_model = AutoModelForCausalLM.from_pretrained(
263
+ "meta-llama/Llama-3.1-8B-Instruct",
264
+ quantization_config=bnb_config,
265
+ device_map="auto",
266
+ torch_dtype=torch.float16,
267
+ trust_remote_code=True
268
+ )
269
+ else:
270
+ # CPU mode without quantization
271
+ base_model = AutoModelForCausalLM.from_pretrained(
272
+ "meta-llama/Llama-3.1-8B-Instruct",
273
+ device_map="cpu",
274
+ torch_dtype=torch.float32,
275
+ low_cpu_mem_usage=True
276
+ )
277
+
278
+ # Load LoRA adapter
279
+ model = PeftModel.from_pretrained(base_model, "path/to/lora/adapter")
280
+
281
+ # Example inference
282
+ def predict(text, max_length=256):
283
+ inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
284
+ with torch.no_grad():
285
+ outputs = model.generate(
286
+ **inputs,
287
+ max_new_tokens=max_length,
288
+ temperature=0.7,
289
+ do_sample=True,
290
+ pad_token_id=tokenizer.eos_token_id
291
+ )
292
+ return tokenizer.decode(outputs[0], skip_special_tokens=True)
293
+
294
+ # Test the model
295
+ result = predict("Classify the sentiment of this financial text: 'The company's revenue increased by 15% this quarter.'")
296
+ print(result)
297
+ ```
298
+
299
+ ### 2. Comprehensive Evaluation
300
+
301
+ For testing all models on financial datasets:
302
+
303
+ ```bash
304
+ # Navigate to testdata directory
305
+ cd testdata
306
+
307
+ # Run comprehensive evaluation (works on any platform)
308
+ python comprehensive_evaluation.py
309
+
310
+ # For Quest cluster users only:
311
+ # sbatch submit_comprehensive_evaluation.sh
312
+ ```
313
+
314
+ **Note**: The evaluation script automatically detects your environment and adjusts accordingly:
315
+ - **GPU available**: Uses CUDA with quantization
316
+ - **CPU only**: Uses CPU mode without quantization
317
+ - **Memory constraints**: Automatically reduces batch size
318
+
319
+ ### 3. Individual Model Testing
320
+
321
+ ```python
322
+ # Test specific financial tasks
323
+ from testdata.comprehensive_evaluation import FinLoRAPredictor
324
+
325
+ # Initialize predictor
326
+ predictor = FinLoRAPredictor("path/to/model")
327
+
328
+ # Load model
329
+ predictor.load_model()
330
+
331
+ # Test sentiment analysis
332
+ result = predictor.predict("Analyze the sentiment of: 'Stock prices are declining rapidly.'", max_length=50)
333
+ print(result)
334
+ ```
335
+
336
+ ### 4. RAG System Usage
337
+
338
+ The project includes RAG knowledge bases for enhanced financial understanding:
339
+
340
+ ```python
341
+ # Load RAG system
342
+ from FinLoRA.rag.cfa_rag_system import CFARAGSystem
343
+
344
+ # Initialize RAG system
345
+ rag_system = CFARAGSystem()
346
+
347
+ # Query CFA knowledge base
348
+ query = "What are the key principles of portfolio management?"
349
+ results = rag_system.query(query, top_k=5)
350
+
351
+ # Use with LoRA models for enhanced responses
352
+ enhanced_response = rag_system.generate_enhanced_response(query, model)
353
+ ```
354
+
355
+ ## Data Input Formats for Testing
356
+
357
+ ### 1. Financial Sentiment Analysis
358
+ **Input Format:**
359
+ ```python
360
+ text = "The company's quarterly earnings exceeded expectations by 20%."
361
+ prompt = f"Classify the sentiment of this financial text as positive, negative, or neutral:\n\nText: {text}\n\nSentiment:"
362
+ ```
363
+
364
+ **Expected Output:**
365
+ - `"positive"` - for positive financial sentiment
366
+ - `"negative"` - for negative financial sentiment
367
+ - `"neutral"` - for neutral financial sentiment
368
+
369
+ **Test Examples:**
370
+ - "Stock prices are soaring to new heights." → `positive`
371
+ - "Revenue declined by 15% this quarter." → `negative`
372
+ - "The company maintained stable performance." → `neutral`
373
+
374
+ ### 2. Named Entity Recognition
375
+ **Input Format:**
376
+ ```python
377
+ text = "Apple Inc. reported revenue of $394.3 billion in 2022."
378
+ prompt = f"Extract financial entities from the following text:\n\nText: {text}\n\nEntities:"
379
+ ```
380
+
381
+ **Expected Output:**
382
+ - Company names, financial figures, dates, and financial terms
383
+ - Structured entity extraction with context
384
+
385
+ ### 3. XBRL Processing
386
+ **Input Format:**
387
+ ```python
388
+ text = "Total assets: $1,234,567,890. Current assets: $456,789,123."
389
+ prompt = f"Extract XBRL tags from the following financial statement:\n\nStatement: {text}\n\nXBRL Tags:"
390
+ ```
391
+
392
+ **Expected Output:**
393
+ - Structured XBRL tag extraction
394
+ - Financial statement element identification
395
+
396
+ ### 4. CFA Knowledge Integration
397
+ **Input Format:**
398
+ ```python
399
+ question = "Explain the concept of weighted average cost of capital (WACC)."
400
+ prompt = f"Answer this CFA-related question using your knowledge base:\n\nQuestion: {question}\n\nAnswer:"
401
+ ```
402
+
403
+ **Expected Output:**
404
+ - Comprehensive explanation with CFA knowledge
405
+ - Structured financial concepts and formulas
406
+
407
+ ### 5. Headline Classification
408
+ **Input Format:**
409
+ ```python
410
+ headline = "Federal Reserve announces interest rate cut"
411
+ prompt = f"Classify this financial headline:\n\nHeadline: {headline}\n\nClassification:"
412
+ ```
413
+
414
+ **Expected Output:**
415
+ - Financial news category classification
416
+ - Market impact assessment
417
+
418
+ ## Running Without Quest GPU
419
+
420
+ ### Option 1: Local GPU Setup
421
+ ```bash
422
+ # Check GPU availability
423
+ nvidia-smi
424
+
425
+ # Install CUDA toolkit (if not already installed)
426
+ conda install cudatoolkit=11.8
427
+
428
+ # Run evaluation with GPU
429
+ cd testdata
430
+ python comprehensive_evaluation.py
431
+ ```
432
+
433
+ ### Option 2: CPU-Only Mode
434
+ ```bash
435
+ # Run evaluation on CPU (slower but works without GPU)
436
+ cd testdata
437
+ python comprehensive_evaluation.py
438
+ ```
439
+
440
+ The evaluation script will automatically detect CPU mode and adjust settings accordingly.
441
+
442
+ ### Option 3: Cloud Platforms
443
+
444
+ #### Google Colab
445
+ ```python
446
+ # Upload the project files to Colab
447
+ # Then run:
448
+ !cd testdata && python comprehensive_evaluation.py
449
+ ```
450
+
451
+ #### AWS EC2 / Azure / Local GPU
452
+ ```bash
453
+ # Install NVIDIA drivers and CUDA toolkit first
454
+ # Then follow the environment setup above
455
+ cd testdata
456
+ python comprehensive_evaluation.py
457
+ ```
458
+
459
+ #### Hugging Face Spaces
460
+ ```python
461
+ # Deploy as a web application
462
+ # The model will run on Hugging Face's infrastructure
463
+ ```
464
+
465
+ ### Option 4: Docker with GPU Support
466
+ ```bash
467
+ # Build Docker image
468
+ docker build -t finlora .
469
+
470
+ # Run with GPU support
471
+ docker run --gpus all -it finlora python comprehensive_evaluation.py
472
+
473
+ # Run without GPU (CPU mode)
474
+ docker run -it finlora python comprehensive_evaluation.py
475
+ ```
476
+
477
+ ### Performance Expectations
478
+
479
+ | Environment | Expected Speed | Memory Usage | Notes |
480
+ |-------------|----------------|--------------|-------|
481
+ | Quest H100 | Fastest | ~16GB | Original development environment |
482
+ | Local GPU (RTX 4090) | Fast | ~12GB | High-end consumer GPU |
483
+ | Google Colab T4 | Medium | ~8GB | Free tier available |
484
+ | Google Colab V100 | Fast | ~16GB | Pro tier required |
485
+ | CPU Only | Slow | ~32GB | Requires significant RAM |
486
+ | AWS/Azure GPU | Fast | Variable | Depends on instance type |
487
+
488
+ ## Evaluation Results
489
+
490
+ The models have been evaluated on multiple financial datasets:
491
+
492
+ ### Performance Metrics
493
+ - **Financial Phrasebank**: F1=0.333, Accuracy=0.500
494
+ - **NER Classification**: F1=0.889, Accuracy=0.800
495
+ - **Headline Classification**: F1=0.697, Accuracy=0.700
496
+ - **XBRL Tag Extraction**: Accuracy=0.200
497
+ - **FIQA Sentiment Analysis**: F1=0.727, Accuracy=0.700
498
+
499
+ ### Dataset Coverage
500
+ - BloombergGPT tasks: Financial Phrasebank, FIQA SA, Headline, NER, ConvFinQA
501
+ - XBRL tasks: Tag extraction, Value extraction, Formula construction, Formula calculation
502
+ - CFA integration: Level 1 and Level 2 knowledge base
503
+
504
+ ## File Structure
505
+
506
+ ```
507
+ FinLoRA/
508
+ ├── lora_adapters/ # Trained LoRA adapters
509
+ │ ├── 8bits_r8/ # 8-bit quantized models
510
+ │ ├── 4bits_r4/ # 4-bit quantized models
511
+ │ └── fp16_r8/ # Full precision models
512
+ ├── testdata/ # Evaluation scripts and data
513
+ │ ├── comprehensive_evaluation.py
514
+ │ ├── incremental_evaluation.py
515
+ │ └── submit_*.sh # SLURM submission scripts
516
+ ├── rag/ # RAG system components
517
+ ├── data/ # Training and test data
518
+ ├── environment.yml # Conda environment specification
519
+ └── requirements.txt # Python dependencies
520
+ ```
521
+
522
+ ## Environment Verification
523
+
524
+ Before running the models, verify your environment setup:
525
+
526
+ ```python
527
+ # Environment verification script
528
+ import torch
529
+ import transformers
530
+ import peft
531
+ import datasets
532
+ import sys
533
+
534
+ print("=== Environment Verification ===")
535
+ print(f"Python version: {sys.version}")
536
+ print(f"PyTorch version: {torch.__version__}")
537
+ print(f"CUDA available: {torch.cuda.is_available()}")
538
+ print(f"CUDA version: {torch.version.cuda}")
539
+ print(f"Transformers version: {transformers.__version__}")
540
+ print(f"PEFT version: {peft.__version__}")
541
+ print(f"Datasets version: {datasets.__version__}")
542
+
543
+ if torch.cuda.is_available():
544
+ print(f"GPU count: {torch.cuda.device_count()}")
545
+ for i in range(torch.cuda.device_count()):
546
+ print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
547
+ print(f"GPU {i} memory: {torch.cuda.get_device_properties(i).total_memory / 1e9:.1f} GB")
548
+ else:
549
+ print("Running in CPU mode")
550
+
551
+ print("=== Model Path Verification ===")
552
+ import os
553
+ model_paths = [
554
+ "FinLoRA/lora_adapters/8bits_r8/sentiment_llama_3_1_8b_8bits_r8",
555
+ "FinLoRA/lora_adapters/8bits_r8/ner_llama_3_1_8b_8bits_r8",
556
+ "FinLoRA/lora_adapters/8bits_r8/headline_llama_3_1_8b_8bits_r8"
557
+ ]
558
+
559
+ for path in model_paths:
560
+ exists = os.path.exists(path)
561
+ print(f"{path}: {'✓' if exists else '✗'}")
562
+ ```
563
+
564
+ ## Troubleshooting
565
+
566
+ ### Common Issues
567
+
568
+ 1. **CUDA Out of Memory**
569
+ ```python
570
+ # Reduce batch size or use gradient checkpointing
571
+ model.gradient_checkpointing_enable()
572
+
573
+ # Or use CPU mode
574
+ device = "cpu"
575
+ ```
576
+
577
+ 2. **Model Loading Errors**
578
+ ```python
579
+ # Check model path and permissions
580
+ import os
581
+ print(os.path.exists("path/to/model"))
582
+
583
+ # Check if base model can be loaded
584
+ from transformers import AutoTokenizer
585
+ tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
586
+ ```
587
+
588
+ 3. **Dependency Conflicts**
589
+ ```bash
590
+ # Create fresh environment
591
+ conda create -n finlora_new python=3.11
592
+ conda activate finlora_new
593
+ pip install -r requirements.txt
594
+ ```
595
+
596
+ 4. **CPU Mode Issues**
597
+ ```python
598
+ # Ensure CPU mode is properly configured
599
+ import torch
600
+ torch.set_default_device("cpu")
601
+
602
+ # Use low memory mode
603
+ base_model = AutoModelForCausalLM.from_pretrained(
604
+ "meta-llama/Llama-3.1-8B-Instruct",
605
+ device_map="cpu",
606
+ torch_dtype=torch.float32,
607
+ low_cpu_mem_usage=True
608
+ )
609
+ ```
610
+
611
+ ### Performance Optimization
612
+
613
+ 1. **Memory Optimization**
614
+ - Use 8-bit or 4-bit quantization
615
+ - Enable gradient checkpointing
616
+ - Use DeepSpeed for large models
617
+
618
+ 2. **Speed Optimization**
619
+ - Use GPU acceleration
620
+ - Batch processing
621
+ - Model caching
622
+
623
+ ## Citation
624
+
625
+ If you use this work, please cite:
626
+
627
+ ```bibtex
628
+ @article{finlora2024,
629
+ title={FinLoRA: Financial Large Language Models with LoRA Adaptation},
630
+ author={Your Name},
631
+ journal={Financial AI Conference},
632
+ year={2024}
633
+ }
634
+ ```
635
+
636
+ ## License
637
+
638
+ This project is licensed under the MIT License - see the LICENSE file for details.
639
+
640
+ ## Contact
641
+
642
+ For questions and support, please contact:
643
+ - Email: your.email@domain.com
644
+ - GitHub Issues: [Project Repository](https://github.com/your-repo/finlora)
645
+
646
+ ## Acknowledgments
647
+
648
+ - Meta AI for the Llama-3.1-8B-Instruct base model
649
+ - Hugging Face for the transformers library
650
+ - Microsoft for the LoRA adaptation technique
651
+ - Quest cluster at Northwestern University for computational resources
finlora_hf_submission/SUBMISSION_SUMMARY.md ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # FinLoRA Hugging Face Submission Summary
2
+
3
+ ## Submission Requirements Met
4
+
5
+ ✅ **Model Files**: All trained LoRA model files (excluding checkpoints) are included
6
+ ✅ **Inference Scripts**: Comprehensive scripts to load and run the models
7
+ ✅ **External Tools Integration**: RAG system and evaluation tools included
8
+
9
+ ## Submission Structure
10
+
11
+ ```
12
+ finlora_hf_submission/
13
+ ├── models/ # 9 Complete 8-bit LoRA Models (82MB)
14
+ │ ├── sentiment_llama_3_1_8b_8bits_r8/
15
+ │ ├── ner_llama_3_1_8b_8bits_r8/
16
+ │ ├── headline_llama_3_1_8b_8bits_r8/
17
+ │ ├── xbrl_extract_llama_3_1_8b_8bits_r8/
18
+ │ ├── xbrl_term_llama_3_1_8b_8bits_r8/
19
+ │ ├── financebench_llama_3_1_8b_8bits_r8/
20
+ │ ├── finer_llama_3_1_8b_8bits_r8/
21
+ │ ├── formula_llama_3_1_8b_8bits_r8/
22
+ │ └── xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/
23
+ ├── models_4bit/ # 8 Complete 4-bit LoRA Models (37MB)
24
+ │ ├── sentiment_llama_3_1_8b_4bits_r4/
25
+ │ ├── ner_llama_3_1_8b_4bits_r4/
26
+ │ ├── headline_llama_3_1_8b_4bits_r4/
27
+ │ ├── xbrl_extract_llama_3_1_8b_4bits_r4/
28
+ │ ├── xbrl_term_llama_3_1_8b_4bits_r4/
29
+ │ ├── financebench_llama_3_1_8b_4bits_r4/
30
+ │ ├── finer_llama_3_1_8b_4bits_r4/
31
+ │ └── formula_llama_3_1_8b_4bits_r4/
32
+ ├── testdata/ # Evaluation Datasets (3.5MB)
33
+ │ ├── FinCL-eval-subset.csv
34
+ │ └── FinNI-eval-subset.csv
35
+ ├── rag_system/ # RAG System Components (8.3MB)
36
+ │ ├── cfa_rag_system.py
37
+ │ ├── multi_task_rag_system.py
38
+ │ └── rag_config.json
39
+ ├── inference.py # Main Inference Script
40
+ ├── comprehensive_evaluation.py # Full Evaluation Script
41
+ ├── incremental_evaluation.py # Incremental Evaluation
42
+ ├── robust_incremental.py # Robust Evaluation
43
+ ├── missing_tests.py # Missing Test Detection
44
+ ├── test_submission.py # Submission Test Script
45
+ ├── upload_to_hf.py # Hugging Face Upload Script
46
+ ├── requirements.txt # Python Dependencies
47
+ └── README.md # Comprehensive Documentation
48
+ ```
49
+
50
+ ## Available Models
51
+
52
+ ### 8-bit Quantized Models (Recommended)
53
+ 1. **sentiment_llama_3_1_8b_8bits_r8** - Financial sentiment analysis
54
+ 2. **ner_llama_3_1_8b_8bits_r8** - Named entity recognition
55
+ 3. **headline_llama_3_1_8b_8bits_r8** - Financial headline classification
56
+ 4. **xbrl_extract_llama_3_1_8b_8bits_r8** - XBRL tag extraction
57
+ 5. **xbrl_term_llama_3_1_8b_8bits_r8** - XBRL terminology processing
58
+ 6. **financebench_llama_3_1_8b_8bits_r8** - Comprehensive financial benchmark
59
+ 7. **finer_llama_3_1_8b_8bits_r8** - Financial NER
60
+ 8. **formula_llama_3_1_8b_8bits_r8** - Financial formula processing
61
+ 9. **xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8** - XBRL training model
62
+
63
+ ### 4-bit Quantized Models (Memory Efficient)
64
+ 1. **sentiment_llama_3_1_8b_4bits_r4** - Financial sentiment analysis
65
+ 2. **ner_llama_3_1_8b_4bits_r4** - Named entity recognition
66
+ 3. **headline_llama_3_1_8b_4bits_r4** - Financial headline classification
67
+ 4. **xbrl_extract_llama_3_1_8b_4bits_r4** - XBRL tag extraction
68
+ 5. **xbrl_term_llama_3_1_8b_4bits_r4** - XBRL terminology processing
69
+ 6. **financebench_llama_3_1_8b_4bits_r4** - Comprehensive financial benchmark
70
+ 7. **finer_llama_3_1_8b_4bits_r4** - Financial NER
71
+ 8. **formula_llama_3_1_8b_4bits_r4** - Financial formula processing
72
+
73
+ ## Key Features
74
+
75
+ ### 1. Easy Model Loading
76
+ ```python
77
+ from inference import FinLoRAPredictor
78
+
79
+ # Load 8-bit model
80
+ predictor = FinLoRAPredictor("sentiment_llama_3_1_8b_8bits_r8", use_4bit=False)
81
+
82
+ # Load 4-bit model for memory efficiency
83
+ predictor = FinLoRAPredictor("sentiment_llama_3_1_8b_4bits_r4", use_4bit=True)
84
+ ```
85
+
86
+ ### 2. Multiple Task Support
87
+ - Financial sentiment analysis
88
+ - Named entity recognition
89
+ - Headline classification
90
+ - XBRL tag extraction
91
+ - Financial formula processing
92
+
93
+ ### 3. Comprehensive Evaluation
94
+ - Full evaluation on financial datasets
95
+ - Incremental evaluation capabilities
96
+ - Robust evaluation testing
97
+ - Missing test detection
98
+
99
+ ### 4. Memory Efficiency
100
+ - 8-bit models for optimal performance
101
+ - 4-bit models for limited memory environments
102
+ - Automatic device detection (GPU/CPU)
103
+
104
+ ## Performance Results
105
+
106
+ | Task | Dataset | F1 Score | Accuracy |
107
+ |------|---------|----------|----------|
108
+ | Sentiment Analysis | Financial Phrasebank | 0.333 | 0.500 |
109
+ | NER | Financial NER | 0.889 | 0.800 |
110
+ | Classification | Headline Classification | 0.697 | 0.700 |
111
+ | XBRL Processing | XBRL Tag Extraction | - | 0.200 |
112
+ | Sentiment Analysis | FIQA SA | 0.727 | 0.700 |
113
+
114
+ ## Usage Instructions
115
+
116
+ ### Quick Start
117
+ ```bash
118
+ # 1. Install dependencies
119
+ pip install -r requirements.txt
120
+
121
+ # 2. Test the submission
122
+ python test_submission.py
123
+
124
+ # 3. Run inference
125
+ python inference.py
126
+
127
+ # 4. Run evaluation
128
+ python comprehensive_evaluation.py
129
+ ```
130
+
131
+ ### Upload to Hugging Face
132
+ ```bash
133
+ # Set your Hugging Face token
134
+ export HUGGINGFACE_TOKEN="your_token_here"
135
+
136
+ # Upload the model
137
+ python upload_to_hf.py
138
+ ```
139
+
140
+ ## Submission Checklist
141
+
142
+ - [x] All model files included (excluding checkpoints)
143
+ - [x] Inference scripts provided
144
+ - [x] External tools integration (RAG system)
145
+ - [x] Comprehensive documentation
146
+ - [x] Easy installation and setup
147
+ - [x] Multiple usage examples
148
+ - [x] Evaluation scripts
149
+ - [x] Test scripts for verification
150
+ - [x] Hugging Face upload automation
151
+ - [x] Both 8-bit and 4-bit model variants
152
+ - [x] Complete evaluation datasets
153
+
154
+ ## Ready for Submission
155
+
156
+ The FinLoRA submission is complete and ready for Hugging Face upload. All requirements have been met:
157
+
158
+ 1. **Model Files**: 17 complete LoRA models (9 x 8-bit + 8 x 4-bit) with all necessary files
159
+ 2. **Inference Scripts**: Comprehensive Python scripts for loading and running models
160
+ 3. **External Tools**: RAG system with evaluation tools and datasets
161
+ 4. **Documentation**: Complete README with usage examples
162
+ 5. **Testing**: Automated test scripts to verify functionality
163
+
164
+ The submission can be easily uploaded to Hugging Face using the provided `upload_to_hf.py` script.
165
+
166
+ ## Total Size: ~130MB
167
+ - Models (8-bit): 82MB
168
+ - Models (4-bit): 37MB
169
+ - Test data: 3.5MB
170
+ - RAG system: 8.3MB
171
+ - Scripts and docs: <1MB
finlora_hf_submission/__pycache__/comprehensive_evaluation.cpython-313.pyc ADDED
Binary file (23 kB). View file
 
finlora_hf_submission/__pycache__/incremental_evaluation.cpython-313.pyc ADDED
Binary file (4.99 kB). View file
 
finlora_hf_submission/__pycache__/inference.cpython-313.pyc ADDED
Binary file (12.1 kB). View file
 
finlora_hf_submission/__pycache__/missing_tests.cpython-313.pyc ADDED
Binary file (10.4 kB). View file
 
finlora_hf_submission/__pycache__/robust_incremental.cpython-313.pyc ADDED
Binary file (7.67 kB). View file
 
finlora_hf_submission/inference.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
FinLoRA: Financial Large Language Models with LoRA Adaptation
Main inference script for Hugging Face submission

This script provides easy loading and inference for all FinLoRA models.
"""

import sys
import torch
import os
import json
import warnings
from typing import Dict, List, Optional, Any, Union
from pathlib import Path

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore')

try:
    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
    from peft import PeftModel
except ImportError as e:
    print(f"Missing required dependencies: {e}")
    print("Please install: pip install transformers peft bitsandbytes")
    # sys.exit() is preferred over the bare exit() builtin: exit() is injected
    # by the `site` module and is unavailable under `python -S` or in frozen
    # executables, which would turn this friendly message into a NameError.
    sys.exit(1)
27
class FinLoRAPredictor:
    """Main FinLoRA predictor class.

    Loads the Llama-3.1 base model (8-bit or 4-bit quantized on CUDA, full
    precision on CPU), applies a task-specific FinLoRA adapter from the local
    ``models``/``models_4bit`` directory, and exposes convenience methods for
    the supported financial NLP tasks.
    """

    def __init__(self,
                 model_name: str = "sentiment_llama_3_1_8b_8bits_r8",
                 base_model: str = "meta-llama/Llama-3.1-8B-Instruct",
                 use_4bit: bool = False):
        """
        Initialize FinLoRA predictor.

        Args:
            model_name: Name of the LoRA adapter directory to load
            base_model: Hugging Face id of the base model
            use_4bit: Whether to use the 4-bit quantized adapter variants
        """
        self.model_name = model_name
        self.base_model = base_model
        self.use_4bit = use_4bit

        # Device configuration: quantized loading requires CUDA.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {self.device}")

        # Model components, populated by _load_model().
        self.model = None
        self.tokenizer = None

        # Load model eagerly so construction fails fast on missing files.
        self._load_model()

    def _load_model(self):
        """Load tokenizer, base model (quantized on CUDA) and LoRA adapter.

        Raises:
            FileNotFoundError: If the adapter directory does not exist locally.
            Exception: Re-raises any loading error after printing it.
        """
        try:
            print(f"Loading model: {self.model_name}")

            # Llama tokenizers ship without a pad token; reuse EOS for padding.
            self.tokenizer = AutoTokenizer.from_pretrained(self.base_model)
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            if self.device == "cuda":
                # bitsandbytes quantization is GPU-only, so configure it here.
                if self.use_4bit:
                    bnb_config = BitsAndBytesConfig(
                        load_in_4bit=True,
                        bnb_4bit_use_double_quant=True,
                        bnb_4bit_quant_type="nf4",
                        bnb_4bit_compute_dtype=torch.bfloat16
                    )
                else:
                    bnb_config = BitsAndBytesConfig(
                        load_in_8bit=True,
                        llm_int8_threshold=6.0
                    )

                base_model = AutoModelForCausalLM.from_pretrained(
                    self.base_model,
                    quantization_config=bnb_config,
                    device_map="auto",
                    torch_dtype=torch.float16,
                    trust_remote_code=True
                )
            else:
                # CPU mode: no quantization, full fp32 weights.
                base_model = AutoModelForCausalLM.from_pretrained(
                    self.base_model,
                    device_map="cpu",
                    torch_dtype=torch.float32,
                    low_cpu_mem_usage=True
                )

            # 4-bit adapter variants live in a separate directory.
            model_dir = "models_4bit" if self.use_4bit else "models"
            model_path = os.path.join(model_dir, self.model_name)

            if not os.path.exists(model_path):
                raise FileNotFoundError(f"Model path not found: {model_path}")

            self.model = PeftModel.from_pretrained(base_model, model_path)
            self.model.eval()

            print(f"Model loaded successfully: {self.model_name}")

        except Exception as e:
            print(f"Error loading model: {e}")
            raise

    def predict(self,
                text: str,
                max_length: int = 256,
                temperature: float = 0.7) -> str:
        """
        Generate a completion for the given prompt.

        Args:
            text: Input prompt text (truncated to 512 tokens)
            max_length: Maximum number of NEW tokens to generate
            temperature: Sampling temperature

        Returns:
            The generated completion with the prompt removed, or an
            ``"Error: ..."`` string if generation failed.
        """
        try:
            # Tokenize input, capping prompt length at 512 tokens.
            inputs = self.tokenizer(
                text,
                return_tensors="pt",
                truncation=True,
                max_length=512
            )

            if self.device == "cuda":
                inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Generate response (sampling, so output is non-deterministic).
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=max_length,
                    do_sample=True,
                    temperature=temperature,
                    top_p=0.9,
                    pad_token_id=self.tokenizer.eos_token_id,
                    eos_token_id=self.tokenizer.eos_token_id
                )

            # Decode only the newly generated tokens. Slicing by the prompt's
            # token length is more robust than string-replacing the prompt,
            # which breaks when re-tokenization alters whitespace or when the
            # prompt text happens to recur inside the completion.
            prompt_len = inputs["input_ids"].shape[-1]
            response = self.tokenizer.decode(
                outputs[0][prompt_len:], skip_special_tokens=True
            )

            return response.strip()

        except Exception as e:
            print(f"Prediction error: {e}")
            return f"Error: {str(e)}"

    def classify_sentiment(self, text: str) -> str:
        """Classify financial sentiment as 'positive', 'negative' or 'neutral'."""
        prompt = f"Classify the sentiment of this financial text as positive, negative, or neutral:\n\nText: {text}\n\nSentiment:"
        response = self.predict(prompt, max_length=10)

        # Map the free-form generation onto the three labels; anything that
        # mentions neither 'positive' nor 'negative' defaults to neutral.
        lowered = response.lower()
        if 'positive' in lowered:
            return "positive"
        elif 'negative' in lowered:
            return "negative"
        else:
            return "neutral"

    def extract_entities(self, text: str) -> str:
        """Extract financial entities (free-form model output)."""
        prompt = f"Extract financial entities from the following text:\n\nText: {text}\n\nEntities:"
        return self.predict(prompt, max_length=100)

    def classify_headline(self, headline: str) -> str:
        """Classify a financial headline as 'positive' or 'negative'."""
        prompt = f"Classify this financial headline as positive or negative:\n\nHeadline: {headline}\n\nSentiment:"
        response = self.predict(prompt, max_length=10)

        # 'yes' is accepted as an affirmative because some headline-task
        # adapters were trained on yes/no style targets.
        if 'positive' in response.lower() or 'yes' in response.lower():
            return "positive"
        else:
            return "negative"

    def extract_xbrl_tags(self, text: str) -> str:
        """Extract XBRL tags from financial text (free-form model output)."""
        prompt = f"Extract XBRL tags from the following financial statement:\n\nStatement: {text}\n\nXBRL Tags:"
        return self.predict(prompt, max_length=100)

    def process_financial_text(self, text: str) -> str:
        """Produce a free-form analysis of general financial text."""
        prompt = f"Analyze this financial text and provide insights:\n\nText: {text}\n\nAnalysis:"
        return self.predict(prompt, max_length=200)
201
def list_available_models(use_4bit: bool = False) -> List[str]:
    """List all available LoRA adapter directories.

    A subdirectory counts as a model if it contains an ``adapter_config.json``.

    Args:
        use_4bit: If True, scan ``models_4bit``; otherwise scan ``models``.

    Returns:
        Sorted list of model directory names; empty if the root is missing.
    """
    # Note: the original loop reused the name `model_dir` for both the root
    # directory string and the per-entry Path, shadowing one with the other;
    # distinct names keep the two roles apart.
    root = Path("models_4bit" if use_4bit else "models")

    if not root.exists():
        return []

    return sorted(
        entry.name
        for entry in root.iterdir()
        if entry.is_dir() and (entry / "adapter_config.json").exists()
    )
216
def main():
    """Smoke-test entry point: load the first available adapter and run a
    small battery of example tasks against it."""
    print("=== FinLoRA Financial Language Model ===")
    print("Loading model and testing inference...")

    # Discover adapters in both quantization flavors.
    models_8bit = list_available_models(use_4bit=False)
    models_4bit = list_available_models(use_4bit=True)

    print(f"Available 8-bit models: {', '.join(models_8bit)}")
    print(f"Available 4-bit models: {', '.join(models_4bit)}")

    if not models_8bit and not models_4bit:
        print("No models found in 'models' or 'models_4bit' directories")
        return

    # Prefer an 8-bit adapter; fall back to 4-bit only when none exist.
    if models_8bit:
        model_name, use_4bit = models_8bit[0], False
    else:
        model_name, use_4bit = models_4bit[0], True

    print(f"Loading model: {model_name} ({'4-bit' if use_4bit else '8-bit'})")

    try:
        predictor = FinLoRAPredictor(
            model_name=model_name,
            use_4bit=use_4bit
        )

        # (task label, example input, bound task method) triples.
        cases = [
            ("Sentiment Analysis",
             "The company's quarterly earnings exceeded expectations by 20%.",
             predictor.classify_sentiment),
            ("Entity Extraction",
             "Apple Inc. reported revenue of $394.3 billion in 2022.",
             predictor.extract_entities),
            ("Headline Classification",
             "Federal Reserve announces interest rate cut",
             predictor.classify_headline),
            ("XBRL Tag Extraction",
             "Total assets: $1,234,567,890. Current assets: $456,789,123.",
             predictor.extract_xbrl_tags),
        ]

        # Run each case; a failure in one task must not abort the rest.
        for i, (task, text, method) in enumerate(cases, 1):
            print(f"\n--- Test {i}: {task} ---")
            print(f"Input: {text}")

            try:
                result = method(text)
                print(f"Output: {result}")
            except Exception as e:
                print(f"Error: {e}")

        print("\nModel testing completed successfully!")

    except Exception as e:
        print(f"Error: {e}")
        print("\nTroubleshooting:")
        print("1. Ensure all model files are in the 'models' or 'models_4bit' directory")
        print("2. Check that the base model can be downloaded")
        print("3. Verify CUDA availability if using GPU")


if __name__ == "__main__":
    main()
finlora_hf_submission/models/.DS_Store ADDED
Binary file (10.2 kB). View file
 
finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/README.md ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
3
+ should probably proofread and complete it, then remove this comment. -->
4
+
5
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
6
+ <details><summary>See axolotl config</summary>
7
+
8
+ axolotl version: `0.9.1`
9
+ ```yaml
10
+ base_model: meta-llama/Llama-3.1-8B-Instruct
11
+ model_type: LlamaForCausalLM
12
+ tokenizer_type: AutoTokenizer
13
+ gradient_accumulation_steps: 2
14
+ micro_batch_size: 1
15
+ num_epochs: 4
16
+ optimizer: adamw_bnb_8bit
17
+ lr_scheduler: cosine
18
+ learning_rate: 0.0001
19
+ load_in_8bit: true
20
+ load_in_4bit: false
21
+ adapter: lora
22
+ lora_model_dir: null
23
+ lora_r: 8
24
+ lora_alpha: 16
25
+ lora_dropout: 0.05
26
+ lora_target_modules:
27
+ - q_proj
28
+ - v_proj
29
+ - k_proj
30
+ datasets:
31
+ - path: /workspace/FinLoRA/data/train/financebench_train.jsonl
32
+ type:
33
+ system_prompt: ''
34
+ field_system: system
35
+ field_instruction: context
36
+ field_output: target
37
+ format: '[INST] {instruction} [/INST]'
38
+ no_input_format: '[INST] {instruction} [/INST]'
39
+ dataset_prepared_path: null
40
+ val_set_size: 0.02
41
+ output_dir: /workspace/FinLoRA/lora/axolotl-output/financebench_llama_3_1_8b_8bits_r8
42
+ peft_use_dora: false
43
+ sequence_len: 4096
44
+ sample_packing: false
45
+ pad_to_sequence_len: false
46
+ wandb_project: finlora_models
47
+ wandb_entity: null
48
+ wandb_watch: gradients
49
+ wandb_name: financebench_llama_3_1_8b_8bits_r8
50
+ wandb_log_model: 'false'
51
+ bf16: auto
52
+ tf32: false
53
+ gradient_checkpointing: true
54
+ resume_from_checkpoint: null
55
+ logging_steps: 500
56
+ flash_attention: false
57
+ deepspeed: deepspeed_configs/zero1.json
58
+ warmup_steps: 10
59
+ evals_per_epoch: 4
60
+ saves_per_epoch: 1
61
+ weight_decay: 0.0
62
+ special_tokens:
63
+ pad_token: <|end_of_text|>
64
+ chat_template: llama3
65
+
66
+ ```
67
+
68
+ </details><br>
69
+
70
+ # workspace/FinLoRA/lora/axolotl-output/financebench_llama_3_1_8b_8bits_r8
71
+
72
+ This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/financebench_train.jsonl dataset.
73
+ It achieves the following results on the evaluation set:
74
+ - Loss: 3.0593
75
+
76
+ ## Model description
77
+
78
+ More information needed
79
+
80
+ ## Intended uses & limitations
81
+
82
+ More information needed
83
+
84
+ ## Training and evaluation data
85
+
86
+ More information needed
87
+
88
+ ## Training procedure
89
+
90
+ ### Training hyperparameters
91
+
92
+ The following hyperparameters were used during training:
93
+ - learning_rate: 0.0001
94
+ - train_batch_size: 1
95
+ - eval_batch_size: 1
96
+ - seed: 42
97
+ - distributed_type: multi-GPU
98
+ - num_devices: 5
99
+ - gradient_accumulation_steps: 2
100
+ - total_train_batch_size: 10
101
+ - total_eval_batch_size: 5
102
+ - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
103
+ - lr_scheduler_type: cosine
104
+ - lr_scheduler_warmup_steps: 10
105
+ - num_epochs: 4.0
106
+
107
+ ### Training results
108
+
109
+ | Training Loss | Epoch | Step | Validation Loss |
110
+ |:-------------:|:------:|:----:|:---------------:|
111
+ | No log | 0.1176 | 1 | 4.6396 |
112
+ | No log | 0.2353 | 2 | 4.5918 |
113
+ | No log | 0.4706 | 4 | 4.5650 |
114
+ | No log | 0.7059 | 6 | 4.5194 |
115
+ | No log | 0.9412 | 8 | 4.4293 |
116
+ | No log | 1.1176 | 10 | 4.3325 |
117
+ | No log | 1.3529 | 12 | 3.9557 |
118
+ | No log | 1.5882 | 14 | 3.6519 |
119
+ | No log | 1.8235 | 16 | 3.6472 |
120
+ | No log | 2.0 | 18 | 3.4611 |
121
+ | No log | 2.2353 | 20 | 3.3681 |
122
+ | No log | 2.4706 | 22 | 3.2136 |
123
+ | No log | 2.7059 | 24 | 3.1790 |
124
+ | No log | 2.9412 | 26 | 3.1455 |
125
+ | No log | 3.1176 | 28 | 3.1480 |
126
+ | No log | 3.3529 | 30 | 3.0489 |
127
+ | No log | 3.5882 | 32 | 3.0593 |
128
+
129
+
130
+ ### Framework versions
131
+
132
+ - PEFT 0.15.2
133
+ - Transformers 4.51.3
134
+ - Pytorch 2.8.0.dev20250319+cu128
135
+ - Datasets 3.5.1
136
+ - Tokenizers 0.21.1
finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/adapter_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 8,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "v_proj",
28
+ "k_proj",
29
+ "q_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "trainable_token_indices": null,
33
+ "use_dora": false,
34
+ "use_rslora": false
35
+ }
finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f247859d3e9b80c4dfd2f8d7b8d9ea574b5cd1925e2fc12ab3e7babc6e3a6bd7
3
+ size 9462656
finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/README.md ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
3
+ should probably proofread and complete it, then remove this comment. -->
4
+
5
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
6
+ <details><summary>See axolotl config</summary>
7
+
8
+ axolotl version: `0.9.0`
9
+ ```yaml
10
+ base_model: NousResearch/Meta-Llama-3.1-8B-Instruct
11
+ model_type: LlamaForCausalLM
12
+ tokenizer_type: AutoTokenizer
13
+ gradient_accumulation_steps: 8
14
+ micro_batch_size: 1
15
+ num_epochs: 4
16
+ optimizer: adamw_bnb_8bit
17
+ lr_scheduler: cosine
18
+ learning_rate: 0.0001
19
+ load_in_8bit: true
20
+ load_in_4bit: false
21
+ adapter: lora
22
+ lora_model_dir: null
23
+ lora_r: 8
24
+ lora_alpha: 16
25
+ lora_dropout: 0.05
26
+ lora_target_modules:
27
+ - q_proj
28
+ - v_proj
29
+ - k_proj
30
+ datasets:
31
+ - path: /workspace/FinLoRA/data/train/finer_train_batched.jsonl
32
+ type:
33
+ system_prompt: ''
34
+ field_system: system
35
+ field_instruction: context
36
+ field_output: target
37
+ format: '[INST] {instruction} [/INST]'
38
+ no_input_format: '[INST] {instruction} [/INST]'
39
+ dataset_prepared_path: null
40
+ val_set_size: 0.02
41
+ output_dir: /workspace/FinLoRA/lora/axolotl-output/finer_llama_3_1_8b_8bits_r8
42
+ sequence_len: 4096
43
+ sample_packing: false
44
+ pad_to_sequence_len: false
45
+ wandb_project: finlora_models
46
+ wandb_entity: null
47
+ wandb_watch: gradients
48
+ wandb_name: finer_llama_3_1_8b_8bits_r8
49
+ wandb_log_model: 'false'
50
+ bf16: auto
51
+ tf32: false
52
+ gradient_checkpointing: true
53
+ resume_from_checkpoint: null
54
+ logging_steps: 500
55
+ flash_attention: false
56
+ deepspeed: deepspeed_configs/zero1.json
57
+ warmup_steps: 10
58
+ evals_per_epoch: 4
59
+ saves_per_epoch: 1
60
+ weight_decay: 0.0
61
+ special_tokens:
62
+ pad_token: <|end_of_text|>
63
+ chat_template: llama3
64
+
65
+ ```
66
+
67
+ </details><br>
68
+
69
+ # workspace/FinLoRA/fine-tune/axolotl-output/finer_llama_3_1_8B_8bits_r8
70
+
71
+ This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/finer_train_batched.jsonl dataset.
72
+ It achieves the following results on the evaluation set:
73
+ - Loss: 0.0331
74
+
75
+ ## Model description
76
+
77
+ More information needed
78
+
79
+ ## Intended uses & limitations
80
+
81
+ More information needed
82
+
83
+ ## Training and evaluation data
84
+
85
+ More information needed
86
+
87
+ ## Training procedure
88
+
89
+ ### Training hyperparameters
90
+
91
+ The following hyperparameters were used during training:
92
+ - learning_rate: 0.0001
93
+ - train_batch_size: 1
94
+ - eval_batch_size: 1
95
+ - seed: 42
96
+ - distributed_type: multi-GPU
97
+ - num_devices: 2
98
+ - gradient_accumulation_steps: 8
99
+ - total_train_batch_size: 16
100
+ - total_eval_batch_size: 2
101
+ - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
102
+ - lr_scheduler_type: cosine
103
+ - lr_scheduler_warmup_steps: 10
104
+ - num_epochs: 4.0
105
+
106
+ ### Training results
107
+
108
+ | Training Loss | Epoch | Step | Validation Loss |
109
+ |:-------------:|:------:|:----:|:---------------:|
110
+ | No log | 0.0016 | 1 | 0.5433 |
111
+ | No log | 0.2497 | 153 | 0.0520 |
112
+ | No log | 0.4995 | 306 | 0.0459 |
113
+ | No log | 0.7492 | 459 | 0.0406 |
114
+ | 0.0693 | 0.9990 | 612 | 0.0386 |
115
+ | 0.0693 | 1.2497 | 765 | 0.0396 |
116
+ | 0.0693 | 1.4995 | 918 | 0.0363 |
117
+ | 0.036 | 1.7492 | 1071 | 0.0351 |
118
+ | 0.036 | 1.9990 | 1224 | 0.0348 |
119
+ | 0.036 | 2.2497 | 1377 | 0.0360 |
120
+ | 0.0302 | 2.4995 | 1530 | 0.0321 |
121
+ | 0.0302 | 2.7492 | 1683 | 0.0347 |
122
+ | 0.0302 | 2.9990 | 1836 | 0.0324 |
123
+ | 0.0302 | 3.2497 | 1989 | 0.0328 |
124
+ | 0.0242 | 3.4995 | 2142 | 0.0334 |
125
+ | 0.0242 | 3.7492 | 2295 | 0.0332 |
126
+ | 0.0242 | 3.9990 | 2448 | 0.0331 |
127
+
128
+
129
+ ### Framework versions
130
+
131
+ - PEFT 0.15.2
132
+ - Transformers 4.51.3
133
+ - Pytorch 2.8.0.dev20250319+cu128
134
+ - Datasets 3.5.0
135
+ - Tokenizers 0.21.1
finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/adapter_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "NousResearch/Meta-Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 8,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "k_proj",
28
+ "q_proj",
29
+ "v_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "trainable_token_indices": null,
33
+ "use_dora": false,
34
+ "use_rslora": false
35
+ }
finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8a25d48802d10e77609254723de643d2683d2c72d1a73bdb4110ed78f3f9d0b
3
+ size 9462656
finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/README.md ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
3
+ should probably proofread and complete it, then remove this comment. -->
4
+
5
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
6
+ <details><summary>See axolotl config</summary>
7
+
8
+ axolotl version: `0.9.1`
9
+ ```yaml
10
+ base_model: meta-llama/Llama-3.1-8B-Instruct
11
+ model_type: LlamaForCausalLM
12
+ tokenizer_type: AutoTokenizer
13
+ gradient_accumulation_steps: 2
14
+ micro_batch_size: 4
15
+ num_epochs: 1
16
+ optimizer: adamw_bnb_8bit
17
+ lr_scheduler: cosine
18
+ learning_rate: 0.0001
19
+ load_in_8bit: true
20
+ load_in_4bit: false
21
+ adapter: lora
22
+ lora_model_dir: null
23
+ lora_r: 8
24
+ lora_alpha: 16
25
+ lora_dropout: 0.05
26
+ lora_target_modules:
27
+ - q_proj
28
+ - v_proj
29
+ - k_proj
30
+ datasets:
31
+ - path: /workspace/FinLoRA/data/train/formula_train.jsonl
32
+ type:
33
+ system_prompt: ''
34
+ field_system: system
35
+ field_instruction: context
36
+ field_output: target
37
+ format: '[INST] {instruction} [/INST]'
38
+ no_input_format: '[INST] {instruction} [/INST]'
39
+ dataset_prepared_path: null
40
+ val_set_size: 0.02
41
+ output_dir: /workspace/FinLoRA/lora/axolotl-output/formula_llama_3_1_8b_8bits_r8
42
+ peft_use_dora: false
43
+ sequence_len: 4096
44
+ sample_packing: false
45
+ pad_to_sequence_len: false
46
+ wandb_project: finlora_models
47
+ wandb_entity: null
48
+ wandb_watch: gradients
49
+ wandb_name: formula_llama_3_1_8b_8bits_r8
50
+ wandb_log_model: 'false'
51
+ bf16: auto
52
+ tf32: false
53
+ gradient_checkpointing: true
54
+ resume_from_checkpoint: null
55
+ logging_steps: 500
56
+ flash_attention: false
57
+ deepspeed: deepspeed_configs/zero1.json
58
+ warmup_steps: 10
59
+ evals_per_epoch: 4
60
+ saves_per_epoch: 1
61
+ weight_decay: 0.0
62
+ special_tokens:
63
+ pad_token: <|end_of_text|>
64
+ chat_template: llama3
65
+
66
+ ```
67
+
68
+ </details><br>
69
+
70
+ # workspace/FinLoRA/lora/axolotl-output/formula_llama_3_1_8b_8bits_r8
71
+
72
+ This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/formula_train.jsonl dataset.
73
+ It achieves the following results on the evaluation set:
74
+ - Loss: 1.5104
75
+
76
+ ## Model description
77
+
78
+ More information needed
79
+
80
+ ## Intended uses & limitations
81
+
82
+ More information needed
83
+
84
+ ## Training and evaluation data
85
+
86
+ More information needed
87
+
88
+ ## Training procedure
89
+
90
+ ### Training hyperparameters
91
+
92
+ The following hyperparameters were used during training:
93
+ - learning_rate: 0.0001
94
+ - train_batch_size: 4
95
+ - eval_batch_size: 4
96
+ - seed: 42
97
+ - distributed_type: multi-GPU
98
+ - num_devices: 5
99
+ - gradient_accumulation_steps: 2
100
+ - total_train_batch_size: 40
101
+ - total_eval_batch_size: 20
102
+ - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
103
+ - lr_scheduler_type: cosine
104
+ - lr_scheduler_warmup_steps: 10
105
+ - num_epochs: 1.0
106
+
107
+ ### Training results
108
+
109
+ | Training Loss | Epoch | Step | Validation Loss |
110
+ |:-------------:|:-----:|:----:|:---------------:|
111
+ | No log | 0.05 | 1 | 4.5176 |
112
+ | No log | 0.25 | 5 | 4.2441 |
113
+ | No log | 0.5 | 10 | 2.5134 |
114
+ | No log | 0.75 | 15 | 1.6948 |
115
+ | No log | 1.0 | 20 | 1.5104 |
116
+
117
+
118
+ ### Framework versions
119
+
120
+ - PEFT 0.15.2
121
+ - Transformers 4.51.3
122
+ - Pytorch 2.8.0.dev20250319+cu128
123
+ - Datasets 3.5.1
124
+ - Tokenizers 0.21.1
finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/adapter_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 8,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "v_proj",
28
+ "q_proj",
29
+ "k_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "trainable_token_indices": null,
33
+ "use_dora": false,
34
+ "use_rslora": false
35
+ }
finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21559ce8defb3cd2bb17b0f827749447c48d53170994e87b7548b243ef7c31a3
3
+ size 9462656
finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/README.md ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Model Card for Model ID
3
+
4
+ <!-- Provide a quick summary of what the model is/does. -->
5
+
6
+
7
+
8
+ ## Model Details
9
+
10
+ ### Model Description
11
+
12
+ <!-- Provide a longer summary of what this model is. -->
13
+
14
+
15
+
16
+ - **Developed by:** [More Information Needed]
17
+ - **Funded by [optional]:** [More Information Needed]
18
+ - **Shared by [optional]:** [More Information Needed]
19
+ - **Model type:** [More Information Needed]
20
+ - **Language(s) (NLP):** [More Information Needed]
21
+ - **License:** [More Information Needed]
22
+ - **Finetuned from model [optional]:** [More Information Needed]
23
+
24
+ ### Model Sources [optional]
25
+
26
+ <!-- Provide the basic links for the model. -->
27
+
28
+ - **Repository:** [More Information Needed]
29
+ - **Paper [optional]:** [More Information Needed]
30
+ - **Demo [optional]:** [More Information Needed]
31
+
32
+ ## Uses
33
+
34
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
35
+
36
+ ### Direct Use
37
+
38
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
39
+
40
+ [More Information Needed]
41
+
42
+ ### Downstream Use [optional]
43
+
44
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
45
+
46
+ [More Information Needed]
47
+
48
+ ### Out-of-Scope Use
49
+
50
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ## Bias, Risks, and Limitations
55
+
56
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Recommendations
61
+
62
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
63
+
64
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
65
+
66
+ ## How to Get Started with the Model
67
+
68
+ Use the code below to get started with the model.
69
+
70
+ [More Information Needed]
71
+
72
+ ## Training Details
73
+
74
+ ### Training Data
75
+
76
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the fine-tuning data is all about as well as documentation related to data pre-processing or additional filtering. -->
77
+
78
+ [More Information Needed]
79
+
80
+ ### Training Procedure
81
+
82
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the fine-tuning procedure. -->
83
+
84
+ #### Preprocessing [optional]
85
+
86
+ [More Information Needed]
87
+
88
+
89
+ #### Training Hyperparameters
90
+
91
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
92
+
93
+ #### Speeds, Sizes, Times [optional]
94
+
95
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
96
+
97
+ [More Information Needed]
98
+
99
+ ## Evaluation
100
+
101
+ <!-- This section describes the evaluation protocols and provides the results. -->
102
+
103
+ ### Testing Data, Factors & Metrics
104
+
105
+ #### Testing Data
106
+
107
+ <!-- This should link to a Dataset Card if possible. -->
108
+
109
+ [More Information Needed]
110
+
111
+ #### Factors
112
+
113
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
114
+
115
+ [More Information Needed]
116
+
117
+ #### Metrics
118
+
119
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
120
+
121
+ [More Information Needed]
122
+
123
+ ### Results
124
+
125
+ [More Information Needed]
126
+
127
+ #### Summary
128
+
129
+
130
+
131
+ ## Model Examination [optional]
132
+
133
+ <!-- Relevant interpretability work for the model goes here -->
134
+
135
+ [More Information Needed]
136
+
137
+ ## Environmental Impact
138
+
139
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
140
+
141
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
142
+
143
+ - **Hardware Type:** [More Information Needed]
144
+ - **Hours used:** [More Information Needed]
145
+ - **Cloud Provider:** [More Information Needed]
146
+ - **Compute Region:** [More Information Needed]
147
+ - **Carbon Emitted:** [More Information Needed]
148
+
149
+ ## Technical Specifications [optional]
150
+
151
+ ### Model Architecture and Objective
152
+
153
+ [More Information Needed]
154
+
155
+ ### Compute Infrastructure
156
+
157
+ [More Information Needed]
158
+
159
+ #### Hardware
160
+
161
+ [More Information Needed]
162
+
163
+ #### Software
164
+
165
+ [More Information Needed]
166
+
167
+ ## Citation [optional]
168
+
169
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
170
+
171
+ **BibTeX:**
172
+
173
+ [More Information Needed]
174
+
175
+ **APA:**
176
+
177
+ [More Information Needed]
178
+
179
+ ## Glossary [optional]
180
+
181
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
182
+
183
+ [More Information Needed]
184
+
185
+ ## More Information [optional]
186
+
187
+ [More Information Needed]
188
+
189
+ ## Model Card Authors [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Contact
194
+
195
+ [More Information Needed]
196
+ ### Framework versions
197
+
198
+ - PEFT 0.13.2
finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/adapter_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 32,
14
+ "lora_dropout": 0.1,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "q_proj",
24
+ "k_proj",
25
+ "v_proj"
26
+ ],
27
+ "task_type": "CAUSAL_LM",
28
+ "use_dora": false,
29
+ "use_rslora": false
30
+ }
finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bb065b036c7f919cf23b5bc1ff2039d2ee9cf30fe0136cf4a77bb3b56ad187c
3
+ size 9462656
finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/README.md ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Model Card for Model ID
3
+
4
+ <!-- Provide a quick summary of what the model is/does. -->
5
+
6
+
7
+
8
+ ## Model Details
9
+
10
+ ### Model Description
11
+
12
+ <!-- Provide a longer summary of what this model is. -->
13
+
14
+
15
+
16
+ - **Developed by:** [More Information Needed]
17
+ - **Funded by [optional]:** [More Information Needed]
18
+ - **Shared by [optional]:** [More Information Needed]
19
+ - **Model type:** [More Information Needed]
20
+ - **Language(s) (NLP):** [More Information Needed]
21
+ - **License:** [More Information Needed]
22
+ - **Finetuned from model [optional]:** [More Information Needed]
23
+
24
+ ### Model Sources [optional]
25
+
26
+ <!-- Provide the basic links for the model. -->
27
+
28
+ - **Repository:** [More Information Needed]
29
+ - **Paper [optional]:** [More Information Needed]
30
+ - **Demo [optional]:** [More Information Needed]
31
+
32
+ ## Uses
33
+
34
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
35
+
36
+ ### Direct Use
37
+
38
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
39
+
40
+ [More Information Needed]
41
+
42
+ ### Downstream Use [optional]
43
+
44
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
45
+
46
+ [More Information Needed]
47
+
48
+ ### Out-of-Scope Use
49
+
50
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ## Bias, Risks, and Limitations
55
+
56
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Recommendations
61
+
62
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
63
+
64
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
65
+
66
+ ## How to Get Started with the Model
67
+
68
+ Use the code below to get started with the model.
69
+
70
+ [More Information Needed]
71
+
72
+ ## Training Details
73
+
74
+ ### Training Data
75
+
76
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the fine-tuning data is all about as well as documentation related to data pre-processing or additional filtering. -->
77
+
78
+ [More Information Needed]
79
+
80
+ ### Training Procedure
81
+
82
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the fine-tuning procedure. -->
83
+
84
+ #### Preprocessing [optional]
85
+
86
+ [More Information Needed]
87
+
88
+
89
+ #### Training Hyperparameters
90
+
91
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
92
+
93
+ #### Speeds, Sizes, Times [optional]
94
+
95
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
96
+
97
+ [More Information Needed]
98
+
99
+ ## Evaluation
100
+
101
+ <!-- This section describes the evaluation protocols and provides the results. -->
102
+
103
+ ### Testing Data, Factors & Metrics
104
+
105
+ #### Testing Data
106
+
107
+ <!-- This should link to a Dataset Card if possible. -->
108
+
109
+ [More Information Needed]
110
+
111
+ #### Factors
112
+
113
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
114
+
115
+ [More Information Needed]
116
+
117
+ #### Metrics
118
+
119
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
120
+
121
+ [More Information Needed]
122
+
123
+ ### Results
124
+
125
+ [More Information Needed]
126
+
127
+ #### Summary
128
+
129
+
130
+
131
+ ## Model Examination [optional]
132
+
133
+ <!-- Relevant interpretability work for the model goes here -->
134
+
135
+ [More Information Needed]
136
+
137
+ ## Environmental Impact
138
+
139
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
140
+
141
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
142
+
143
+ - **Hardware Type:** [More Information Needed]
144
+ - **Hours used:** [More Information Needed]
145
+ - **Cloud Provider:** [More Information Needed]
146
+ - **Compute Region:** [More Information Needed]
147
+ - **Carbon Emitted:** [More Information Needed]
148
+
149
+ ## Technical Specifications [optional]
150
+
151
+ ### Model Architecture and Objective
152
+
153
+ [More Information Needed]
154
+
155
+ ### Compute Infrastructure
156
+
157
+ [More Information Needed]
158
+
159
+ #### Hardware
160
+
161
+ [More Information Needed]
162
+
163
+ #### Software
164
+
165
+ [More Information Needed]
166
+
167
+ ## Citation [optional]
168
+
169
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
170
+
171
+ **BibTeX:**
172
+
173
+ [More Information Needed]
174
+
175
+ **APA:**
176
+
177
+ [More Information Needed]
178
+
179
+ ## Glossary [optional]
180
+
181
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
182
+
183
+ [More Information Needed]
184
+
185
+ ## More Information [optional]
186
+
187
+ [More Information Needed]
188
+
189
+ ## Model Card Authors [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Contact
194
+
195
+ [More Information Needed]
196
+ ### Framework versions
197
+
198
+ - PEFT 0.13.2
finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/adapter_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 32,
14
+ "lora_dropout": 0.1,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "k_proj",
24
+ "v_proj",
25
+ "q_proj"
26
+ ],
27
+ "task_type": "CAUSAL_LM",
28
+ "use_dora": false,
29
+ "use_rslora": false
30
+ }
finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd879e3d4d381efd7d20aafbb9f16519bfd212ab25eb733a7186d1a0234afa9f
3
+ size 9462464
finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/README.md ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Model Card for Model ID
3
+
4
+ <!-- Provide a quick summary of what the model is/does. -->
5
+
6
+
7
+
8
+ ## Model Details
9
+
10
+ ### Model Description
11
+
12
+ <!-- Provide a longer summary of what this model is. -->
13
+
14
+
15
+
16
+ - **Developed by:** [More Information Needed]
17
+ - **Funded by [optional]:** [More Information Needed]
18
+ - **Shared by [optional]:** [More Information Needed]
19
+ - **Model type:** [More Information Needed]
20
+ - **Language(s) (NLP):** [More Information Needed]
21
+ - **License:** [More Information Needed]
22
+ - **Finetuned from model [optional]:** [More Information Needed]
23
+
24
+ ### Model Sources [optional]
25
+
26
+ <!-- Provide the basic links for the model. -->
27
+
28
+ - **Repository:** [More Information Needed]
29
+ - **Paper [optional]:** [More Information Needed]
30
+ - **Demo [optional]:** [More Information Needed]
31
+
32
+ ## Uses
33
+
34
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
35
+
36
+ ### Direct Use
37
+
38
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
39
+
40
+ [More Information Needed]
41
+
42
+ ### Downstream Use [optional]
43
+
44
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
45
+
46
+ [More Information Needed]
47
+
48
+ ### Out-of-Scope Use
49
+
50
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ## Bias, Risks, and Limitations
55
+
56
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Recommendations
61
+
62
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
63
+
64
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
65
+
66
+ ## How to Get Started with the Model
67
+
68
+ Use the code below to get started with the model.
69
+
70
+ [More Information Needed]
71
+
72
+ ## Training Details
73
+
74
+ ### Training Data
75
+
76
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
77
+
78
+ [More Information Needed]
79
+
80
+ ### Training Procedure
81
+
82
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
83
+
84
+ #### Preprocessing [optional]
85
+
86
+ [More Information Needed]
87
+
88
+
89
+ #### Training Hyperparameters
90
+
91
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
92
+
93
+ #### Speeds, Sizes, Times [optional]
94
+
95
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
96
+
97
+ [More Information Needed]
98
+
99
+ ## Evaluation
100
+
101
+ <!-- This section describes the evaluation protocols and provides the results. -->
102
+
103
+ ### Testing Data, Factors & Metrics
104
+
105
+ #### Testing Data
106
+
107
+ <!-- This should link to a Dataset Card if possible. -->
108
+
109
+ [More Information Needed]
110
+
111
+ #### Factors
112
+
113
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
114
+
115
+ [More Information Needed]
116
+
117
+ #### Metrics
118
+
119
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
120
+
121
+ [More Information Needed]
122
+
123
+ ### Results
124
+
125
+ [More Information Needed]
126
+
127
+ #### Summary
128
+
129
+
130
+
131
+ ## Model Examination [optional]
132
+
133
+ <!-- Relevant interpretability work for the model goes here -->
134
+
135
+ [More Information Needed]
136
+
137
+ ## Environmental Impact
138
+
139
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
140
+
141
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
142
+
143
+ - **Hardware Type:** [More Information Needed]
144
+ - **Hours used:** [More Information Needed]
145
+ - **Cloud Provider:** [More Information Needed]
146
+ - **Compute Region:** [More Information Needed]
147
+ - **Carbon Emitted:** [More Information Needed]
148
+
149
+ ## Technical Specifications [optional]
150
+
151
+ ### Model Architecture and Objective
152
+
153
+ [More Information Needed]
154
+
155
+ ### Compute Infrastructure
156
+
157
+ [More Information Needed]
158
+
159
+ #### Hardware
160
+
161
+ [More Information Needed]
162
+
163
+ #### Software
164
+
165
+ [More Information Needed]
166
+
167
+ ## Citation [optional]
168
+
169
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
170
+
171
+ **BibTeX:**
172
+
173
+ [More Information Needed]
174
+
175
+ **APA:**
176
+
177
+ [More Information Needed]
178
+
179
+ ## Glossary [optional]
180
+
181
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
182
+
183
+ [More Information Needed]
184
+
185
+ ## More Information [optional]
186
+
187
+ [More Information Needed]
188
+
189
+ ## Model Card Authors [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Contact
194
+
195
+ [More Information Needed]
196
+ ### Framework versions
197
+
198
+ - PEFT 0.15.2
finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/adapter_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.1,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 8,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "k_proj",
28
+ "v_proj",
29
+ "q_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "trainable_token_indices": null,
33
+ "use_dora": false,
34
+ "use_rslora": false
35
+ }
finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b1185820498a284facd897021fd2fcf7eed9f02bb6c558abfb0e03f3b563034
3
+ size 9462464
finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/README.md ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
3
+ should probably proofread and complete it, then remove this comment. -->
4
+
5
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
6
+ <details><summary>See axolotl config</summary>
7
+
8
+ axolotl version: `0.9.1.post1`
9
+ ```yaml
10
+ base_model: meta-llama/Llama-3.1-8B-Instruct
11
+ model_type: LlamaForCausalLM
12
+ tokenizer_type: AutoTokenizer
13
+ gradient_accumulation_steps: 8
14
+ micro_batch_size: 1
15
+ num_epochs: 1
16
+ optimizer: adamw_bnb_8bit
17
+ lr_scheduler: cosine
18
+ learning_rate: 0.0001
19
+ load_in_8bit: true
20
+ load_in_4bit: false
21
+ adapter: lora
22
+ lora_model_dir: null
23
+ lora_r: 8
24
+ lora_alpha: 16
25
+ lora_dropout: 0.05
26
+ lora_target_modules:
27
+ - q_proj
28
+ - v_proj
29
+ - k_proj
30
+ datasets:
31
+ - path: /workspace/FinLoRA/data/train/xbrl_extract_train.jsonl
32
+ type:
33
+ system_prompt: ''
34
+ field_system: system
35
+ field_instruction: context
36
+ field_output: target
37
+ format: '[INST] {instruction} [/INST]'
38
+ no_input_format: '[INST] {instruction} [/INST]'
39
+ dataset_prepared_path: null
40
+ val_set_size: 0.02
41
+ output_dir: /workspace/FinLoRA/lora/axolotl-output/xbrl_extract_llama_3_1_8b_8bits_r8
42
+ peft_use_dora: false
43
+ peft_use_rslora: false
44
+ sequence_len: 4096
45
+ sample_packing: false
46
+ pad_to_sequence_len: false
47
+ wandb_project: finlora_models
48
+ wandb_entity: null
49
+ wandb_watch: gradients
50
+ wandb_name: xbrl_extract_llama_3_1_8b_8bits_r8
51
+ wandb_log_model: 'false'
52
+ bf16: auto
53
+ tf32: false
54
+ gradient_checkpointing: true
55
+ resume_from_checkpoint: null
56
+ logging_steps: 500
57
+ flash_attention: false
58
+ deepspeed: deepspeed_configs/zero1.json
59
+ warmup_steps: 10
60
+ evals_per_epoch: 4
61
+ saves_per_epoch: 1
62
+ weight_decay: 0.0
63
+ special_tokens:
64
+ pad_token: <|end_of_text|>
65
+ chat_template: llama3
66
+
67
+ ```
68
+
69
+ </details><br>
70
+
71
+ # workspace/FinLoRA/lora/axolotl-output/xbrl_extract_llama_3_1_8b_8bits_r8
72
+
73
+ This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/xbrl_extract_train.jsonl dataset.
74
+ It achieves the following results on the evaluation set:
75
+ - Loss: 0.0025
76
+
77
+ ## Model description
78
+
79
+ More information needed
80
+
81
+ ## Intended uses & limitations
82
+
83
+ More information needed
84
+
85
+ ## Training and evaluation data
86
+
87
+ More information needed
88
+
89
+ ## Training procedure
90
+
91
+ ### Training hyperparameters
92
+
93
+ The following hyperparameters were used during training:
94
+ - learning_rate: 0.0001
95
+ - train_batch_size: 1
96
+ - eval_batch_size: 1
97
+ - seed: 42
98
+ - distributed_type: multi-GPU
99
+ - num_devices: 4
100
+ - gradient_accumulation_steps: 8
101
+ - total_train_batch_size: 32
102
+ - total_eval_batch_size: 4
103
+ - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
104
+ - lr_scheduler_type: cosine
105
+ - lr_scheduler_warmup_steps: 10
106
+ - num_epochs: 1.0
107
+
108
+ ### Training results
109
+
110
+ | Training Loss | Epoch | Step | Validation Loss |
111
+ |:-------------:|:------:|:----:|:---------------:|
112
+ | No log | 0.0038 | 1 | 1.6299 |
113
+ | No log | 0.2526 | 67 | 0.0075 |
114
+ | No log | 0.5052 | 134 | 0.0037 |
115
+ | No log | 0.7578 | 201 | 0.0025 |
116
+
117
+
118
+ ### Framework versions
119
+
120
+ - PEFT 0.15.2
121
+ - Transformers 4.51.3
122
+ - Pytorch 2.8.0.dev20250319+cu128
123
+ - Datasets 3.5.1
124
+ - Tokenizers 0.21.1
finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/adapter_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 8,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "q_proj",
28
+ "v_proj",
29
+ "k_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "trainable_token_indices": null,
33
+ "use_dora": false,
34
+ "use_rslora": false
35
+ }
finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb3c3622e2c20b903a37a51149121196cc2a6ae83e63ca97d859d3389bbc5025
3
+ size 9462656
finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/README.md ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
3
+ should probably proofread and complete it, then remove this comment. -->
4
+
5
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
6
+ <details><summary>See axolotl config</summary>
7
+
8
+ axolotl version: `0.9.1`
9
+ ```yaml
10
+ base_model: meta-llama/Llama-3.1-8B-Instruct
11
+ model_type: LlamaForCausalLM
12
+ tokenizer_type: AutoTokenizer
13
+ gradient_accumulation_steps: 2
14
+ micro_batch_size: 4
15
+ num_epochs: 1
16
+ optimizer: adamw_bnb_8bit
17
+ lr_scheduler: cosine
18
+ learning_rate: 0.0001
19
+ load_in_8bit: true
20
+ load_in_4bit: false
21
+ adapter: lora
22
+ lora_model_dir: null
23
+ lora_r: 8
24
+ lora_alpha: 16
25
+ lora_dropout: 0.05
26
+ lora_target_modules:
27
+ - q_proj
28
+ - v_proj
29
+ - k_proj
30
+ datasets:
31
+ - path: /workspace/FinLoRA/data/train/xbrl_term_train.jsonl
32
+ type:
33
+ system_prompt: ''
34
+ field_system: system
35
+ field_instruction: context
36
+ field_output: target
37
+ format: '[INST] {instruction} [/INST]'
38
+ no_input_format: '[INST] {instruction} [/INST]'
39
+ dataset_prepared_path: null
40
+ val_set_size: 0.02
41
+ output_dir: /workspace/FinLoRA/lora/axolotl-output/xbrl_term_llama_3_1_8b_8bits_r8
42
+ peft_use_dora: false
43
+ sequence_len: 4096
44
+ sample_packing: false
45
+ pad_to_sequence_len: false
46
+ wandb_project: finlora_models
47
+ wandb_entity: null
48
+ wandb_watch: gradients
49
+ wandb_name: xbrl_term_llama_3_1_8b_8bits_r8
50
+ wandb_log_model: 'false'
51
+ bf16: auto
52
+ tf32: false
53
+ gradient_checkpointing: true
54
+ resume_from_checkpoint: null
55
+ logging_steps: 500
56
+ flash_attention: false
57
+ deepspeed: deepspeed_configs/zero1.json
58
+ warmup_steps: 10
59
+ evals_per_epoch: 4
60
+ saves_per_epoch: 1
61
+ weight_decay: 0.0
62
+ special_tokens:
63
+ pad_token: <|end_of_text|>
64
+ chat_template: llama3
65
+
66
+ ```
67
+
68
+ </details><br>
69
+
70
+ # workspace/FinLoRA/lora/axolotl-output/xbrl_term_llama_3_1_8b_8bits_r8
71
+
72
+ This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/xbrl_term_train.jsonl dataset.
73
+ It achieves the following results on the evaluation set:
74
+ - Loss: 1.5077
75
+
76
+ ## Model description
77
+
78
+ More information needed
79
+
80
+ ## Intended uses & limitations
81
+
82
+ More information needed
83
+
84
+ ## Training and evaluation data
85
+
86
+ More information needed
87
+
88
+ ## Training procedure
89
+
90
+ ### Training hyperparameters
91
+
92
+ The following hyperparameters were used during training:
93
+ - learning_rate: 0.0001
94
+ - train_batch_size: 4
95
+ - eval_batch_size: 4
96
+ - seed: 42
97
+ - distributed_type: multi-GPU
98
+ - num_devices: 5
99
+ - gradient_accumulation_steps: 2
100
+ - total_train_batch_size: 40
101
+ - total_eval_batch_size: 20
102
+ - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
103
+ - lr_scheduler_type: cosine
104
+ - lr_scheduler_warmup_steps: 10
105
+ - num_epochs: 1.0
106
+
107
+ ### Training results
108
+
109
+ | Training Loss | Epoch | Step | Validation Loss |
110
+ |:-------------:|:------:|:----:|:---------------:|
111
+ | No log | 0.0070 | 1 | 2.5692 |
112
+ | No log | 0.2509 | 36 | 1.7055 |
113
+ | No log | 0.5017 | 72 | 1.5480 |
114
+ | No log | 0.7526 | 108 | 1.5077 |
115
+
116
+
117
+ ### Framework versions
118
+
119
+ - PEFT 0.15.2
120
+ - Transformers 4.51.3
121
+ - Pytorch 2.8.0.dev20250319+cu128
122
+ - Datasets 3.5.1
123
+ - Tokenizers 0.21.1
finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/adapter_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 8,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "v_proj",
28
+ "q_proj",
29
+ "k_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "trainable_token_indices": null,
33
+ "use_dora": false,
34
+ "use_rslora": false
35
+ }
finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d91d9dd57ea4e694d41c537eb78a3426fc0798be06789d80d67d5b8438b9eea
3
+ size 9462656
finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/README.md ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Model Card for Model ID
3
+
4
+ <!-- Provide a quick summary of what the model is/does. -->
5
+
6
+
7
+
8
+ ## Model Details
9
+
10
+ ### Model Description
11
+
12
+ <!-- Provide a longer summary of what this model is. -->
13
+
14
+
15
+
16
+ - **Developed by:** [More Information Needed]
17
+ - **Funded by [optional]:** [More Information Needed]
18
+ - **Shared by [optional]:** [More Information Needed]
19
+ - **Model type:** [More Information Needed]
20
+ - **Language(s) (NLP):** [More Information Needed]
21
+ - **License:** [More Information Needed]
22
+ - **Finetuned from model [optional]:** [More Information Needed]
23
+
24
+ ### Model Sources [optional]
25
+
26
+ <!-- Provide the basic links for the model. -->
27
+
28
+ - **Repository:** [More Information Needed]
29
+ - **Paper [optional]:** [More Information Needed]
30
+ - **Demo [optional]:** [More Information Needed]
31
+
32
+ ## Uses
33
+
34
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
35
+
36
+ ### Direct Use
37
+
38
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
39
+
40
+ [More Information Needed]
41
+
42
+ ### Downstream Use [optional]
43
+
44
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
45
+
46
+ [More Information Needed]
47
+
48
+ ### Out-of-Scope Use
49
+
50
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ## Bias, Risks, and Limitations
55
+
56
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Recommendations
61
+
62
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
63
+
64
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
65
+
66
+ ## How to Get Started with the Model
67
+
68
+ Use the code below to get started with the model.
69
+
70
+ [More Information Needed]
71
+
72
+ ## Training Details
73
+
74
+ ### Training Data
75
+
76
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the fine-tuning data is all about as well as documentation related to data pre-processing or additional filtering. -->
77
+
78
+ [More Information Needed]
79
+
80
+ ### Training Procedure
81
+
82
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the fine-tuning procedure. -->
83
+
84
+ #### Preprocessing [optional]
85
+
86
+ [More Information Needed]
87
+
88
+
89
+ #### Training Hyperparameters
90
+
91
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
92
+
93
+ #### Speeds, Sizes, Times [optional]
94
+
95
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
96
+
97
+ [More Information Needed]
98
+
99
+ ## Evaluation
100
+
101
+ <!-- This section describes the evaluation protocols and provides the results. -->
102
+
103
+ ### Testing Data, Factors & Metrics
104
+
105
+ #### Testing Data
106
+
107
+ <!-- This should link to a Dataset Card if possible. -->
108
+
109
+ [More Information Needed]
110
+
111
+ #### Factors
112
+
113
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
114
+
115
+ [More Information Needed]
116
+
117
+ #### Metrics
118
+
119
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
120
+
121
+ [More Information Needed]
122
+
123
+ ### Results
124
+
125
+ [More Information Needed]
126
+
127
+ #### Summary
128
+
129
+
130
+
131
+ ## Model Examination [optional]
132
+
133
+ <!-- Relevant interpretability work for the model goes here -->
134
+
135
+ [More Information Needed]
136
+
137
+ ## Environmental Impact
138
+
139
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
140
+
141
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
142
+
143
+ - **Hardware Type:** [More Information Needed]
144
+ - **Hours used:** [More Information Needed]
145
+ - **Cloud Provider:** [More Information Needed]
146
+ - **Compute Region:** [More Information Needed]
147
+ - **Carbon Emitted:** [More Information Needed]
148
+
149
+ ## Technical Specifications [optional]
150
+
151
+ ### Model Architecture and Objective
152
+
153
+ [More Information Needed]
154
+
155
+ ### Compute Infrastructure
156
+
157
+ [More Information Needed]
158
+
159
+ #### Hardware
160
+
161
+ [More Information Needed]
162
+
163
+ #### Software
164
+
165
+ [More Information Needed]
166
+
167
+ ## Citation [optional]
168
+
169
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
170
+
171
+ **BibTeX:**
172
+
173
+ [More Information Needed]
174
+
175
+ **APA:**
176
+
177
+ [More Information Needed]
178
+
179
+ ## Glossary [optional]
180
+
181
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
182
+
183
+ [More Information Needed]
184
+
185
+ ## More Information [optional]
186
+
187
+ [More Information Needed]
188
+
189
+ ## Model Card Authors [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Contact
194
+
195
+ [More Information Needed]
196
+ ### Framework versions
197
+
198
+ - PEFT 0.13.2
finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/adapter_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 32,
14
+ "lora_dropout": 0.1,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 8,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "v_proj",
24
+ "k_proj",
25
+ "q_proj"
26
+ ],
27
+ "task_type": "CAUSAL_LM",
28
+ "use_dora": false,
29
+ "use_rslora": false
30
+ }
finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf7d39b1998d060dfeaee90c1c0031a17d848871c090af928d1b696936d2eb2b
3
+ size 9462464
finlora_hf_submission/models_4bit/.DS_Store ADDED
Binary file (10.2 kB). View file
 
finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/README.md ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
3
+ should probably proofread and complete it, then remove this comment. -->
4
+
5
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
6
+ <details><summary>See axolotl config</summary>
7
+
8
+ axolotl version: `0.9.1`
9
+ ```yaml
10
+ base_model: meta-llama/Llama-3.1-8B-Instruct
11
+ model_type: LlamaForCausalLM
12
+ tokenizer_type: AutoTokenizer
13
+ gradient_accumulation_steps: 2
14
+ micro_batch_size: 1
15
+ num_epochs: 4
16
+ optimizer: adamw_torch_fused
17
+ lr_scheduler: cosine
18
+ learning_rate: 0.0001
19
+ load_in_8bit: false
20
+ load_in_4bit: true
21
+ adapter: lora
22
+ lora_model_dir: null
23
+ lora_r: 4
24
+ lora_alpha: 16
25
+ lora_dropout: 0.05
26
+ lora_target_modules:
27
+ - q_proj
28
+ - v_proj
29
+ - k_proj
30
+ datasets:
31
+ - path: /workspace/FinLoRA/data/train/financebench_train.jsonl
32
+ type:
33
+ system_prompt: ''
34
+ field_system: system
35
+ field_instruction: context
36
+ field_output: target
37
+ format: '[INST] {instruction} [/INST]'
38
+ no_input_format: '[INST] {instruction} [/INST]'
39
+ dataset_prepared_path: null
40
+ val_set_size: 0.02
41
+ output_dir: /workspace/FinLoRA/lora/axolotl-output/financebench_llama_3_1_8b_4bits_r4
42
+ peft_use_dora: false
43
+ sequence_len: 4096
44
+ sample_packing: false
45
+ pad_to_sequence_len: false
46
+ wandb_project: finlora_models
47
+ wandb_entity: null
48
+ wandb_watch: gradients
49
+ wandb_name: financebench_llama_3_1_8b_4bits_r4
50
+ wandb_log_model: 'false'
51
+ bf16: auto
52
+ tf32: false
53
+ gradient_checkpointing: true
54
+ resume_from_checkpoint: null
55
+ logging_steps: 500
56
+ flash_attention: false
57
+ deepspeed: deepspeed_configs/zero1.json
58
+ warmup_steps: 10
59
+ evals_per_epoch: 4
60
+ saves_per_epoch: 1
61
+ weight_decay: 0.0
62
+ special_tokens:
63
+ pad_token: <|end_of_text|>
64
+ chat_template: llama3
65
+
66
+ ```
67
+
68
+ </details><br>
69
+
70
+ # workspace/FinLoRA/lora/axolotl-output/financebench_llama_3_1_8b_4bits_r4
71
+
72
+ This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/financebench_train.jsonl dataset.
73
+ It achieves the following results on the evaluation set:
74
+ - Loss: 3.3003
75
+
76
+ ## Model description
77
+
78
+ More information needed
79
+
80
+ ## Intended uses & limitations
81
+
82
+ More information needed
83
+
84
+ ## Training and evaluation data
85
+
86
+ More information needed
87
+
88
+ ## Training procedure
89
+
90
+ ### Training hyperparameters
91
+
92
+ The following hyperparameters were used during training:
93
+ - learning_rate: 0.0001
94
+ - train_batch_size: 1
95
+ - eval_batch_size: 1
96
+ - seed: 42
97
+ - distributed_type: multi-GPU
98
+ - num_devices: 5
99
+ - gradient_accumulation_steps: 2
100
+ - total_train_batch_size: 10
101
+ - total_eval_batch_size: 5
102
+ - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
103
+ - lr_scheduler_type: cosine
104
+ - lr_scheduler_warmup_steps: 10
105
+ - num_epochs: 4.0
106
+
107
+ ### Training results
108
+
109
+ | Training Loss | Epoch | Step | Validation Loss |
110
+ |:-------------:|:------:|:----:|:---------------:|
111
+ | No log | 0.1176 | 1 | 4.9794 |
112
+ | No log | 0.2353 | 2 | 4.9922 |
113
+ | No log | 0.4706 | 4 | 4.9603 |
114
+ | No log | 0.7059 | 6 | 4.8793 |
115
+ | No log | 0.9412 | 8 | 4.6411 |
116
+ | No log | 1.1176 | 10 | 4.4789 |
117
+ | No log | 1.3529 | 12 | 4.1465 |
118
+ | No log | 1.5882 | 14 | 3.9720 |
119
+ | No log | 1.8235 | 16 | 3.8714 |
120
+ | No log | 2.0 | 18 | 3.7423 |
121
+ | No log | 2.2353 | 20 | 3.6258 |
122
+ | No log | 2.4706 | 22 | 3.5165 |
123
+ | No log | 2.7059 | 24 | 3.4236 |
124
+ | No log | 2.9412 | 26 | 3.3368 |
125
+ | No log | 3.1176 | 28 | 3.3172 |
126
+ | No log | 3.3529 | 30 | 3.2741 |
127
+ | No log | 3.5882 | 32 | 3.3003 |
128
+
129
+
130
+ ### Framework versions
131
+
132
+ - PEFT 0.15.2
133
+ - Transformers 4.51.3
134
+ - Pytorch 2.8.0.dev20250319+cu128
135
+ - Datasets 3.5.1
136
+ - Tokenizers 0.21.1
finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/adapter_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 4,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "q_proj",
28
+ "v_proj",
29
+ "k_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "trainable_token_indices": null,
33
+ "use_dora": false,
34
+ "use_rslora": false
35
+ }
finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4313c4d38d04e2f0a3324938a91d1680a0d1d39fe4c4eafd8ec90acaf5953ba
3
+ size 4744016
finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/README.md ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Model Card for Model ID
3
+
4
+ <!-- Provide a quick summary of what the model is/does. -->
5
+
6
+
7
+
8
+ ## Model Details
9
+
10
+ ### Model Description
11
+
12
+ <!-- Provide a longer summary of what this model is. -->
13
+
14
+
15
+
16
+ - **Developed by:** [More Information Needed]
17
+ - **Funded by [optional]:** [More Information Needed]
18
+ - **Shared by [optional]:** [More Information Needed]
19
+ - **Model type:** [More Information Needed]
20
+ - **Language(s) (NLP):** [More Information Needed]
21
+ - **License:** [More Information Needed]
22
+ - **Finetuned from model [optional]:** [More Information Needed]
23
+
24
+ ### Model Sources [optional]
25
+
26
+ <!-- Provide the basic links for the model. -->
27
+
28
+ - **Repository:** [More Information Needed]
29
+ - **Paper [optional]:** [More Information Needed]
30
+ - **Demo [optional]:** [More Information Needed]
31
+
32
+ ## Uses
33
+
34
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
35
+
36
+ ### Direct Use
37
+
38
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
39
+
40
+ [More Information Needed]
41
+
42
+ ### Downstream Use [optional]
43
+
44
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
45
+
46
+ [More Information Needed]
47
+
48
+ ### Out-of-Scope Use
49
+
50
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ## Bias, Risks, and Limitations
55
+
56
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Recommendations
61
+
62
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
63
+
64
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
65
+
66
+ ## How to Get Started with the Model
67
+
68
+ Use the code below to get started with the model.
69
+
70
+ [More Information Needed]
71
+
72
+ ## Training Details
73
+
74
+ ### Training Data
75
+
76
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
77
+
78
+ [More Information Needed]
79
+
80
+ ### Training Procedure
81
+
82
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
83
+
84
+ #### Preprocessing [optional]
85
+
86
+ [More Information Needed]
87
+
88
+
89
+ #### Training Hyperparameters
90
+
91
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
92
+
93
+ #### Speeds, Sizes, Times [optional]
94
+
95
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
96
+
97
+ [More Information Needed]
98
+
99
+ ## Evaluation
100
+
101
+ <!-- This section describes the evaluation protocols and provides the results. -->
102
+
103
+ ### Testing Data, Factors & Metrics
104
+
105
+ #### Testing Data
106
+
107
+ <!-- This should link to a Dataset Card if possible. -->
108
+
109
+ [More Information Needed]
110
+
111
+ #### Factors
112
+
113
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
114
+
115
+ [More Information Needed]
116
+
117
+ #### Metrics
118
+
119
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
120
+
121
+ [More Information Needed]
122
+
123
+ ### Results
124
+
125
+ [More Information Needed]
126
+
127
+ #### Summary
128
+
129
+
130
+
131
+ ## Model Examination [optional]
132
+
133
+ <!-- Relevant interpretability work for the model goes here -->
134
+
135
+ [More Information Needed]
136
+
137
+ ## Environmental Impact
138
+
139
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
140
+
141
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
142
+
143
+ - **Hardware Type:** [More Information Needed]
144
+ - **Hours used:** [More Information Needed]
145
+ - **Cloud Provider:** [More Information Needed]
146
+ - **Compute Region:** [More Information Needed]
147
+ - **Carbon Emitted:** [More Information Needed]
148
+
149
+ ## Technical Specifications [optional]
150
+
151
+ ### Model Architecture and Objective
152
+
153
+ [More Information Needed]
154
+
155
+ ### Compute Infrastructure
156
+
157
+ [More Information Needed]
158
+
159
+ #### Hardware
160
+
161
+ [More Information Needed]
162
+
163
+ #### Software
164
+
165
+ [More Information Needed]
166
+
167
+ ## Citation [optional]
168
+
169
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
170
+
171
+ **BibTeX:**
172
+
173
+ [More Information Needed]
174
+
175
+ **APA:**
176
+
177
+ [More Information Needed]
178
+
179
+ ## Glossary [optional]
180
+
181
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
182
+
183
+ [More Information Needed]
184
+
185
+ ## More Information [optional]
186
+
187
+ [More Information Needed]
188
+
189
+ ## Model Card Authors [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Contact
194
+
195
+ [More Information Needed]
196
+ ### Framework versions
197
+
198
+ - PEFT 0.15.0
finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/adapter_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": false,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 32,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.1,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 4,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "q_proj",
28
+ "k_proj",
29
+ "v_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "trainable_token_indices": null,
33
+ "use_dora": false,
34
+ "use_rslora": false
35
+ }
finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f9e208ddfc1866721c6193f3b2aab45cee7511097c7b8abc45be3d8065d9ee3
3
+ size 4743824
finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/README.md ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
3
+ should probably proofread and complete it, then remove this comment. -->
4
+
5
+ [<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
6
+ <details><summary>See axolotl config</summary>
7
+
8
+ axolotl version: `0.9.1`
9
+ ```yaml
10
+ base_model: meta-llama/Llama-3.1-8B-Instruct
11
+ model_type: LlamaForCausalLM
12
+ tokenizer_type: AutoTokenizer
13
+ gradient_accumulation_steps: 2
14
+ micro_batch_size: 4
15
+ num_epochs: 1
16
+ optimizer: adamw_torch_fused
17
+ lr_scheduler: cosine
18
+ learning_rate: 0.0001
19
+ load_in_8bit: false
20
+ load_in_4bit: true
21
+ adapter: lora
22
+ lora_model_dir: null
23
+ lora_r: 4
24
+ lora_alpha: 16
25
+ lora_dropout: 0.05
26
+ lora_target_modules:
27
+ - q_proj
28
+ - v_proj
29
+ - k_proj
30
+ datasets:
31
+ - path: /workspace/FinLoRA/data/train/formula_train.jsonl
32
+ type:
33
+ system_prompt: ''
34
+ field_system: system
35
+ field_instruction: context
36
+ field_output: target
37
+ format: '[INST] {instruction} [/INST]'
38
+ no_input_format: '[INST] {instruction} [/INST]'
39
+ dataset_prepared_path: null
40
+ val_set_size: 0.02
41
+ output_dir: /workspace/FinLoRA/lora/axolotl-output/formula_llama_3_1_8b_4bits_r4
42
+ peft_use_dora: false
43
+ sequence_len: 4096
44
+ sample_packing: false
45
+ pad_to_sequence_len: false
46
+ wandb_project: finlora_models
47
+ wandb_entity: null
48
+ wandb_watch: gradients
49
+ wandb_name: formula_llama_3_1_8b_4bits_r4
50
+ wandb_log_model: 'false'
51
+ bf16: auto
52
+ tf32: false
53
+ gradient_checkpointing: true
54
+ resume_from_checkpoint: null
55
+ logging_steps: 500
56
+ flash_attention: false
57
+ deepspeed: deepspeed_configs/zero1.json
58
+ warmup_steps: 10
59
+ evals_per_epoch: 4
60
+ saves_per_epoch: 1
61
+ weight_decay: 0.0
62
+ special_tokens:
63
+ pad_token: <|end_of_text|>
64
+ chat_template: llama3
65
+
66
+ ```
67
+
68
+ </details><br>
69
+
70
+ # workspace/FinLoRA/lora/axolotl-output/formula_llama_3_1_8b_4bits_r4
71
+
72
+ This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/formula_train.jsonl dataset.
73
+ It achieves the following results on the evaluation set:
74
+ - Loss: 1.6143
75
+
76
+ ## Model description
77
+
78
+ More information needed
79
+
80
+ ## Intended uses & limitations
81
+
82
+ More information needed
83
+
84
+ ## Training and evaluation data
85
+
86
+ More information needed
87
+
88
+ ## Training procedure
89
+
90
+ ### Training hyperparameters
91
+
92
+ The following hyperparameters were used during training:
93
+ - learning_rate: 0.0001
94
+ - train_batch_size: 4
95
+ - eval_batch_size: 4
96
+ - seed: 42
97
+ - distributed_type: multi-GPU
98
+ - num_devices: 5
99
+ - gradient_accumulation_steps: 2
100
+ - total_train_batch_size: 40
101
+ - total_eval_batch_size: 20
102
+ - optimizer: Use OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
103
+ - lr_scheduler_type: cosine
104
+ - lr_scheduler_warmup_steps: 10
105
+ - num_epochs: 1.0
106
+
107
+ ### Training results
108
+
109
+ | Training Loss | Epoch | Step | Validation Loss |
110
+ |:-------------:|:-----:|:----:|:---------------:|
111
+ | No log | 0.05 | 1 | 3.8659 |
112
+ | No log | 0.25 | 5 | 3.6317 |
113
+ | No log | 0.5 | 10 | 2.6735 |
114
+ | No log | 0.75 | 15 | 1.7570 |
115
+ | No log | 1.0 | 20 | 1.6143 |
116
+
117
+
118
+ ### Framework versions
119
+
120
+ - PEFT 0.15.2
121
+ - Transformers 4.51.3
122
+ - Pytorch 2.8.0.dev20250319+cu128
123
+ - Datasets 3.5.1
124
+ - Tokenizers 0.21.1
finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/adapter_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "r": 4,
24
+ "rank_pattern": {},
25
+ "revision": null,
26
+ "target_modules": [
27
+ "v_proj",
28
+ "q_proj",
29
+ "k_proj"
30
+ ],
31
+ "task_type": "CAUSAL_LM",
32
+ "trainable_token_indices": null,
33
+ "use_dora": false,
34
+ "use_rslora": false
35
+ }
finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1ffb0c00e606c0df93d8b8e5369289e1503bafbe46deefd4097d0b4d80046fb
3
+ size 4744016
finlora_hf_submission/models_4bit/headline_llama_3_1_8b_4bits_r4/README.md ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Model Card for Model ID
3
+
4
+ <!-- Provide a quick summary of what the model is/does. -->
5
+
6
+
7
+
8
+ ## Model Details
9
+
10
+ ### Model Description
11
+
12
+ <!-- Provide a longer summary of what this model is. -->
13
+
14
+
15
+
16
+ - **Developed by:** [More Information Needed]
17
+ - **Funded by [optional]:** [More Information Needed]
18
+ - **Shared by [optional]:** [More Information Needed]
19
+ - **Model type:** [More Information Needed]
20
+ - **Language(s) (NLP):** [More Information Needed]
21
+ - **License:** [More Information Needed]
22
+ - **Finetuned from model [optional]:** [More Information Needed]
23
+
24
+ ### Model Sources [optional]
25
+
26
+ <!-- Provide the basic links for the model. -->
27
+
28
+ - **Repository:** [More Information Needed]
29
+ - **Paper [optional]:** [More Information Needed]
30
+ - **Demo [optional]:** [More Information Needed]
31
+
32
+ ## Uses
33
+
34
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
35
+
36
+ ### Direct Use
37
+
38
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
39
+
40
+ [More Information Needed]
41
+
42
+ ### Downstream Use [optional]
43
+
44
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
45
+
46
+ [More Information Needed]
47
+
48
+ ### Out-of-Scope Use
49
+
50
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
51
+
52
+ [More Information Needed]
53
+
54
+ ## Bias, Risks, and Limitations
55
+
56
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
57
+
58
+ [More Information Needed]
59
+
60
+ ### Recommendations
61
+
62
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
63
+
64
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
65
+
66
+ ## How to Get Started with the Model
67
+
68
+ Use the code below to get started with the model.
69
+
70
+ [More Information Needed]
71
+
72
+ ## Training Details
73
+
74
+ ### Training Data
75
+
76
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the fine-tuning data is all about as well as documentation related to data pre-processing or additional filtering. -->
77
+
78
+ [More Information Needed]
79
+
80
+ ### Training Procedure
81
+
82
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the fine-tuning procedure. -->
83
+
84
+ #### Preprocessing [optional]
85
+
86
+ [More Information Needed]
87
+
88
+
89
+ #### Training Hyperparameters
90
+
91
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
92
+
93
+ #### Speeds, Sizes, Times [optional]
94
+
95
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
96
+
97
+ [More Information Needed]
98
+
99
+ ## Evaluation
100
+
101
+ <!-- This section describes the evaluation protocols and provides the results. -->
102
+
103
+ ### Testing Data, Factors & Metrics
104
+
105
+ #### Testing Data
106
+
107
+ <!-- This should link to a Dataset Card if possible. -->
108
+
109
+ [More Information Needed]
110
+
111
+ #### Factors
112
+
113
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
114
+
115
+ [More Information Needed]
116
+
117
+ #### Metrics
118
+
119
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
120
+
121
+ [More Information Needed]
122
+
123
+ ### Results
124
+
125
+ [More Information Needed]
126
+
127
+ #### Summary
128
+
129
+
130
+
131
+ ## Model Examination [optional]
132
+
133
+ <!-- Relevant interpretability work for the model goes here -->
134
+
135
+ [More Information Needed]
136
+
137
+ ## Environmental Impact
138
+
139
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
140
+
141
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
142
+
143
+ - **Hardware Type:** [More Information Needed]
144
+ - **Hours used:** [More Information Needed]
145
+ - **Cloud Provider:** [More Information Needed]
146
+ - **Compute Region:** [More Information Needed]
147
+ - **Carbon Emitted:** [More Information Needed]
148
+
149
+ ## Technical Specifications [optional]
150
+
151
+ ### Model Architecture and Objective
152
+
153
+ [More Information Needed]
154
+
155
+ ### Compute Infrastructure
156
+
157
+ [More Information Needed]
158
+
159
+ #### Hardware
160
+
161
+ [More Information Needed]
162
+
163
+ #### Software
164
+
165
+ [More Information Needed]
166
+
167
+ ## Citation [optional]
168
+
169
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
170
+
171
+ **BibTeX:**
172
+
173
+ [More Information Needed]
174
+
175
+ **APA:**
176
+
177
+ [More Information Needed]
178
+
179
+ ## Glossary [optional]
180
+
181
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
182
+
183
+ [More Information Needed]
184
+
185
+ ## More Information [optional]
186
+
187
+ [More Information Needed]
188
+
189
+ ## Model Card Authors [optional]
190
+
191
+ [More Information Needed]
192
+
193
+ ## Model Card Contact
194
+
195
+ [More Information Needed]
196
+ ### Framework versions
197
+
198
+ - PEFT 0.13.2
finlora_hf_submission/models_4bit/headline_llama_3_1_8b_4bits_r4/adapter_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 32,
14
+ "lora_dropout": 0.1,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 4,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "k_proj",
24
+ "v_proj",
25
+ "q_proj"
26
+ ],
27
+ "task_type": "CAUSAL_LM",
28
+ "use_dora": false,
29
+ "use_rslora": false
30
+ }