LbbbbbY committed on Sep 16, 2025

Commit

0408017

verified ·

1 Parent(s): 540a57b

Upload 80 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
finlora_hf_submission/.DS_Store +0 -0
finlora_hf_submission/README——finlora.md +651 -0
finlora_hf_submission/SUBMISSION_SUMMARY.md +171 -0
finlora_hf_submission/__pycache__/comprehensive_evaluation.cpython-313.pyc +0 -0
finlora_hf_submission/__pycache__/incremental_evaluation.cpython-313.pyc +0 -0
finlora_hf_submission/__pycache__/inference.cpython-313.pyc +0 -0
finlora_hf_submission/__pycache__/missing_tests.cpython-313.pyc +0 -0
finlora_hf_submission/__pycache__/robust_incremental.cpython-313.pyc +0 -0
finlora_hf_submission/inference.py +294 -0
finlora_hf_submission/models/.DS_Store +0 -0
finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/README.md +136 -0
finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/README.md +135 -0
finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/README.md +124 -0
finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/README.md +198 -0
finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/adapter_config.json +30 -0
finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/README.md +198 -0
finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/adapter_config.json +30 -0
finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/README.md +198 -0
finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/README.md +124 -0
finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/README.md +123 -0
finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/adapter_config.json +35 -0
finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/adapter_model.safetensors +3 -0
finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/README.md +198 -0
finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/adapter_config.json +30 -0
finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/adapter_model.safetensors +3 -0
finlora_hf_submission/models_4bit/.DS_Store +0 -0
finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/README.md +136 -0
finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/adapter_config.json +35 -0
finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/adapter_model.safetensors +3 -0
finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/README.md +198 -0
finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/adapter_config.json +35 -0
finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/adapter_model.safetensors +3 -0
finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/README.md +124 -0
finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/adapter_config.json +35 -0
finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/adapter_model.safetensors +3 -0
finlora_hf_submission/models_4bit/headline_llama_3_1_8b_4bits_r4/README.md +198 -0
finlora_hf_submission/models_4bit/headline_llama_3_1_8b_4bits_r4/adapter_config.json +30 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+finlora_hf_submission/rag_system/cfa_complete_rag.faiss filter=lfs diff=lfs merge=lfs -text

finlora_hf_submission/.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

finlora_hf_submission/README——finlora.md ADDED Viewed

	@@ -0,0 +1,651 @@

+# FinLoRA: Financial Large Language Models with LoRA Adaptation
+## Overview
+FinLoRA is a comprehensive framework for fine-tuning large language models on financial tasks using Low-Rank Adaptation (LoRA). This project provides trained LoRA adapters for various financial NLP tasks including sentiment analysis, named entity recognition, headline classification, XBRL processing, and CFA knowledge integration.
+## Model Architecture
+- **Base Model**: Meta-Llama-3.1-8B-Instruct
+- **Adaptation Method**: LoRA (Low-Rank Adaptation)
+- **Quantization**: 8-bit and 4-bit quantization support
+- **Tasks**: Financial sentiment analysis, NER, classification, XBRL processing, CFA knowledge integration
+## Available Models
+### Core Financial Models
+- `sentiment_llama_3_1_8b_8bits_r8` - Financial sentiment analysis
+- `ner_llama_3_1_8b_8bits_r8` - Named entity recognition
+- `headline_llama_3_1_8b_8bits_r8` - Financial headline classification
+- `xbrl_extract_llama_3_1_8b_8bits_r8` - XBRL tag extraction
+- `xbrl_term_llama_3_1_8b_8bits_r8` - XBRL terminology processing
+### Advanced Models
+- `financebench_llama_3_1_8b_8bits_r8` - Comprehensive financial benchmark
+- `finer_llama_3_1_8b_8bits_r8` - Financial NER
+- `formula_llama_3_1_8b_8bits_r8` - Financial formula processing
+### RAG Knowledge Base
+- CFA RAG knowledge base (FAISS index + JSONL data)
+- FinTagging RAG knowledge base (FAISS index + JSONL data)
+- RAG system scripts and configuration files
+## Quick Start (5 minutes)
+### 1. Environment Setup
+```bash
+# Clone the repository
+git clone <repository-url>
+cd FinLora——RAG
+# Create and activate environment
+conda env create -f FinLoRA/environment.yml
+conda activate finenv
+```
+### 2. Test a Single Model
+```python
+# Quick test script
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from peft import PeftModel
+import torch
+# Check if CUDA is available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+# Load model (replace with your model path)
+model_path = "FinLoRA/lora_adapters/8bits_r8/sentiment_llama_3_1_8b_8bits_r8"
+base_model = "meta-llama/Llama-3.1-8B-Instruct"
+# Load tokenizer
+tokenizer = AutoTokenizer.from_pretrained(base_model)
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+# Configure quantization based on device
+if device == "cuda":
+    bnb_config = BitsAndBytesConfig(load_in_8bit=True)
+    base_model = AutoModelForCausalLM.from_pretrained(
+        base_model, quantization_config=bnb_config, device_map="auto"
+    )
+else:
+    # CPU mode - no quantization
+    base_model = AutoModelForCausalLM.from_pretrained(
+        base_model, device_map="cpu", torch_dtype=torch.float32
+    )
+# Load LoRA adapter
+model = PeftModel.from_pretrained(base_model, model_path)
+# Test inference
+def quick_test(text):
+    inputs = tokenizer(text, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_new_tokens=50, temperature=0.7)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+# Test
+result = quick_test("Classify sentiment: 'The stock market is performing well today.'")
+print(result)
+```
+### 3. Run Full Evaluation
+```bash
+cd testdata
+python comprehensive_evaluation.py
+```
+## Environment Setup
+### Quest Cluster Environment (Original Development)
+The original development was done on Northwestern University's Quest cluster with:
+- **OS**: Linux 4.18.0-553.64.1.el8_10.x86_64
+- **GPU**: NVIDIA H100 80GB HBM3
+- **CUDA**: Version 12.8
+- **Environment**: `finenv` conda environment
+### Option 1: Using Conda (Recommended)
+```bash
+# Create environment from provided environment.yml
+conda env create -f FinLoRA/environment.yml
+# Activate environment
+conda activate finenv
+# Install additional requirements
+pip install -r FinLoRA/requirements.txt
+```
+### Option 2: Manual Installation
+#### For GPU Users:
+```bash
+# Create new conda environment
+conda create -n finlora python=3.11
+# Activate environment
+conda activate finlora
+# Install PyTorch with CUDA support
+conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia
+# Install core dependencies
+pip install transformers==4.45.2
+pip install datasets==2.19.1
+pip install peft==0.13.2
+pip install bitsandbytes==0.44.1
+pip install accelerate==1.0.0
+pip install deepspeed==0.15.2
+pip install sentence-transformers
+pip install faiss-cpu
+pip install scikit-learn
+pip install pandas numpy
+```
+#### For CPU-Only Users:
+```bash
+# Create new conda environment
+conda create -n finlora python=3.11
+# Activate environment
+conda activate finlora
+# Install PyTorch CPU version
+conda install pytorch torchvision torchaudio cpuonly -c pytorch
+# Install core dependencies (CPU-compatible versions)
+pip install transformers==4.45.2
+pip install datasets==2.19.1
+pip install peft==0.13.2
+pip install accelerate==1.0.0
+pip install sentence-transformers
+pip install faiss-cpu
+pip install scikit-learn
+pip install pandas numpy
+```
+### Option 3: Alternative Platforms
+#### Google Colab
+```python
+# Install dependencies
+!pip install transformers==4.45.2
+!pip install datasets==2.19.1
+!pip install peft==0.13.2
+!pip install bitsandbytes==0.44.1
+!pip install accelerate==1.0.0
+!pip install sentence-transformers
+!pip install faiss-cpu
+!pip install scikit-learn
+# Check GPU availability
+import torch
+print(f"CUDA available: {torch.cuda.is_available()}")
+if torch.cuda.is_available():
+    print(f"GPU: {torch.cuda.get_device_name(0)}")
+    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
+```
+#### AWS EC2 / Azure / Local GPU
+```bash
+# Install NVIDIA drivers and CUDA toolkit
+# Then follow Option 1 or 2 above
+```
+#### CPU-Only Mode
+```python
+# Complete CPU-only model loading example
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel
+import torch
+# Force CPU usage
+device = "cpu"
+torch.set_default_device(device)
+# Load tokenizer
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+# Load base model for CPU (no quantization)
+base_model = AutoModelForCausalLM.from_pretrained(
+    "meta-llama/Llama-3.1-8B-Instruct",
+    device_map="cpu",
+    torch_dtype=torch.float32,
+    low_cpu_mem_usage=True
+)
+# Load LoRA adapter
+model = PeftModel.from_pretrained(base_model, "path/to/lora/adapter")
+# Test inference
+def cpu_predict(text):
+    inputs = tokenizer(text, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_new_tokens=50, temperature=0.7)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+# Test
+result = cpu_predict("Classify sentiment: 'The market is performing well.'")
+print(result)
+```
+## Usage Instructions
+### 1. Basic Model Loading and Inference
+```python
+from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from peft import PeftModel
+import torch
+# Check device availability
+device = "cuda" if torch.cuda.is_available() else "cpu"
+print(f"Using device: {device}")
+# Load tokenizer
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+# Configure model loading based on device
+if device == "cuda":
+    # GPU mode with quantization
+    bnb_config = BitsAndBytesConfig(
+        load_in_8bit=True,
+        llm_int8_threshold=6.0
+    )
+    base_model = AutoModelForCausalLM.from_pretrained(
+        "meta-llama/Llama-3.1-8B-Instruct",
+        quantization_config=bnb_config,
+        device_map="auto",
+        torch_dtype=torch.float16,
+        trust_remote_code=True
+    )
+else:
+    # CPU mode without quantization
+    base_model = AutoModelForCausalLM.from_pretrained(
+        "meta-llama/Llama-3.1-8B-Instruct",
+        device_map="cpu",
+        torch_dtype=torch.float32,
+        low_cpu_mem_usage=True
+    )
+# Load LoRA adapter
+model = PeftModel.from_pretrained(base_model, "path/to/lora/adapter")
+# Example inference
+def predict(text, max_length=256):
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=max_length,
+            temperature=0.7,
+            do_sample=True,
+            pad_token_id=tokenizer.eos_token_id
+        )
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
+# Test the model
+result = predict("Classify the sentiment of this financial text: 'The company's revenue increased by 15% this quarter.'")
+print(result)
+```
+### 2. Comprehensive Evaluation
+For testing all models on financial datasets:
+```bash
+# Navigate to testdata directory
+cd testdata
+# Run comprehensive evaluation (works on any platform)
+python comprehensive_evaluation.py
+# For Quest cluster users only:
+# sbatch submit_comprehensive_evaluation.sh
+```
+**Note**: The evaluation script automatically detects your environment and adjusts accordingly:
+- **GPU available**: Uses CUDA with quantization
+- **CPU only**: Uses CPU mode without quantization
+- **Memory constraints**: Automatically reduces batch size
+### 3. Individual Model Testing
+```python
+# Test specific financial tasks
+from testdata.comprehensive_evaluation import FinLoRAPredictor
+# Initialize predictor
+predictor = FinLoRAPredictor("path/to/model")
+# Load model
+predictor.load_model()
+# Test sentiment analysis
+result = predictor.predict("Analyze the sentiment of: 'Stock prices are declining rapidly.'", max_length=50)
+print(result)
+```
+### 4. RAG System Usage
+The project includes RAG knowledge bases for enhanced financial understanding:
+```python
+# Load RAG system
+from FinLoRA.rag.cfa_rag_system import CFARAGSystem
+# Initialize RAG system
+rag_system = CFARAGSystem()
+# Query CFA knowledge base
+query = "What are the key principles of portfolio management?"
+results = rag_system.query(query, top_k=5)
+# Use with LoRA models for enhanced responses
+enhanced_response = rag_system.generate_enhanced_response(query, model)
+```
+## Data Input Formats for Testing
+### 1. Financial Sentiment Analysis
+**Input Format:**
+```python
+text = "The company's quarterly earnings exceeded expectations by 20%."
+prompt = f"Classify the sentiment of this financial text as positive, negative, or neutral:\n\nText: {text}\n\nSentiment:"
+```
+**Expected Output:**
+- `"positive"` - for positive financial sentiment
+- `"negative"` - for negative financial sentiment
+- `"neutral"` - for neutral financial sentiment
+**Test Examples:**
+- "Stock prices are soaring to new heights." → `positive`
+- "Revenue declined by 15% this quarter." → `negative`
+- "The company maintained stable performance." → `neutral`
+### 2. Named Entity Recognition
+**Input Format:**
+```python
+text = "Apple Inc. reported revenue of $394.3 billion in 2022."
+prompt = f"Extract financial entities from the following text:\n\nText: {text}\n\nEntities:"
+```
+**Expected Output:**
+- Company names, financial figures, dates, and financial terms
+- Structured entity extraction with context
+### 3. XBRL Processing
+**Input Format:**
+```python
+text = "Total assets: $1,234,567,890. Current assets: $456,789,123."
+prompt = f"Extract XBRL tags from the following financial statement:\n\nStatement: {text}\n\nXBRL Tags:"
+```
+**Expected Output:**
+- Structured XBRL tag extraction
+- Financial statement element identification
+### 4. CFA Knowledge Integration
+**Input Format:**
+```python
+question = "Explain the concept of weighted average cost of capital (WACC)."
+prompt = f"Answer this CFA-related question using your knowledge base:\n\nQuestion: {question}\n\nAnswer:"
+```
+**Expected Output:**
+- Comprehensive explanation with CFA knowledge
+- Structured financial concepts and formulas
+### 5. Headline Classification
+**Input Format:**
+```python
+headline = "Federal Reserve announces interest rate cut"
+prompt = f"Classify this financial headline:\n\nHeadline: {headline}\n\nClassification:"
+```
+**Expected Output:**
+- Financial news category classification
+- Market impact assessment
+## Running Without Quest GPU
+### Option 1: Local GPU Setup
+```bash
+# Check GPU availability
+nvidia-smi
+# Install CUDA toolkit (if not already installed)
+conda install cudatoolkit=11.8
+# Run evaluation with GPU
+cd testdata
+python comprehensive_evaluation.py
+```
+### Option 2: CPU-Only Mode
+```bash
+# Run evaluation on CPU (slower but works without GPU)
+cd testdata
+python comprehensive_evaluation.py
+```
+The evaluation script will automatically detect CPU mode and adjust settings accordingly.
+### Option 3: Cloud Platforms
+#### Google Colab
+```python
+# Upload the project files to Colab
+# Then run:
+!cd testdata && python comprehensive_evaluation.py
+```
+#### AWS EC2 / Azure / Local GPU
+```bash
+# Install NVIDIA drivers and CUDA toolkit first
+# Then follow the environment setup above
+cd testdata
+python comprehensive_evaluation.py
+```
+#### Hugging Face Spaces
+```python
+# Deploy as a web application
+# The model will run on Hugging Face's infrastructure
+```
+### Option 4: Docker with GPU Support
+```bash
+# Build Docker image
+docker build -t finlora .
+# Run with GPU support
+docker run --gpus all -it finlora python comprehensive_evaluation.py
+# Run without GPU (CPU mode)
+docker run -it finlora python comprehensive_evaluation.py
+```
+### Performance Expectations
+| Environment | Expected Speed | Memory Usage | Notes |
+|-------------|----------------|--------------|-------|
+| Quest H100 | Fastest | ~16GB | Original development environment |
+| Local GPU (RTX 4090) | Fast | ~12GB | High-end consumer GPU |
+| Google Colab T4 | Medium | ~8GB | Free tier available |
+| Google Colab V100 | Fast | ~16GB | Pro tier required |
+| CPU Only | Slow | ~32GB | Requires significant RAM |
+| AWS/Azure GPU | Fast | Variable | Depends on instance type |
+## Evaluation Results
+The models have been evaluated on multiple financial datasets:
+### Performance Metrics
+- **Financial Phrasebank**: F1=0.333, Accuracy=0.500
+- **NER Classification**: F1=0.889, Accuracy=0.800
+- **Headline Classification**: F1=0.697, Accuracy=0.700
+- **XBRL Tag Extraction**: Accuracy=0.200
+- **FIQA Sentiment Analysis**: F1=0.727, Accuracy=0.700
+### Dataset Coverage
+- BloombergGPT tasks: Financial Phrasebank, FIQA SA, Headline, NER, ConvFinQA
+- XBRL tasks: Tag extraction, Value extraction, Formula construction, Formula calculation
+- CFA integration: Level 1 and Level 2 knowledge base
+## File Structure
+```
+FinLoRA/
+├── lora_adapters/          # Trained LoRA adapters
+│   ├── 8bits_r8/          # 8-bit quantized models
+│   ├── 4bits_r4/          # 4-bit quantized models
+│   └── fp16_r8/           # Half-precision (FP16) models
+├── testdata/              # Evaluation scripts and data
+│   ├── comprehensive_evaluation.py
+│   ├── incremental_evaluation.py
+│   └── submit_*.sh       # SLURM submission scripts
+├── rag/                   # RAG system components
+├── data/                  # Training and test data
+├── environment.yml        # Conda environment specification
+└── requirements.txt       # Python dependencies
+```
+## Environment Verification
+Before running the models, verify your environment setup:
+```python
+# Environment verification script
+import torch
+import transformers
+import peft
+import datasets
+import sys
+print("=== Environment Verification ===")
+print(f"Python version: {sys.version}")
+print(f"PyTorch version: {torch.__version__}")
+print(f"CUDA available: {torch.cuda.is_available()}")
+print(f"CUDA version: {torch.version.cuda}")
+print(f"Transformers version: {transformers.__version__}")
+print(f"PEFT version: {peft.__version__}")
+print(f"Datasets version: {datasets.__version__}")
+if torch.cuda.is_available():
+    print(f"GPU count: {torch.cuda.device_count()}")
+    for i in range(torch.cuda.device_count()):
+        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
+        print(f"GPU {i} memory: {torch.cuda.get_device_properties(i).total_memory / 1e9:.1f} GB")
+else:
+    print("Running in CPU mode")
+print("=== Model Path Verification ===")
+import os
+model_paths = [
+    "FinLoRA/lora_adapters/8bits_r8/sentiment_llama_3_1_8b_8bits_r8",
+    "FinLoRA/lora_adapters/8bits_r8/ner_llama_3_1_8b_8bits_r8",
+    "FinLoRA/lora_adapters/8bits_r8/headline_llama_3_1_8b_8bits_r8"
+]
+for path in model_paths:
+    exists = os.path.exists(path)
+    print(f"{path}: {'✓' if exists else '✗'}")
+```
+## Troubleshooting
+### Common Issues
+1. **CUDA Out of Memory**
+   ```python
+   # Reduce batch size or use gradient checkpointing
+   model.gradient_checkpointing_enable()
+   # Or use CPU mode
+   device = "cpu"
+   ```
+2. **Model Loading Errors**
+   ```python
+   # Check model path and permissions
+   import os
+   print(os.path.exists("path/to/model"))
+   # Check if base model can be loaded
+   from transformers import AutoTokenizer
+   tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
+   ```
+3. **Dependency Conflicts**
+   ```bash
+   # Create fresh environment
+   conda create -n finlora_new python=3.11
+   conda activate finlora_new
+   pip install -r requirements.txt
+   ```
+4. **CPU Mode Issues**
+   ```python
+   # Ensure CPU mode is properly configured
+   import torch
+   torch.set_default_device("cpu")
+   # Use low memory mode
+   base_model = AutoModelForCausalLM.from_pretrained(
+       "meta-llama/Llama-3.1-8B-Instruct",
+       device_map="cpu",
+       torch_dtype=torch.float32,
+       low_cpu_mem_usage=True
+   )
+   ```
+### Performance Optimization
+1. **Memory Optimization**
+   - Use 8-bit or 4-bit quantization
+   - Enable gradient checkpointing (saves memory during training/fine-tuning only; it does not reduce inference memory)
+   - Use DeepSpeed for large models
+2. **Speed Optimization**
+   - Use GPU acceleration
+   - Batch processing
+   - Model caching
+## Citation
+If you use this work, please cite:
+```bibtex
+@article{finlora2024,
+  title={FinLoRA: Financial Large Language Models with LoRA Adaptation},
+  author={Your Name},
+  journal={Financial AI Conference},
+  year={2024}
+}
+```
+## License
+This project is licensed under the MIT License - see the LICENSE file for details.
+## Contact
+For questions and support, please contact:
+- Email: your.email@domain.com
+- GitHub Issues: [Project Repository](https://github.com/your-repo/finlora)
+## Acknowledgments
+- Meta AI for the Llama-3.1-8B-Instruct base model
+- Hugging Face for the transformers library
+- Microsoft for the LoRA adaptation technique
+- Quest cluster at Northwestern University for computational resources

finlora_hf_submission/SUBMISSION_SUMMARY.md ADDED Viewed

	@@ -0,0 +1,171 @@

+# FinLoRA Hugging Face Submission Summary
+## Submission Requirements Met
+✅ **Model Files**: All trained LoRA model files (excluding checkpoints) are included
+✅ **Inference Scripts**: Comprehensive scripts to load and run the models
+✅ **External Tools Integration**: RAG system and evaluation tools included
+## Submission Structure
+```
+finlora_hf_submission/
+├── models/                          # 9 Complete 8-bit LoRA Models (82MB)
+│   ├── sentiment_llama_3_1_8b_8bits_r8/
+│   ├── ner_llama_3_1_8b_8bits_r8/
+│   ├── headline_llama_3_1_8b_8bits_r8/
+│   ├── xbrl_extract_llama_3_1_8b_8bits_r8/
+│   ├── xbrl_term_llama_3_1_8b_8bits_r8/
+│   ├── financebench_llama_3_1_8b_8bits_r8/
+│   ├── finer_llama_3_1_8b_8bits_r8/
+│   ├── formula_llama_3_1_8b_8bits_r8/
+│   └── xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/
+├── models_4bit/                     # 8 Complete 4-bit LoRA Models (37MB)
+│   ├── sentiment_llama_3_1_8b_4bits_r4/
+│   ├── ner_llama_3_1_8b_4bits_r4/
+│   ├── headline_llama_3_1_8b_4bits_r4/
+│   ├── xbrl_extract_llama_3_1_8b_4bits_r4/
+│   ├── xbrl_term_llama_3_1_8b_4bits_r4/
+│   ├── financebench_llama_3_1_8b_4bits_r4/
+│   ├── finer_llama_3_1_8b_4bits_r4/
+│   └── formula_llama_3_1_8b_4bits_r4/
+├── testdata/                        # Evaluation Datasets (3.5MB)
+│   ├── FinCL-eval-subset.csv
+│   └── FinNI-eval-subset.csv
+├── rag_system/                      # RAG System Components (8.3MB)
+│   ├── cfa_rag_system.py
+│   ├── multi_task_rag_system.py
+│   └── rag_config.json
+├── inference.py                     # Main Inference Script
+├── comprehensive_evaluation.py      # Full Evaluation Script
+├── incremental_evaluation.py        # Incremental Evaluation
+├── robust_incremental.py           # Robust Evaluation
+├── missing_tests.py                # Missing Test Detection
+├── test_submission.py              # Submission Test Script
+├── upload_to_hf.py                 # Hugging Face Upload Script
+├── requirements.txt                # Python Dependencies
+└── README.md                       # Comprehensive Documentation
+```
+## Available Models
+### 8-bit Quantized Models (Recommended)
+1. **sentiment_llama_3_1_8b_8bits_r8** - Financial sentiment analysis
+2. **ner_llama_3_1_8b_8bits_r8** - Named entity recognition
+3. **headline_llama_3_1_8b_8bits_r8** - Financial headline classification
+4. **xbrl_extract_llama_3_1_8b_8bits_r8** - XBRL tag extraction
+5. **xbrl_term_llama_3_1_8b_8bits_r8** - XBRL terminology processing
+6. **financebench_llama_3_1_8b_8bits_r8** - Comprehensive financial benchmark
+7. **finer_llama_3_1_8b_8bits_r8** - Financial NER
+8. **formula_llama_3_1_8b_8bits_r8** - Financial formula processing
+9. **xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8** - XBRL training model
+### 4-bit Quantized Models (Memory Efficient)
+1. **sentiment_llama_3_1_8b_4bits_r4** - Financial sentiment analysis
+2. **ner_llama_3_1_8b_4bits_r4** - Named entity recognition
+3. **headline_llama_3_1_8b_4bits_r4** - Financial headline classification
+4. **xbrl_extract_llama_3_1_8b_4bits_r4** - XBRL tag extraction
+5. **xbrl_term_llama_3_1_8b_4bits_r4** - XBRL terminology processing
+6. **financebench_llama_3_1_8b_4bits_r4** - Comprehensive financial benchmark
+7. **finer_llama_3_1_8b_4bits_r4** - Financial NER
+8. **formula_llama_3_1_8b_4bits_r4** - Financial formula processing
+## Key Features
+### 1. Easy Model Loading
+```python
+from inference import FinLoRAPredictor
+# Load 8-bit model
+predictor = FinLoRAPredictor("sentiment_llama_3_1_8b_8bits_r8", use_4bit=False)
+# Load 4-bit model for memory efficiency
+predictor = FinLoRAPredictor("sentiment_llama_3_1_8b_4bits_r4", use_4bit=True)
+```
+### 2. Multiple Task Support
+- Financial sentiment analysis
+- Named entity recognition
+- Headline classification
+- XBRL tag extraction
+- Financial formula processing
+### 3. Comprehensive Evaluation
+- Full evaluation on financial datasets
+- Incremental evaluation capabilities
+- Robust evaluation testing
+- Missing test detection
+### 4. Memory Efficiency
+- 8-bit models for optimal performance
+- 4-bit models for limited memory environments
+- Automatic device detection (GPU/CPU)
+## Performance Results
+| Task | Dataset | F1 Score | Accuracy |
+|------|---------|----------|----------|
+| Sentiment Analysis | Financial Phrasebank | 0.333 | 0.500 |
+| NER | Financial NER | 0.889 | 0.800 |
+| Classification | Headline Classification | 0.697 | 0.700 |
+| XBRL Processing | XBRL Tag Extraction | - | 0.200 |
+| Sentiment Analysis | FIQA SA | 0.727 | 0.700 |
+## Usage Instructions
+### Quick Start
+```bash
+# 1. Install dependencies
+pip install -r requirements.txt
+# 2. Test the submission
+python test_submission.py
+# 3. Run inference
+python inference.py
+# 4. Run evaluation
+python comprehensive_evaluation.py
+```
+### Upload to Hugging Face
+```bash
+# Set your Hugging Face token
+export HUGGINGFACE_TOKEN="your_token_here"
+# Upload the model
+python upload_to_hf.py
+```
+## Submission Checklist
+- [x] All model files included (excluding checkpoints)
+- [x] Inference scripts provided
+- [x] External tools integration (RAG system)
+- [x] Comprehensive documentation
+- [x] Easy installation and setup
+- [x] Multiple usage examples
+- [x] Evaluation scripts
+- [x] Test scripts for verification
+- [x] Hugging Face upload automation
+- [x] Both 8-bit and 4-bit model variants
+- [x] Complete evaluation datasets
+## Ready for Submission
+The FinLoRA submission is complete and ready for Hugging Face upload. All requirements have been met:
+1. **Model Files**: 17 complete LoRA models (9 x 8-bit + 8 x 4-bit) with all necessary files
+2. **Inference Scripts**: Comprehensive Python scripts for loading and running models
+3. **External Tools**: RAG system with evaluation tools and datasets
+4. **Documentation**: Complete README with usage examples
+5. **Testing**: Automated test scripts to verify functionality
+The submission can be easily uploaded to Hugging Face using the provided `upload_to_hf.py` script.
+## Total Size: ~130MB
+- Models (8-bit): 82MB
+- Models (4-bit): 37MB
+- Test data: 3.5MB
+- RAG system: 8.3MB
+- Scripts and docs: <1MB

finlora_hf_submission/__pycache__/comprehensive_evaluation.cpython-313.pyc ADDED Viewed

Binary file (23 kB). View file

finlora_hf_submission/__pycache__/incremental_evaluation.cpython-313.pyc ADDED Viewed

Binary file (4.99 kB). View file

finlora_hf_submission/__pycache__/inference.cpython-313.pyc ADDED Viewed

Binary file (12.1 kB). View file

finlora_hf_submission/__pycache__/missing_tests.cpython-313.pyc ADDED Viewed

Binary file (10.4 kB). View file

finlora_hf_submission/__pycache__/robust_incremental.cpython-313.pyc ADDED Viewed

Binary file (7.67 kB). View file

finlora_hf_submission/inference.py ADDED Viewed

	@@ -0,0 +1,294 @@

+#!/usr/bin/env python3
+"""
+FinLoRA: Financial Large Language Models with LoRA Adaptation
+Main inference script for Hugging Face submission
+This script provides easy loading and inference for all FinLoRA models.
+"""
+import torch
+import os
+import json
+import warnings
+from typing import Dict, List, Optional, Any, Union
+from pathlib import Path
+# Suppress warnings for cleaner output
+warnings.filterwarnings('ignore')
+try:
+    from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+    from peft import PeftModel
+except ImportError as e:
+    print(f"Missing required dependencies: {e}")
+    print("Please install: pip install transformers peft bitsandbytes")
+    exit(1)
+class FinLoRAPredictor:
+    """Main FinLoRA predictor class"""
+    def __init__(self,
+                 model_name: str = "sentiment_llama_3_1_8b_8bits_r8",
+                 base_model: str = "meta-llama/Llama-3.1-8B-Instruct",
+                 use_4bit: bool = False):
+        """
+        Initialize FinLoRA predictor
+        Args:
+            model_name: Name of the LoRA model to load
+            base_model: Base model name
+            use_4bit: Whether to use 4-bit quantized models
+        """
+        self.model_name = model_name
+        self.base_model = base_model
+        self.use_4bit = use_4bit
+        # Device configuration
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"Using device: {self.device}")
+        # Model components
+        self.model = None
+        self.tokenizer = None
+        # Load model
+        self._load_model()
+    def _load_model(self):
+        """Load the FinLoRA model"""
+        try:
+            print(f"Loading model: {self.model_name}")
+            # Load tokenizer
+            self.tokenizer = AutoTokenizer.from_pretrained(self.base_model)
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            # Configure quantization based on device and preference
+            if self.device == "cuda":
+                if self.use_4bit:
+                    bnb_config = BitsAndBytesConfig(
+                        load_in_4bit=True,
+                        bnb_4bit_use_double_quant=True,
+                        bnb_4bit_quant_type="nf4",
+                        bnb_4bit_compute_dtype=torch.bfloat16
+                    )
+                else:
+                    bnb_config = BitsAndBytesConfig(
+                        load_in_8bit=True,
+                        llm_int8_threshold=6.0
+                    )
+                base_model = AutoModelForCausalLM.from_pretrained(
+                    self.base_model,
+                    quantization_config=bnb_config,
+                    device_map="auto",
+                    torch_dtype=torch.float16,
+                    trust_remote_code=True
+                )
+            else:
+                # CPU mode
+                base_model = AutoModelForCausalLM.from_pretrained(
+                    self.base_model,
+                    device_map="cpu",
+                    torch_dtype=torch.float32,
+                    low_cpu_mem_usage=True
+                )
+            # Load LoRA adapter
+            model_dir = "models_4bit" if self.use_4bit else "models"
+            model_path = f"{model_dir}/{self.model_name}"
+            if not os.path.exists(model_path):
+                raise FileNotFoundError(f"Model path not found: {model_path}")
+            self.model = PeftModel.from_pretrained(base_model, model_path)
+            self.model.eval()
+            print(f"Model loaded successfully: {self.model_name}")
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            raise
+    def predict(self,
+                text: str,
+                max_length: int = 256,
+                temperature: float = 0.7) -> str:
+        """
+        Generate prediction for given text
+        Args:
+            text: Input text
+            max_length: Maximum length of generated text
+            temperature: Sampling temperature
+        """
+        try:
+            # Tokenize input
+            inputs = self.tokenizer(
+                text,
+                return_tensors="pt",
+                truncation=True,
+                max_length=512
+            )
+            if self.device == "cuda":
+                inputs = {k: v.to(self.device) for k, v in inputs.items()}
+            # Generate response
+            with torch.no_grad():
+                outputs = self.model.generate(
+                    **inputs,
+                    max_new_tokens=max_length,
+                    do_sample=True,
+                    temperature=temperature,
+                    top_p=0.9,
+                    pad_token_id=self.tokenizer.eos_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id
+                )
+            # Decode response
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+            # Remove input text from response
+            if text in response:
+                response = response.replace(text, "").strip()
+            return response
+        except Exception as e:
+            print(f"Prediction error: {e}")
+            return f"Error: {str(e)}"
+    def classify_sentiment(self, text: str) -> str:
+        """Classify financial sentiment"""
+        prompt = f"Classify the sentiment of this financial text as positive, negative, or neutral:\n\nText: {text}\n\nSentiment:"
+        response = self.predict(prompt, max_length=10)
+        # Extract sentiment
+        if 'positive' in response.lower():
+            return "positive"
+        elif 'negative' in response.lower():
+            return "negative"
+        else:
+            return "neutral"
+    def extract_entities(self, text: str) -> str:
+        """Extract financial entities"""
+        prompt = f"Extract financial entities from the following text:\n\nText: {text}\n\nEntities:"
+        return self.predict(prompt, max_length=100)
+    def classify_headline(self, headline: str) -> str:
+        """Classify financial headline"""
+        prompt = f"Classify this financial headline as positive or negative:\n\nHeadline: {headline}\n\nSentiment:"
+        response = self.predict(prompt, max_length=10)
+        if 'positive' in response.lower() or 'yes' in response.lower():
+            return "positive"
+        else:
+            return "negative"
+    def extract_xbrl_tags(self, text: str) -> str:
+        """Extract XBRL tags from financial text"""
+        prompt = f"Extract XBRL tags from the following financial statement:\n\nStatement: {text}\n\nXBRL Tags:"
+        return self.predict(prompt, max_length=100)
+    def process_financial_text(self, text: str) -> str:
+        """Process general financial text"""
+        prompt = f"Analyze this financial text and provide insights:\n\nText: {text}\n\nAnalysis:"
+        return self.predict(prompt, max_length=200)
+def list_available_models(use_4bit: bool = False) -> List[str]:
+    """List all available models"""
+    model_dir = "models_4bit" if use_4bit else "models"
+    models_path = Path(model_dir)
+    if not models_path.exists():
+        return []
+    models = []
+    for model_dir in models_path.iterdir():
+        if model_dir.is_dir() and (model_dir / "adapter_config.json").exists():
+            models.append(model_dir.name)
+    return sorted(models)
+def main():
+    """Main function for testing the model"""
+    print("=== FinLoRA Financial Language Model ===")
+    print("Loading model and testing inference...")
+    # List available models
+    available_models_8bit = list_available_models(use_4bit=False)
+    available_models_4bit = list_available_models(use_4bit=True)
+    print(f"Available 8-bit models: {', '.join(available_models_8bit)}")
+    print(f"Available 4-bit models: {', '.join(available_models_4bit)}")
+    if not available_models_8bit and not available_models_4bit:
+        print("No models found in 'models' or 'models_4bit' directories")
+        return
+    # Load the first available model
+    if available_models_8bit:
+        model_name = available_models_8bit[0]
+        use_4bit = False
+    else:
+        model_name = available_models_4bit[0]
+        use_4bit = True
+    print(f"Loading model: {model_name} ({'4-bit' if use_4bit else '8-bit'})")
+    try:
+        # Initialize predictor
+        predictor = FinLoRAPredictor(
+            model_name=model_name,
+            use_4bit=use_4bit
+        )
+        # Test cases
+        test_cases = [
+            {
+                "task": "Sentiment Analysis",
+                "text": "The company's quarterly earnings exceeded expectations by 20%.",
+                "method": predictor.classify_sentiment
+            },
+            {
+                "task": "Entity Extraction",
+                "text": "Apple Inc. reported revenue of $394.3 billion in 2022.",
+                "method": predictor.extract_entities
+            },
+            {
+                "task": "Headline Classification",
+                "text": "Federal Reserve announces interest rate cut",
+                "method": predictor.classify_headline
+            },
+            {
+                "task": "XBRL Tag Extraction",
+                "text": "Total assets: $1,234,567,890. Current assets: $456,789,123.",
+                "method": predictor.extract_xbrl_tags
+            }
+        ]
+        # Run tests
+        for i, test_case in enumerate(test_cases, 1):
+            print(f"\n--- Test {i}: {test_case['task']} ---")
+            print(f"Input: {test_case['text']}")
+            try:
+                result = test_case['method'](test_case['text'])
+                print(f"Output: {result}")
+            except Exception as e:
+                print(f"Error: {e}")
+        print("\nModel testing completed successfully!")
+    except Exception as e:
+        print(f"Error: {e}")
+        print("\nTroubleshooting:")
+        print("1. Ensure all model files are in the 'models' or 'models_4bit' directory")
+        print("2. Check that the base model can be downloaded")
+        print("3. Verify CUDA availability if using GPU")
+if __name__ == "__main__":
+    main()

finlora_hf_submission/models/.DS_Store ADDED Viewed

Binary file (10.2 kB). View file

finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/README.md ADDED Viewed

	@@ -0,0 +1,136 @@

+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+axolotl version: `0.9.1`
+```yaml
+base_model: meta-llama/Llama-3.1-8B-Instruct
+model_type: LlamaForCausalLM
+tokenizer_type: AutoTokenizer
+gradient_accumulation_steps: 2
+micro_batch_size: 1
+num_epochs: 4
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0001
+load_in_8bit: true
+load_in_4bit: false
+adapter: lora
+lora_model_dir: null
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+- q_proj
+- v_proj
+- k_proj
+datasets:
+- path: /workspace/FinLoRA/data/train/financebench_train.jsonl
+  type:
+    system_prompt: ''
+    field_system: system
+    field_instruction: context
+    field_output: target
+    format: '[INST] {instruction} [/INST]'
+    no_input_format: '[INST] {instruction} [/INST]'
+dataset_prepared_path: null
+val_set_size: 0.02
+output_dir: /workspace/FinLoRA/lora/axolotl-output/financebench_llama_3_1_8b_8bits_r8
+peft_use_dora: false
+sequence_len: 4096
+sample_packing: false
+pad_to_sequence_len: false
+wandb_project: finlora_models
+wandb_entity: null
+wandb_watch: gradients
+wandb_name: financebench_llama_3_1_8b_8bits_r8
+wandb_log_model: 'false'
+bf16: auto
+tf32: false
+gradient_checkpointing: true
+resume_from_checkpoint: null
+logging_steps: 500
+flash_attention: false
+deepspeed: deepspeed_configs/zero1.json
+warmup_steps: 10
+evals_per_epoch: 4
+saves_per_epoch: 1
+weight_decay: 0.0
+special_tokens:
+  pad_token: <|end_of_text|>
+chat_template: llama3
+```
+</details><br>
+# financebench_llama_3_1_8b_8bits_r8
+This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/financebench_train.jsonl dataset.
+It achieves the following results on the evaluation set:
+- Loss: 3.0593
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 5
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 10
+- total_eval_batch_size: 5
+- optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 10
+- num_epochs: 4.0
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| No log        | 0.1176 | 1    | 4.6396          |
+| No log        | 0.2353 | 2    | 4.5918          |
+| No log        | 0.4706 | 4    | 4.5650          |
+| No log        | 0.7059 | 6    | 4.5194          |
+| No log        | 0.9412 | 8    | 4.4293          |
+| No log        | 1.1176 | 10   | 4.3325          |
+| No log        | 1.3529 | 12   | 3.9557          |
+| No log        | 1.5882 | 14   | 3.6519          |
+| No log        | 1.8235 | 16   | 3.6472          |
+| No log        | 2.0    | 18   | 3.4611          |
+| No log        | 2.2353 | 20   | 3.3681          |
+| No log        | 2.4706 | 22   | 3.2136          |
+| No log        | 2.7059 | 24   | 3.1790          |
+| No log        | 2.9412 | 26   | 3.1455          |
+| No log        | 3.1176 | 28   | 3.1480          |
+| No log        | 3.3529 | 30   | 3.0489          |
+| No log        | 3.5882 | 32   | 3.0593          |
+### Framework versions
+- PEFT 0.15.2
+- Transformers 4.51.3
+- Pytorch 2.8.0.dev20250319+cu128
+- Datasets 3.5.1
+- Tokenizers 0.21.1

finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/adapter_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "k_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models/financebench_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f247859d3e9b80c4dfd2f8d7b8d9ea574b5cd1925e2fc12ab3e7babc6e3a6bd7
+size 9462656

finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/README.md ADDED Viewed

	@@ -0,0 +1,135 @@

+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+axolotl version: `0.9.0`
+```yaml
+base_model: NousResearch/Meta-Llama-3.1-8B-Instruct
+model_type: LlamaForCausalLM
+tokenizer_type: AutoTokenizer
+gradient_accumulation_steps: 8
+micro_batch_size: 1
+num_epochs: 4
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0001
+load_in_8bit: true
+load_in_4bit: false
+adapter: lora
+lora_model_dir: null
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+- q_proj
+- v_proj
+- k_proj
+datasets:
+- path: /workspace/FinLoRA/data/train/finer_train_batched.jsonl
+  type:
+    system_prompt: ''
+    field_system: system
+    field_instruction: context
+    field_output: target
+    format: '[INST] {instruction} [/INST]'
+    no_input_format: '[INST] {instruction} [/INST]'
+dataset_prepared_path: null
+val_set_size: 0.02
+output_dir: /workspace/FinLoRA/lora/axolotl-output/finer_llama_3_1_8b_8bits_r8
+sequence_len: 4096
+sample_packing: false
+pad_to_sequence_len: false
+wandb_project: finlora_models
+wandb_entity: null
+wandb_watch: gradients
+wandb_name: finer_llama_3_1_8b_8bits_r8
+wandb_log_model: 'false'
+bf16: auto
+tf32: false
+gradient_checkpointing: true
+resume_from_checkpoint: null
+logging_steps: 500
+flash_attention: false
+deepspeed: deepspeed_configs/zero1.json
+warmup_steps: 10
+evals_per_epoch: 4
+saves_per_epoch: 1
+weight_decay: 0.0
+special_tokens:
+  pad_token: <|end_of_text|>
+chat_template: llama3
+```
+</details><br>
+# finer_llama_3_1_8b_8bits_r8
+This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/finer_train_batched.jsonl dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0331
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 2
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 16
+- total_eval_batch_size: 2
+- optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 10
+- num_epochs: 4.0
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| No log        | 0.0016 | 1    | 0.5433          |
+| No log        | 0.2497 | 153  | 0.0520          |
+| No log        | 0.4995 | 306  | 0.0459          |
+| No log        | 0.7492 | 459  | 0.0406          |
+| 0.0693        | 0.9990 | 612  | 0.0386          |
+| 0.0693        | 1.2497 | 765  | 0.0396          |
+| 0.0693        | 1.4995 | 918  | 0.0363          |
+| 0.036         | 1.7492 | 1071 | 0.0351          |
+| 0.036         | 1.9990 | 1224 | 0.0348          |
+| 0.036         | 2.2497 | 1377 | 0.0360          |
+| 0.0302        | 2.4995 | 1530 | 0.0321          |
+| 0.0302        | 2.7492 | 1683 | 0.0347          |
+| 0.0302        | 2.9990 | 1836 | 0.0324          |
+| 0.0302        | 3.2497 | 1989 | 0.0328          |
+| 0.0242        | 3.4995 | 2142 | 0.0334          |
+| 0.0242        | 3.7492 | 2295 | 0.0332          |
+| 0.0242        | 3.9990 | 2448 | 0.0331          |
+### Framework versions
+- PEFT 0.15.2
+- Transformers 4.51.3
+- Pytorch 2.8.0.dev20250319+cu128
+- Datasets 3.5.0
+- Tokenizers 0.21.1

finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/adapter_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "NousResearch/Meta-Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models/finer_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f8a25d48802d10e77609254723de643d2683d2c72d1a73bdb4110ed78f3f9d0b
+size 9462656

finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/README.md ADDED Viewed

	@@ -0,0 +1,124 @@

+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+axolotl version: `0.9.1`
+```yaml
+base_model: meta-llama/Llama-3.1-8B-Instruct
+model_type: LlamaForCausalLM
+tokenizer_type: AutoTokenizer
+gradient_accumulation_steps: 2
+micro_batch_size: 4
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0001
+load_in_8bit: true
+load_in_4bit: false
+adapter: lora
+lora_model_dir: null
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+- q_proj
+- v_proj
+- k_proj
+datasets:
+- path: /workspace/FinLoRA/data/train/formula_train.jsonl
+  type:
+    system_prompt: ''
+    field_system: system
+    field_instruction: context
+    field_output: target
+    format: '[INST] {instruction} [/INST]'
+    no_input_format: '[INST] {instruction} [/INST]'
+dataset_prepared_path: null
+val_set_size: 0.02
+output_dir: /workspace/FinLoRA/lora/axolotl-output/formula_llama_3_1_8b_8bits_r8
+peft_use_dora: false
+sequence_len: 4096
+sample_packing: false
+pad_to_sequence_len: false
+wandb_project: finlora_models
+wandb_entity: null
+wandb_watch: gradients
+wandb_name: formula_llama_3_1_8b_8bits_r8
+wandb_log_model: 'false'
+bf16: auto
+tf32: false
+gradient_checkpointing: true
+resume_from_checkpoint: null
+logging_steps: 500
+flash_attention: false
+deepspeed: deepspeed_configs/zero1.json
+warmup_steps: 10
+evals_per_epoch: 4
+saves_per_epoch: 1
+weight_decay: 0.0
+special_tokens:
+  pad_token: <|end_of_text|>
+chat_template: llama3
+```
+</details><br>
+# formula_llama_3_1_8b_8bits_r8
+This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/formula_train.jsonl dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.5104
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 5
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 40
+- total_eval_batch_size: 20
+- optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 10
+- num_epochs: 1.0
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| No log        | 0.05  | 1    | 4.5176          |
+| No log        | 0.25  | 5    | 4.2441          |
+| No log        | 0.5   | 10   | 2.5134          |
+| No log        | 0.75  | 15   | 1.6948          |
+| No log        | 1.0   | 20   | 1.5104          |
+### Framework versions
+- PEFT 0.15.2
+- Transformers 4.51.3
+- Pytorch 2.8.0.dev20250319+cu128
+- Datasets 3.5.1
+- Tokenizers 0.21.1

finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/adapter_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models/formula_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:21559ce8defb3cd2bb17b0f827749447c48d53170994e87b7548b243ef7c31a3
+size 9462656

finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/README.md ADDED Viewed

	@@ -0,0 +1,198 @@

+# Model Card for headline_llama_3_1_8b_8bits_r8
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the fine-tuning data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the fine-tuning procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/adapter_config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models/headline_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5bb065b036c7f919cf23b5bc1ff2039d2ee9cf30fe0136cf4a77bb3b56ad187c
+size 9462656

finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/README.md ADDED Viewed

	@@ -0,0 +1,198 @@

+# Model Card for ner_llama_3_1_8b_8bits_r8
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the fine-tuning data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the fine-tuning procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/adapter_config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models/ner_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fd879e3d4d381efd7d20aafbb9f16519bfd212ab25eb733a7186d1a0234afa9f
+size 9462464

finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/README.md ADDED Viewed

	@@ -0,0 +1,198 @@

+# Model Card for sentiment_llama_3_1_8b_8bits_r8
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** meta-llama/Llama-3.1-8B-Instruct
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.15.2

finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/adapter_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models/sentiment_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9b1185820498a284facd897021fd2fcf7eed9f02bb6c558abfb0e03f3b563034
+size 9462464

finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/README.md ADDED Viewed

	@@ -0,0 +1,124 @@

+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+axolotl version: `0.9.1.post1`
+```yaml
+base_model: meta-llama/Llama-3.1-8B-Instruct
+model_type: LlamaForCausalLM
+tokenizer_type: AutoTokenizer
+gradient_accumulation_steps: 8
+micro_batch_size: 1
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0001
+load_in_8bit: true
+load_in_4bit: false
+adapter: lora
+lora_model_dir: null
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+- q_proj
+- v_proj
+- k_proj
+datasets:
+- path: /workspace/FinLoRA/data/train/xbrl_extract_train.jsonl
+  type:
+    system_prompt: ''
+    field_system: system
+    field_instruction: context
+    field_output: target
+    format: '[INST] {instruction} [/INST]'
+    no_input_format: '[INST] {instruction} [/INST]'
+dataset_prepared_path: null
+val_set_size: 0.02
+output_dir: /workspace/FinLoRA/lora/axolotl-output/xbrl_extract_llama_3_1_8b_8bits_r8
+peft_use_dora: false
+peft_use_rslora: false
+sequence_len: 4096
+sample_packing: false
+pad_to_sequence_len: false
+wandb_project: finlora_models
+wandb_entity: null
+wandb_watch: gradients
+wandb_name: xbrl_extract_llama_3_1_8b_8bits_r8
+wandb_log_model: 'false'
+bf16: auto
+tf32: false
+gradient_checkpointing: true
+resume_from_checkpoint: null
+logging_steps: 500
+flash_attention: false
+deepspeed: deepspeed_configs/zero1.json
+warmup_steps: 10
+evals_per_epoch: 4
+saves_per_epoch: 1
+weight_decay: 0.0
+special_tokens:
+  pad_token: <|end_of_text|>
+chat_template: llama3
+```
+</details><br>
+# xbrl_extract_llama_3_1_8b_8bits_r8
+This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/xbrl_extract_train.jsonl dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.0025
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 4
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 32
+- total_eval_batch_size: 4
+- optimizer: OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 10
+- num_epochs: 1.0
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| No log        | 0.0038 | 1    | 1.6299          |
+| No log        | 0.2526 | 67   | 0.0075          |
+| No log        | 0.5052 | 134  | 0.0037          |
+| No log        | 0.7578 | 201  | 0.0025          |
+### Framework versions
+- PEFT 0.15.2
+- Transformers 4.51.3
+- Pytorch 2.8.0.dev20250319+cu128
+- Datasets 3.5.1
+- Tokenizers 0.21.1

finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/adapter_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models/xbrl_extract_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb3c3622e2c20b903a37a51149121196cc2a6ae83e63ca97d859d3389bbc5025
+size 9462656

finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/README.md ADDED Viewed

	@@ -0,0 +1,123 @@

+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+axolotl version: `0.9.1`
+```yaml
+base_model: meta-llama/Llama-3.1-8B-Instruct
+model_type: LlamaForCausalLM
+tokenizer_type: AutoTokenizer
+gradient_accumulation_steps: 2
+micro_batch_size: 4
+num_epochs: 1
+optimizer: adamw_bnb_8bit
+lr_scheduler: cosine
+learning_rate: 0.0001
+load_in_8bit: true
+load_in_4bit: false
+adapter: lora
+lora_model_dir: null
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+- q_proj
+- v_proj
+- k_proj
+datasets:
+- path: /workspace/FinLoRA/data/train/xbrl_term_train.jsonl
+  type:
+    system_prompt: ''
+    field_system: system
+    field_instruction: context
+    field_output: target
+    format: '[INST] {instruction} [/INST]'
+    no_input_format: '[INST] {instruction} [/INST]'
+dataset_prepared_path: null
+val_set_size: 0.02
+output_dir: /workspace/FinLoRA/lora/axolotl-output/xbrl_term_llama_3_1_8b_8bits_r8
+peft_use_dora: false
+sequence_len: 4096
+sample_packing: false
+pad_to_sequence_len: false
+wandb_project: finlora_models
+wandb_entity: null
+wandb_watch: gradients
+wandb_name: xbrl_term_llama_3_1_8b_8bits_r8
+wandb_log_model: 'false'
+bf16: auto
+tf32: false
+gradient_checkpointing: true
+resume_from_checkpoint: null
+logging_steps: 500
+flash_attention: false
+deepspeed: deepspeed_configs/zero1.json
+warmup_steps: 10
+evals_per_epoch: 4
+saves_per_epoch: 1
+weight_decay: 0.0
+special_tokens:
+  pad_token: <|end_of_text|>
+chat_template: llama3
+```
+</details><br>
+# xbrl_term_llama_3_1_8b_8bits_r8
+This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/xbrl_term_train.jsonl dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.5077
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 5
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 40
+- total_eval_batch_size: 20
+- optimizer: OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 10
+- num_epochs: 1.0
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| No log        | 0.0070 | 1    | 2.5692          |
+| No log        | 0.2509 | 36   | 1.7055          |
+| No log        | 0.5017 | 72   | 1.5480          |
+| No log        | 0.7526 | 108  | 1.5077          |
+### Framework versions
+- PEFT 0.15.2
+- Transformers 4.51.3
+- Pytorch 2.8.0.dev20250319+cu128
+- Datasets 3.5.1
+- Tokenizers 0.21.1

finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/adapter_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models/xbrl_term_llama_3_1_8b_8bits_r8/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d91d9dd57ea4e694d41c537eb78a3426fc0798be06789d80d67d5b8438b9eea
+size 9462656

finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/README.md ADDED Viewed

	@@ -0,0 +1,198 @@

+# Model Card for xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** meta-llama/Llama-3.1-8B-Instruct
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the fine-tuning data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the fine-tuning procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/adapter_config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "k_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models/xbrl_train.jsonl-meta-llama-Llama-3.1-8B-Instruct-8bits-r8/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bf7d39b1998d060dfeaee90c1c0031a17d848871c090af928d1b696936d2eb2b
+size 9462464

finlora_hf_submission/models_4bit/.DS_Store ADDED Viewed

Binary file (10.2 kB). View file

finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/README.md ADDED Viewed

	@@ -0,0 +1,136 @@

+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+axolotl version: `0.9.1`
+```yaml
+base_model: meta-llama/Llama-3.1-8B-Instruct
+model_type: LlamaForCausalLM
+tokenizer_type: AutoTokenizer
+gradient_accumulation_steps: 2
+micro_batch_size: 1
+num_epochs: 4
+optimizer: adamw_torch_fused
+lr_scheduler: cosine
+learning_rate: 0.0001
+load_in_8bit: false
+load_in_4bit: true
+adapter: lora
+lora_model_dir: null
+lora_r: 4
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+- q_proj
+- v_proj
+- k_proj
+datasets:
+- path: /workspace/FinLoRA/data/train/financebench_train.jsonl
+  type:
+    system_prompt: ''
+    field_system: system
+    field_instruction: context
+    field_output: target
+    format: '[INST] {instruction} [/INST]'
+    no_input_format: '[INST] {instruction} [/INST]'
+dataset_prepared_path: null
+val_set_size: 0.02
+output_dir: /workspace/FinLoRA/lora/axolotl-output/financebench_llama_3_1_8b_4bits_r4
+peft_use_dora: false
+sequence_len: 4096
+sample_packing: false
+pad_to_sequence_len: false
+wandb_project: finlora_models
+wandb_entity: null
+wandb_watch: gradients
+wandb_name: financebench_llama_3_1_8b_4bits_r4
+wandb_log_model: 'false'
+bf16: auto
+tf32: false
+gradient_checkpointing: true
+resume_from_checkpoint: null
+logging_steps: 500
+flash_attention: false
+deepspeed: deepspeed_configs/zero1.json
+warmup_steps: 10
+evals_per_epoch: 4
+saves_per_epoch: 1
+weight_decay: 0.0
+special_tokens:
+  pad_token: <|end_of_text|>
+chat_template: llama3
+```
+</details><br>
+# financebench_llama_3_1_8b_4bits_r4
+This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/financebench_train.jsonl dataset.
+It achieves the following results on the evaluation set:
+- Loss: 3.3003
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 5
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 10
+- total_eval_batch_size: 5
+- optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9,0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 10
+- num_epochs: 4.0
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| No log        | 0.1176 | 1    | 4.9794          |
+| No log        | 0.2353 | 2    | 4.9922          |
+| No log        | 0.4706 | 4    | 4.9603          |
+| No log        | 0.7059 | 6    | 4.8793          |
+| No log        | 0.9412 | 8    | 4.6411          |
+| No log        | 1.1176 | 10   | 4.4789          |
+| No log        | 1.3529 | 12   | 4.1465          |
+| No log        | 1.5882 | 14   | 3.9720          |
+| No log        | 1.8235 | 16   | 3.8714          |
+| No log        | 2.0    | 18   | 3.7423          |
+| No log        | 2.2353 | 20   | 3.6258          |
+| No log        | 2.4706 | 22   | 3.5165          |
+| No log        | 2.7059 | 24   | 3.4236          |
+| No log        | 2.9412 | 26   | 3.3368          |
+| No log        | 3.1176 | 28   | 3.3172          |
+| No log        | 3.3529 | 30   | 3.2741          |
+| No log        | 3.5882 | 32   | 3.3003          |
+### Framework versions
+- PEFT 0.15.2
+- Transformers 4.51.3
+- Pytorch 2.8.0.dev20250319+cu128
+- Datasets 3.5.1
+- Tokenizers 0.21.1

finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/adapter_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 4,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models_4bit/financebench_llama_3_1_8b_4bits_r4/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4313c4d38d04e2f0a3324938a91d1680a0d1d39fe4c4eafd8ec90acaf5953ba
+size 4744016

finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/README.md ADDED Viewed

	@@ -0,0 +1,198 @@

+# Model Card for finer_llama_3_1_8b_4bits_r4
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.15.0

finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/adapter_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 4,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "k_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models_4bit/finer_llama_3_1_8b_4bits_r4/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8f9e208ddfc1866721c6193f3b2aab45cee7511097c7b8abc45be3d8065d9ee3
+size 4743824

finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/README.md ADDED Viewed

	@@ -0,0 +1,124 @@

+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+axolotl version: `0.9.1`
+```yaml
+base_model: meta-llama/Llama-3.1-8B-Instruct
+model_type: LlamaForCausalLM
+tokenizer_type: AutoTokenizer
+gradient_accumulation_steps: 2
+micro_batch_size: 4
+num_epochs: 1
+optimizer: adamw_torch_fused
+lr_scheduler: cosine
+learning_rate: 0.0001
+load_in_8bit: false
+load_in_4bit: true
+adapter: lora
+lora_model_dir: null
+lora_r: 4
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+- q_proj
+- v_proj
+- k_proj
+datasets:
+- path: /workspace/FinLoRA/data/train/formula_train.jsonl
+  type:
+    system_prompt: ''
+    field_system: system
+    field_instruction: context
+    field_output: target
+    format: '[INST] {instruction} [/INST]'
+    no_input_format: '[INST] {instruction} [/INST]'
+dataset_prepared_path: null
+val_set_size: 0.02
+output_dir: /workspace/FinLoRA/lora/axolotl-output/formula_llama_3_1_8b_4bits_r4
+peft_use_dora: false
+sequence_len: 4096
+sample_packing: false
+pad_to_sequence_len: false
+wandb_project: finlora_models
+wandb_entity: null
+wandb_watch: gradients
+wandb_name: formula_llama_3_1_8b_4bits_r4
+wandb_log_model: 'false'
+bf16: auto
+tf32: false
+gradient_checkpointing: true
+resume_from_checkpoint: null
+logging_steps: 500
+flash_attention: false
+deepspeed: deepspeed_configs/zero1.json
+warmup_steps: 10
+evals_per_epoch: 4
+saves_per_epoch: 1
+weight_decay: 0.0
+special_tokens:
+  pad_token: <|end_of_text|>
+chat_template: llama3
+```
+</details><br>
+# formula_llama_3_1_8b_4bits_r4
+This model is a fine-tuned version of [meta-llama/Llama-3.1-8B-Instruct](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct) on the /workspace/FinLoRA/data/train/formula_train.jsonl dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.6143
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 5
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 40
+- total_eval_batch_size: 20
+- optimizer: OptimizerNames.ADAMW_TORCH_FUSED with betas=(0.9, 0.999), epsilon=1e-08, and no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 10
+- num_epochs: 1.0
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| No log        | 0.05  | 1    | 3.8659          |
+| No log        | 0.25  | 5    | 3.6317          |
+| No log        | 0.5   | 10   | 2.6735          |
+| No log        | 0.75  | 15   | 1.7570          |
+| No log        | 1.0   | 20   | 1.6143          |
+### Framework versions
+- PEFT 0.15.2
+- Transformers 4.51.3
+- Pytorch 2.8.0.dev20250319+cu128
+- Datasets 3.5.1
+- Tokenizers 0.21.1

finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/adapter_config.json ADDED Viewed

	@@ -0,0 +1,35 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 4,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj",
+    "k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

finlora_hf_submission/models_4bit/formula_llama_3_1_8b_4bits_r4/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c1ffb0c00e606c0df93d8b8e5369289e1503bafbe46deefd4097d0b4d80046fb
+size 4744016

finlora_hf_submission/models_4bit/headline_llama_3_1_8b_4bits_r4/README.md ADDED Viewed

	@@ -0,0 +1,198 @@

+# Model Card for headline_llama_3_1_8b_4bits_r4
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the fine-tuning data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the fine-tuning procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.13.2

finlora_hf_submission/models_4bit/headline_llama_3_1_8b_4bits_r4/adapter_config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 4,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "q_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}