# Llama_for_Finance / use_model.py
# (Uploaded to the Hugging Face Hub via huggingface_hub by TimberGu.)
# Financial LLaMA Model Usage Script
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
def load_model(model_path="final_model_continue"):
    """Load the 4-bit quantized base model and attach the fine-tuned LoRA adapter.

    Args:
        model_path: Directory containing the saved LoRA adapter weights and
            the tokenizer (defaults to "final_model_continue").

    Returns:
        A ``(model, tokenizer)`` tuple ready for inference.
    """
    print("🔧 Loading model...")
    # 4-bit NF4 quantization with double quantization keeps the 8B base
    # model small enough for a single consumer GPU; compute runs in bf16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    # Load the frozen base model, sharding across available devices.
    base_model = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Meta-Llama-3.1-8B-Instruct",
        quantization_config=bnb_config,
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    # Attach the fine-tuned LoRA adapter on top of the base model.
    model = PeftModel.from_pretrained(base_model, model_path)
    # Fix: explicitly switch to eval mode so any dropout carried by the
    # LoRA modules is disabled during generation.
    model.eval()
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # LLaMA tokenizers ship without a pad token; reuse EOS for padding.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    print("✅ Model loading completed!")
    return model, tokenizer
def generate_response(model, tokenizer, prompt, max_length=200):
    """Generate a financial-advice completion for *prompt*.

    Args:
        model: Causal LM (possibly PEFT-wrapped) used for generation.
        tokenizer: Tokenizer matching *model*.
        prompt: Full instruction prompt text.
        max_length: Maximum number of NEW tokens to generate.

    Returns:
        The generated completion text, with the prompt stripped off.
    """
    inputs = tokenizer(prompt, return_tensors="pt")
    # Fix: move the input tensors to the model's device — with
    # device_map="auto" the weights live on GPU while the tokenizer
    # returns CPU tensors, which would raise a device-mismatch error.
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )
    # Fix: strip the prompt by TOKEN count rather than character count —
    # decode() does not always reproduce the prompt text byte-for-byte,
    # so slicing with `response[len(prompt):]` can truncate or leak text.
    prompt_token_count = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0][prompt_token_count:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)
# Example invocation: load the adapter-augmented model and run one prompt.
if __name__ == "__main__":
    fin_model, fin_tokenizer = load_model()

    # Alpaca-style instruction prompt describing a sample scenario.
    test_prompt = """### Instruction:
Please provide investment advice for investors regarding technology stocks.
### Input:
A technology company's revenue grew 20% this quarter, but profit margin decreased by 5%, mainly due to increased R&D investment. The company has major breakthroughs in AI.
### Response:"""

    # Run generation and show the model's answer.
    result = generate_response(fin_model, fin_tokenizer, test_prompt)
    print("🤖 AI Investment Advice:")
    print(result)