# Financial LLaMA Model Usage Script
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

def load_model(model_path="final_model_continue",
               base_model_name="meta-llama/Meta-Llama-3.1-8B-Instruct"):
    """Load the 4-bit quantized base model, attach the LoRA adapter, and load the tokenizer.

    Args:
        model_path: Directory (or hub id) passed to both
            ``PeftModel.from_pretrained`` and ``AutoTokenizer.from_pretrained``,
            so it must contain the adapter weights and the tokenizer files.
        base_model_name: Identifier of the base causal LM the adapter is
            applied to. Defaults to the checkpoint this script was written for.

    Returns:
        A ``(model, tokenizer)`` tuple: the adapter-wrapped model and its
        tokenizer, with a pad token guaranteed to be set.
    """
    print("🔧 Loading model...")

    # 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )

    # Load the base model; device_map="auto" lets the library decide placement.
    base_model = AutoModelForCausalLM.from_pretrained(
        base_model_name,
        quantization_config=bnb_config,
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )

    # Wrap the base model with the fine-tuned LoRA adapter.
    model = PeftModel.from_pretrained(base_model, model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Fall back to EOS as the padding token when the tokenizer defines none.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    print("✅ Model loading completed!")
    return model, tokenizer

def generate_response(model, tokenizer, prompt, max_length=200):
    """Generate a sampled completion for ``prompt`` and return only the new text.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        tokenizer: Tokenizer used to encode the prompt (called with
            ``return_tensors="pt"``) and to decode the generated ids.
        prompt: Full prompt text to condition the generation on.
        max_length: Maximum number of *newly generated* tokens. It is passed
            to ``generate`` as ``max_new_tokens``, so the prompt length is
            not counted against it.

    Returns:
        The decoded continuation with special tokens removed. The prompt
        itself is not included.
    """
    # Inputs must live on the same device as the model; with
    # device_map="auto" the model is typically not on the CPU.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_token_count = inputs["input_ids"].shape[1]

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_length,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Strip the prompt by token count rather than by len(prompt): decoding
    # does not necessarily reproduce the prompt character-for-character, so a
    # character slice can cut into the answer or leave prompt residue behind.
    generated_tokens = outputs[0][prompt_token_count:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)

# Usage example
if __name__ == "__main__":
    # Load the model and tokenizer using the default adapter path.
    model, tokenizer = load_model()

    # Example prompt in the "### Instruction / ### Input / ### Response"
    # layout; the trailing "### Response:" header is left open for the model.
    prompt = "\n".join([
        "### Instruction:",
        "Please provide investment advice for investors regarding technology stocks.",
        "",
        "### Input:",
        "A technology company's revenue grew 20% this quarter, but profit margin decreased by 5%, mainly due to increased R&D investment. The company has major breakthroughs in AI.",
        "",
        "### Response:",
    ])

    # Generate the advice and print it under a heading line.
    advice = generate_response(model, tokenizer, prompt)
    print("🤖 AI Investment Advice:", advice, sep="\n")