"""
Apollo-Astralis V1 4B - Example Usage

This script demonstrates how to use Apollo-Astralis V1 4B with Transformers.
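
Requires the transformers and torch packages; accelerate must also be
installed for device_map="auto" (pip install transformers torch accelerate).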
"""

from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def load_model(model_name="VANTA-Research/apollo-astralis-v1-4b"):
    """Load Apollo-Astralis model and tokenizer."""
    print(f"Loading {model_name}...")
    
    tokenizer = AutoTokenizer.from_pretrained(
        model_name,
        trust_remote_code=True
    )
    
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True
    )
    
    print("Model loaded successfully!")
    return model, tokenizer

def generate_response(model, tokenizer, user_message, system_prompt=None):
    """Generate a response from Apollo."""
    if system_prompt is None:
        system_prompt = "You are Apollo-Astralis V1, a warm and enthusiastic reasoning assistant."
    
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_message}
    ]
    
    # Apply chat template
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True
    )
    
    # Tokenize
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    
    # Generate
    outputs = model.generate(
        **inputs,
        max_new_tokens=512,
        temperature=0.7,            # moderate randomness
        top_p=0.9,                  # nucleus sampling cutoff
        do_sample=True,
        repetition_penalty=1.05     # mild penalty to discourage repetition loops
    )
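    # For reproducible outputs, set do_sample=False (greedy decoding);
    # temperature and top_p are then ignored.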
    
    # Decode only the newly generated tokens (slice off the prompt)
    response = tokenizer.decode(
        outputs[0][inputs['input_ids'].shape[1]:],
        skip_special_tokens=True
    )
    
    return response

def main():
    # Load model and tokenizer once, then run each example through them
    model, tokenizer = load_model()

    examples = [
        ("Example 1: Celebration Response",
         "I just got my first job as a software engineer!"),
        ("Example 2: Problem-Solving",
         "What's the best way to learn machine learning?"),
        ("Example 3: Mathematical Reasoning",
         "If a train travels 120 km in 1.5 hours, what's its average speed?"),
    ]

    for title, user_msg in examples:
        print("\n" + "="*60)
        print(title)
        print("="*60)
        print(f"\nUser: {user_msg}")
        response = generate_response(model, tokenizer, user_msg)
        print(f"\nApollo: {response}")

if __name__ == "__main__":
    main()