File size: 5,190 Bytes
fd19dfe
 
 
 
833a8a2
 
 
 
fd19dfe
b0a934d
fd19dfe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6c717f
 
 
fd19dfe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import gradio as gr
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch

# Force full-precision, non-fused CPU math paths.  NOTE(review): disabling
# mkldnn and autocast looks like a workaround for the degraded CPU output
# quality mentioned in the UI notes below — confirm these are still required.
torch.set_float32_matmul_precision("highest")
torch.backends.mkldnn.enabled = False
torch.set_autocast_enabled(False)

# Load the fine-tuned T5 checkpoint and its tokenizer from the Hugging Face Hub.
model_name = "ereniko/LaaLM-v1"  # Replace with your actual HF username
print("Loading model...")
model = T5ForConditionalGeneration.from_pretrained(model_name)
tokenizer = T5Tokenizer.from_pretrained(model_name)

# Prefer GPU when available; inference-only, so put the model in eval mode.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()

def run_command(system_state, command):
    """Predict the terminal output of *command* given *system_state*.

    The system-state description and the command are joined with a newline
    (the exact format used during training), tokenized, and fed through the
    model.  Returns the decoded prediction, or a prompt message when the
    command is blank.
    """
    # Guard: nothing to run for an empty / whitespace-only command.
    if not command.strip():
        return "Please enter a command"

    # Build the input exactly as the model was trained: state, newline, command.
    prompt = f"{system_state}\n{command}"

    encoded = tokenizer(
        prompt, return_tensors="pt", max_length=512, truncation=True
    ).to(device)

    # Inference only — no gradients needed.
    with torch.no_grad():
        generated = model.generate(**encoded, max_new_tokens=128)

    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Example system states fed to the model (format must match training data).

# Minimal state: an empty home directory.
example_empty = """Current directory: /home/user
Files in current directory:
- (empty)
Environment: USER=user, HOME=/home/user"""

# State with two files: one empty, one with inline content.
example_with_files = """Current directory: /home/user
Files in current directory:
- test.txt (empty)
- data.txt (content: 'hello world')
Environment: USER=user, HOME=/home/user"""

# Create Gradio interface.  Fixes: the user-facing Markdown notes contained
# garbled English ("uneffective until a way gets find out", "you can acces
# you can go to it's model page", a broken GPU/CPU sentence) — rewritten for
# clarity; all widgets, wiring, and examples are unchanged.
with gr.Blocks(title="LaaLM-v1: Linux as a Language Model") as demo:
    gr.Markdown("""
    # 🐧 LaaLM-v1: Linux as a Language Model
    
    A T5-based model trained to predict Linux terminal outputs given the current system state.
    
    ## How to use:
    1. **System State** - Describes the current directory, files, and environment
    2. **Command** - The Linux command to execute
    3. Click **Run Command** to see the predicted output
    
    ### System State Format:
```
    Current directory: /home/user
    Files in current directory:
    - filename.txt (empty)
    - another.txt (content: 'some text')
    - (empty)  ← if no files
    Environment: USER=user, HOME=/home/user
```
    
    **Note:** This model is stateless - it doesn't remember previous commands. You must update the system state manually to simulate persistence. Because this Space has no persistence right now, file-modifying commands are currently ineffective until a way is found for LaaLM to remember filesystems.

    **Note:** The model is very prone to making errors. GPU tests were more successful; on CPU the model quality appears degraded. To try LaaLM-v1 on a GPU, go to its model page "ereniko/LaaLM-v1", download the model files, and run the quick inference command provided there.
    """)
    
    with gr.Row():
        # Left column: inputs (system state + command) and the run button.
        with gr.Column():
            system_input = gr.Textbox(
                label="System State",
                placeholder="Current directory: /home/user\nFiles in current directory:\n- (empty)\nEnvironment: USER=user, HOME=/home/user",
                lines=8,
                value=example_empty
            )
            
            command_input = gr.Textbox(
                label="Command",
                placeholder="pwd",
                lines=1
            )
            
            run_btn = gr.Button("Run Command", variant="primary")
        
        # Right column: read-only predicted terminal output.
        with gr.Column():
            output = gr.Textbox(
                label="Output",
                lines=10,
                interactive=False
            )
    
    # Clickable examples covering every supported command plus error cases.
    gr.Markdown("### 📝 Try these examples:")
    
    gr.Examples(
        examples=[
            [example_empty, "pwd"],
            [example_empty, "ls"],
            [example_empty, "echo hello world"],
            [example_empty, "touch newfile.txt"],
            [example_with_files, "ls"],
            [example_with_files, "cat test.txt"],
            [example_with_files, "cat data.txt"],
            [example_with_files, "cat nonexistent.txt"],
            [example_empty, "mkdir testdir"],
            [example_empty, "unknowncommand"],
        ],
        inputs=[system_input, command_input],
        outputs=output,
        fn=run_command,
    )
    
    gr.Markdown("""
    ### Supported Commands:
    - `pwd` - Print working directory
    - `ls` - List files
    - `echo` - Print text
    - `touch` - Create empty file
    - `cat` - Read file contents
    - `mkdir` - Create directory
    - `cd` - Change directory
    
    ### Limitations:
    - Model is stateless (doesn't track state between commands)
    - Limited to basic commands (v2 will have more!)
    - File contents must be simple strings
    - No pipe operators or complex bash features yet
    
    ---
    **Model:** T5-base fine-tuned on ~100k synthetic Linux command examples  
    **Accuracy:** ~97% on test set  
    **Training:** V100 32GB, ~2 hours
    """)
    
    # Wire the button to the inference function.
    run_btn.click(
        fn=run_command,
        inputs=[system_input, command_input],
        outputs=output
    )

# Launch the Gradio server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()