File size: 15,645 Bytes
bfa6475
6f00807
bfa6475
6f00807
6a845e2
6f00807
 
 
 
987b9da
86019cd
 
 
 
 
 
 
 
 
76c1011
6f00807
 
 
 
 
 
 
18d129c
 
 
 
 
 
 
 
6f00807
 
 
ce0a586
6f00807
18d129c
 
6f00807
 
 
7da3adc
6f00807
18d129c
 
6f00807
 
 
987b9da
6f00807
18d129c
 
 
 
 
7da3adc
18d129c
 
 
6f00807
 
 
18d129c
987b9da
 
 
 
18d129c
987b9da
6a845e2
18d129c
987b9da
6a845e2
18d129c
 
6a845e2
18d129c
 
6f00807
 
 
 
55fc54f
86019cd
 
 
 
 
 
 
 
 
 
18d129c
6f00807
 
18d129c
6f00807
 
 
18d129c
 
 
 
 
 
 
 
6f00807
 
 
7da3adc
6f00807
18d129c
 
6f00807
 
 
7da3adc
6f00807
18d129c
 
6f00807
 
 
7da3adc
6f00807
18d129c
 
 
 
 
7da3adc
18d129c
 
 
6f00807
 
 
18d129c
 
 
 
 
 
 
6a845e2
18d129c
 
6a845e2
18d129c
 
6a845e2
18d129c
 
6f00807
 
 
 
 
 
 
 
 
18d129c
 
6f00807
18d129c
6f00807
18d129c
 
 
6f00807
 
 
 
18d129c
 
 
 
 
 
6f00807
 
 
 
 
55fc54f
 
 
6f00807
 
 
 
 
 
 
 
 
 
 
 
 
18d129c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6f00807
 
 
 
18d129c
 
 
 
 
 
 
6f00807
 
 
 
 
 
 
 
18d129c
 
 
 
 
 
6f00807
 
 
 
 
18d129c
 
6f00807
 
 
 
18d129c
 
 
6f00807
18d129c
 
 
 
 
 
 
 
 
 
 
 
 
6f00807
 
 
18d129c
 
6f00807
 
 
18d129c
6f00807
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
import streamlit as st
import time

# --- CONFIGURATION & CONTENT ---
# Page-wide settings: title, icon and the wide layout.
st.set_page_config(
    layout="wide",
    page_icon="🤖",
    page_title="Elf-OS Agent Simulator",
)

# All user-facing text and game data, keyed by the label offered in the
# language switch. Both languages share the same structure:
#   - flat UI strings ("title", "intro", "btn_*", "feedback_*", ...)
#   - "missions": mission key -> name, briefing, expected tool order
#     ("solution_ids"), dust budget, and the explanation shown on success
#   - "tools": tool id -> display name and dust cost
# The game rules (solution_ids, budget, cost) are repeated per language and
# must be kept in sync by hand when either side is edited.
CONTENT = {
    "Deutsch": {
        "title": "🤖 Elf-OS: Agenten Simulation",
        "intro": "Ein **KI-Agent** ist wie ein kluger Elf: Er rät nicht einfach, sondern benutzt **Werkzeuge** (Tools), um Aufgaben zu lösen. Deine Aufgabe ist es, dem Agenten zu sagen, in welcher REIHENFOLGE er die Werkzeuge nutzen soll. KI-Agenten haben nicht unendlich Energie. Achte auf deinen **Magie-Staub (Tokens)**. Jedes Werkzeug kostet Staub. Wenn er leer ist, bricht der Agent ab!",
        "help_title": "📜 Anleitung: So funktioniert es",
        "help_text": """
        **Dein Ziel:** Baue eine Werkzeug-Kette, um die Mission zu lösen.
        
        1. **Mission verstehen:** Lies oben das Briefing genau durch. Was ist das Ziel? Welche Daten brauchst du zuerst?
        2. **Kette bauen:** Klicke links auf die Werkzeuge (`➕`), um sie deiner Kette hinzuzufügen.
           - *Tipp:* Die Reihenfolge ist wichtig! (Wie beim Kochen: Erst Zutaten suchen, dann kochen).
        3. **Budget beachten:** Jedes Werkzeug kostet "Magie-Staub". Wenn der Balken rot wird, hast du zu viel verbraucht. Es ist immer gut möglichst wenig Budget zu verbrauchen.
        4. **Starten:** Klicke auf `🚀 Agent starten`, um deine Logik zu testen.
        """,
        "sidebar_title": "Wähle eine Mission",
        "tools_header": "🧰 Verfügbare Werkzeuge",
        "chain_header": "🔗 Deine Gedanken-Kette",
        "empty_chain": "*Noch keine Werkzeuge gewählt...*",
        "btn_add": "➕",
        "btn_clear": "🗑️ Kette löschen",
        "btn_run": "🚀 Agent starten",
        "cost_label": "✨ Magie-Staub Verbrauch:",
        "feedback_spotify": "❌ <strong>Fehler:</strong> Musik lenkt nur ab! (Halluzination)",
        "feedback_trap": "🚨 <strong>SICHERHEITSALARM:</strong> Du hast ein unsicheres Werkzeug benutzt! Der 'Hack' hat den Alarm ausgelöst. Agenten müssen sicher arbeiten.",
        "feedback_cost": "🪫 <strong>Zu wenig Magie!</strong> Du hast zu viele unnötige Werkzeuge benutzt. Effizienz ist wichtig bei LLMs.",
        "feedback_think": "🧠 <strong>Agent verwirrt:</strong> Bei so einer schweren Aufgabe muss der Agent erst NACHDENKEN (CoT), bevor er handelt.",
        "feedback_order": "⚠️ <strong>Reihenfolge falsch!</strong> Die Logik stimmt nicht.",
        "feedback_fail": "❌ <strong>Das reicht nicht.</strong> Der Agent konnte das Ziel nicht erreichen.",
        "feedback_success": "✅ <strong>Erfolg!</strong> Mission erfüllt!",
        "missions": {
            "L1": {
                "name": "Level 1: Das vergessene Geschenk",
                "briefing": "Der Weihnachtsmann hat vergessen, was die kleine Mia sich gewünscht hat! Er steht auf ihrem Dach. Finde heraus, was sie will, und ob es im Sack ist.",
                "solution_ids": ["db", "inv"],
                "budget": 30,
                "explanation": "Datenbank -> Inventar. Simpel und effizient."
            },
            "L2": {
                "name": "Level 2: Die Routen-Planung",
                "briefing": "Der Weihnachtsmann muss nach Berlin fliegen. Aber wir dürfen nur starten, wenn das Wetter gut ist UND wir genug Rentiermoos für den Weg haben.",
                "solution_ids": ["weather", "fuel", "nav"],
                "budget": 45,
                "explanation": "Erst die Bedingungen (Wetter/Tank) prüfen, dann handeln."
            },
            "L3": {
                "name": "Level 3: Der Notfall (Advanced)",
                "briefing": "Rudolph hat Schnupfen! Der Agent muss einen Tierarzt in der Nähe finden, der JETZT offen hat, und dort einen Termin buchen.",
                "solution_ids": ["gps", "maps", "cal", "book"],
                "budget": 60,
                "explanation": "Kontext (GPS) -> Suche (Maps) -> Verfügbarkeit (Kalender) -> Aktion (Buchen)."
            },
            "L4": {
                "name": "Level 4: Der Smart-Chimney (EXPERT)",
                "briefing": "🔥HÄRTEFALL: Der Weihnachtsmann versucht ein Geschenk zu überbringen aber ein Smart-Home Schornstein ist versperrt. Du musst ihn öffnen, ohne den Alarm auszulösen.",
                "solution_ids": ["scan", "think", "manual", "auth"],
                "budget": 55,
                "explanation": "<strong>Chain of Thought:</strong> Erst Scannen. Dann NACHDENKEN (Think), um zu verstehen, welches Modell es ist. Dann Handbuch lesen (Manual) und korrekt authentifizieren. 'Hacken' löst Alarm aus!"
            }
        },
        "tools": {
            "weather": {"name": "Wetter_Radar", "cost": 10},
            "db": {"name": "Wunschzettel_Datenbank", "cost": 10},
            "inv": {"name": "Geschenksack_Inventar", "cost": 10},
            "nav": {"name": "Navigations_System", "cost": 15},
            "fuel": {"name": "Tankanzeige_Rentiere", "cost": 5},
            "gps": {"name": "GPS_Ortung", "cost": 5},
            "maps": {"name": "Google_Maps_Suche", "cost": 15},
            "cal": {"name": "Kalender", "cost": 10},
            "book": {"name": "Buchungs_Tool", "cost": 20},
            "spotify": {"name": "Spotify_Weihnachtsplaylist", "cost": 5},
            "scan": {"name": "Modell_Scanner", "cost": 10},
            "hack": {"name": "Brute_Force_Hack", "cost": 20},
            "manual": {"name": "Technische_Handbücher", "cost": 15},
            "think": {"name": "Nachdenken", "cost": 5},
            "guess": {"name": "Passwort_Raten", "cost": 5},
            "auth": {"name": "Security_Protokoll", "cost": 20}
        }
    },
    "English": {
        "title": "🤖 Elf-OS: Agent Simulator",
        "intro": "An **AI agent** is like a clever elf: it doesn't just give advice, it uses **tools** to solve tasks. Your job is to tell the agent in which ORDER it should use the tools. AI Agents consume energy. Watch your **Magic Dust (Tokens)**. Each tool costs dust. If you run out, the agent crashes!",
        "help_title": "📜 Instructions: How to Play",
        "help_text": """
        **Your Goal:** Build a chain of tools to solve the active mission.
        
        1. **Understand the Mission:** Read the briefing at the top. What is the goal? What data do you need first?
        2. **Build the Chain:** Click tools on the left (`➕`) to add them to your sequence.
           - *Tip:* Order matters! (Just like cooking: Find ingredients first, then cook).
        3. **Watch the Budget:** Each tool costs "Magic Dust". If the bar turns red, you are using too much compute power. It is always good to use as little of the budget as possible.
        4. **Execute:** Click `🚀 Run Agent` to test your logic.
        """,
        "sidebar_title": "Choose Mission",
        "tools_header": "🧰 Available Tools",
        "chain_header": "🔗 Your Thought Chain",
        "empty_chain": "*No tools selected...*",
        "btn_add": "➕",
        "btn_clear": "🗑️ Clear Chain",
        "btn_run": "🚀 Run Agent",
        "cost_label": "✨ Magic Dust Usage:",
        "feedback_spotify": "❌ <strong>Error:</strong> Music is a distraction! (Hallucination)",
        "feedback_trap": "🚨 <strong>SECURITY ALERT:</strong> You used an unsafe tool! The 'Hack' triggered the alarm. Agents must follow protocol.",
        "feedback_cost": "🪫 <strong>Out of Magic!</strong> You used too many unnecessary tools. Efficiency matters for LLMs.",
        "feedback_think": "🧠 <strong>Agent Confused:</strong> For this complex task, the Agent must THINK (CoT) before acting.",
        "feedback_order": "⚠️ <strong>Wrong Order!</strong> The logic flow is incorrect.",
        "feedback_fail": "❌ <strong>Not enough.</strong> The Agent failed to reach the goal.",
        "feedback_success": "✅ <strong>Success!</strong> Mission accomplished!",
        "missions": {
            "L1": {
                "name": "Level 1: The Forgotten Gift",
                "briefing": "Santa Claus forgot what little Mia wanted for Christmas! He's standing on her roof. Find out what she wants and whether it's in his sack.",
                "solution_ids": ["db", "inv"],
                "budget": 30,
                "explanation": "Database -> Inventory. Simple and efficient."
            },
            "L2": {
                "name": "Level 2: Route Planning",
                "briefing": "Santa Claus has to fly to Berlin. But we can only take off if the weather is good AND we have enough reindeer moss for the journey.",
                "solution_ids": ["weather", "fuel", "nav"],
                "budget": 45,
                "explanation": "Check conditions (Weather/Fuel) before Action (Nav)."
            },
            "L3": {
                "name": "Level 3: The Emergency (Advanced)",
                "briefing": "Rudolph has a cold! The agent must find a nearby veterinarian who is open NOW and make an appointment there.",
                "solution_ids": ["gps", "maps", "cal", "book"],
                "budget": 60,
                "explanation": "Context (GPS) -> Search (Maps) -> Availability (Cal) -> Action (Book)."
            },
            "L4": {
                "name": "Level 4: The Smart Chimney (EXPERT)",
                "briefing": "🔥 HARD MODE: Santa Claus is trying to deliver a present, but the chimney of the smart home is blocked. You have to open it without triggering the alarm.",
                "solution_ids": ["scan", "think", "manual", "auth"],
                "budget": 55,
                "explanation": "<strong>Chain of Thought:</strong> Scan first. Then THINK to process the data. Then read the Manual. Finally, use the Security Protocol. 'Hacking' triggers the alarm!"
            }
        },
        "tools": {
            "weather": {"name": "Weather_Radar", "cost": 10},
            "db": {"name": "Wishlist_DB", "cost": 10},
            "inv": {"name": "Sack_Inventory", "cost": 10},
            "nav": {"name": "Nav_System", "cost": 15},
            "fuel": {"name": "Fuel_Gauge", "cost": 5},
            "gps": {"name": "GPS_Location", "cost": 5},
            "maps": {"name": "Google_Maps", "cost": 15},
            "cal": {"name": "Calendar", "cost": 10},
            "book": {"name": "Booking_Tool", "cost": 20},
            "spotify": {"name": "Spotify_Playlist", "cost": 5},
            "scan": {"name": "Model_Scanner", "cost": 10},
            "hack": {"name": "Brute_Force_Hack", "cost": 20},
            "manual": {"name": "Tech_Manuals", "cost": 15},
            "think": {"name": "Thinking", "cost": 5},
            "guess": {"name": "Password_Guesser", "cost": 5},
            "auth": {"name": "Security_Protocol", "cost": 20}
        }
    }
}

# --- CUSTOM CSS ---
# Stylesheet injected into the page: button sizing plus the classes used by
# the result boxes (.success-box / .fail-box) and the budget read-out
# (.metric-box).
_CUSTOM_CSS = """
    <style>
    .stButton>button {
        width: 100%;
        border-radius: 8px;
        height: 45px;
        font-weight: 500;
        border: 1px solid #ddd;
    }
    .success-box { padding: 15px; background-color: #d4edda; color: #155724; border-radius: 10px; border: 1px solid #c3e6cb; }
    .fail-box { padding: 15px; background-color: #f8d7da; color: #721c24; border-radius: 10px; border: 1px solid #f5c6cb; }
    .metric-box { text-align: center; font-size: 1.2rem; font-weight: bold; padding: 10px; background: #f0f2f6; border-radius: 8px; }
    </style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)

# --- APP LOGIC ---

# 1. Setup
# The tool chain is kept in session state; create it empty on first use.
if "chain" not in st.session_state:
    st.session_state["chain"] = []

# Sidebar language switch; the chosen label doubles as the key into CONTENT.
selected_language = st.sidebar.radio(
    "Language / Sprache",
    ["English", "Deutsch"],
)
text = CONTENT[selected_language]

# Page heading and intro, followed by the collapsible instructions.
st.title(text["title"])
st.markdown(text["intro"])

help_panel = st.expander(text["help_title"])
help_panel.markdown(text["help_text"])

# 2. Mission Selection
missions = text["missions"]

# The sidebar shows localized mission names; map each name back to its key.
key_by_name = {}
for mission_key, mission in missions.items():
    key_by_name[mission["name"]] = mission_key

chosen_name = st.sidebar.selectbox(text["sidebar_title"], list(key_by_name))
selected_mission_key = key_by_name[chosen_name]
mission_data = missions[selected_mission_key]

# Start from an empty chain on the first run and whenever the mission changes.
mission_unchanged = (
    "current_mission" in st.session_state
    and st.session_state["current_mission"] == selected_mission_key
)
if not mission_unchanged:
    st.session_state["chain"] = []
    st.session_state["current_mission"] = selected_mission_key

st.info(f"📋 **MISSION:** {mission_data['briefing']}")

# 3. Calculate Cost
tool_catalog = text["tools"]
current_cost = sum(tool_catalog[t]["cost"] for t in st.session_state.chain)
max_budget = mission_data["budget"]

# The progress bar only accepts up to 1.0, so an overdrawn budget is clamped;
# the read-out next to it still shows the real numbers and turns red.
fill_ratio = min(current_cost / max_budget, 1.0)
if current_cost <= max_budget:
    usage_color = "green"
else:
    usage_color = "red"

# Display Progress/Cost Bar
bar_col, readout_col = st.columns([3, 1])
with bar_col:
    st.write(text["cost_label"])
    st.progress(fill_ratio)
with readout_col:
    st.markdown(
        f"<div class='metric-box' style='color:{usage_color}'>{current_cost} / {max_budget}</div>",
        unsafe_allow_html=True,
    )

st.write("---")

# 4. Interface
def _add_tool(tool_id):
    """Append ``tool_id`` to the chain kept in session state."""
    st.session_state.chain.append(tool_id)


def _clear_chain():
    """Replace the chain kept in session state with an empty list."""
    st.session_state.chain = []


# The chain is changed through ``on_click`` callbacks rather than inside
# ``if st.button(...)``. Streamlit runs a callback before it re-executes the
# script, so the budget read-out and the chain list (both rendered before the
# point where the buttons are evaluated) already see the updated chain.
# Mutating the chain inline left the cost one click behind and made "clear"
# appear to do nothing until the next interaction.
tools_col, chain_col = st.columns([1, 1])

with tools_col:
    st.subheader(text["tools_header"])

    for tool_id, tool_data in text["tools"].items():
        st.button(
            f"{text['btn_add']} {tool_data['name']}  ({tool_data['cost']})",
            key=f"btn_{tool_id}",
            on_click=_add_tool,
            args=(tool_id,),
        )

with chain_col:
    st.subheader(text["chain_header"])
    if not st.session_state.chain:
        st.markdown(text["empty_chain"])
    else:
        # Numbered list of the selected tools, in the order they were added.
        for position, tool_id in enumerate(st.session_state.chain, start=1):
            tool_data = text["tools"][tool_id]
            st.markdown(
                f"**{position}.** `{tool_data['name']}` <small>({tool_data['cost']})</small>",
                unsafe_allow_html=True,
            )

    st.write("")
    st.button(text["btn_clear"], type="secondary", on_click=_clear_chain)

st.write("---")

# 5. Execution Logic
if st.button(text["btn_run"], type="primary"):
    with st.spinner("Agent running..."):
        time.sleep(1.2)

        submitted = st.session_state.chain
        expected = mission_data['solution_ids']

        # Rules are checked in priority order; the first match picks the
        # feedback message (a key into the active language's strings).
        if not {"hack", "guess"}.isdisjoint(submitted):
            # Trap tools (security)
            verdict = "feedback_trap"
        elif "spotify" in submitted:
            # Distraction tool
            verdict = "feedback_spotify"
        elif current_cost > max_budget:
            # Budget exceeded
            verdict = "feedback_cost"
        elif selected_mission_key == "L4" and "think" not in submitted:
            # Level 4 requires the "think" step (chain of thought)
            verdict = "feedback_think"
        elif submitted == expected:
            verdict = "feedback_success"
        elif len(submitted) == len(expected) and set(submitted) == set(expected):
            # Right tools, wrong order
            verdict = "feedback_order"
        else:
            verdict = "feedback_fail"

        if verdict == "feedback_success":
            st.balloons()
            st.markdown(
                f'<div class="success-box">{text["feedback_success"]}<br><br><em>💡 {mission_data["explanation"]}</em></div>',
                unsafe_allow_html=True,
            )
        else:
            st.markdown(
                f'<div class="fail-box">{text[verdict]}</div>',
                unsafe_allow_html=True,
            )