Spaces:
Sleeping
Sleeping
File size: 15,645 Bytes
bfa6475 6f00807 bfa6475 6f00807 6a845e2 6f00807 987b9da 86019cd 76c1011 6f00807 18d129c 6f00807 ce0a586 6f00807 18d129c 6f00807 7da3adc 6f00807 18d129c 6f00807 987b9da 6f00807 18d129c 7da3adc 18d129c 6f00807 18d129c 987b9da 18d129c 987b9da 6a845e2 18d129c 987b9da 6a845e2 18d129c 6a845e2 18d129c 6f00807 55fc54f 86019cd 18d129c 6f00807 18d129c 6f00807 18d129c 6f00807 7da3adc 6f00807 18d129c 6f00807 7da3adc 6f00807 18d129c 6f00807 7da3adc 6f00807 18d129c 7da3adc 18d129c 6f00807 18d129c 6a845e2 18d129c 6a845e2 18d129c 6a845e2 18d129c 6f00807 18d129c 6f00807 18d129c 6f00807 18d129c 6f00807 18d129c 6f00807 55fc54f 6f00807 18d129c 6f00807 18d129c 6f00807 18d129c 6f00807 18d129c 6f00807 18d129c 6f00807 18d129c 6f00807 18d129c 6f00807 18d129c 6f00807 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 |
import streamlit as st
import time
# --- CONFIGURATION & CONTENT ---
# Page-level settings: browser tab title, tab icon, and full-width layout.
st.set_page_config(
    page_title="Elf-OS Agent Simulator",
    page_icon="🤖",
    layout="wide",
)
# All user-facing copy and game data, keyed by display language.
#
# Each language entry holds:
#   * flat UI strings ("title", "intro", "btn_*", "feedback_*", ...);
#     the "feedback_*" strings contain inline HTML markup,
#   * "missions": level ID -> {"name", "briefing", "solution_ids", "budget",
#     "explanation"}, where "solution_ids" is the ordered list of tool IDs
#     that solves the level and "budget" is the maximum total tool cost,
#   * "tools": tool ID -> {"name", "cost"}.
#
# Level IDs, tool IDs, solutions, budgets and costs are the same in both
# languages; only the displayed text differs.
#
# NOTE: the lines inside the triple-quoted "help_text" strings are kept
# flush-left on purpose -- they are part of the string value.
CONTENT = {
    "Deutsch": {
        "title": "🤖 Elf-OS: Agenten Simulation",
        "intro": "Ein **KI-Agent** ist wie ein kluger Elf: Er rät nicht einfach, sondern benutzt **Werkzeuge** (Tools), um Aufgaben zu lösen. Deine Aufgabe ist es, dem Agenten zu sagen, in welcher REIHENFOLGE er die Werkzeuge nutzen soll. KI-Agenten haben nicht unendlich Energie. Achte auf deinen **Magie-Staub (Tokens)**. Jedes Werkzeug kostet Staub. Wenn er leer ist, bricht der Agent ab!",
        "help_title": "📜 Anleitung: So funktioniert es",
        "help_text": """
**Dein Ziel:** Baue eine Werkzeug-Kette, um die Mission zu lösen.
1. **Mission verstehen:** Lies oben das Briefing genau durch. Was ist das Ziel? Welche Daten brauchst du zuerst?
2. **Kette bauen:** Klicke links auf die Werkzeuge (`➕`), um sie deiner Kette hinzuzufügen.
- *Tipp:* Die Reihenfolge ist wichtig! (Wie beim Kochen: Erst Zutaten suchen, dann kochen).
3. **Budget beachten:** Jedes Werkzeug kostet "Magie-Staub". Wenn der Balken rot wird, hast du zu viel verbraucht. Es ist immer gut möglichst wenig Budget zu verbrauchen.
4. **Starten:** Klicke auf `🚀 Agent starten`, um deine Logik zu testen.
""",
        "sidebar_title": "Wähle eine Mission",
        "tools_header": "🧰 Verfügbare Werkzeuge",
        "chain_header": "🔗 Deine Gedanken-Kette",
        "empty_chain": "*Noch keine Werkzeuge gewählt...*",
        "btn_add": "➕",
        "btn_clear": "🗑️ Kette löschen",
        "btn_run": "🚀 Agent starten",
        "cost_label": "✨ Magie-Staub Verbrauch:",
        "feedback_spotify": "❌ <strong>Fehler:</strong> Musik lenkt nur ab! (Halluzination)",
        "feedback_trap": "🚨 <strong>SICHERHEITSALARM:</strong> Du hast ein unsicheres Werkzeug benutzt! Der 'Hack' hat den Alarm ausgelöst. Agenten müssen sicher arbeiten.",
        "feedback_cost": "🪫 <strong>Zu wenig Magie!</strong> Du hast zu viele unnötige Werkzeuge benutzt. Effizienz ist wichtig bei LLMs.",
        "feedback_think": "🧠 <strong>Agent verwirrt:</strong> Bei so einer schweren Aufgabe muss der Agent erst NACHDENKEN (CoT), bevor er handelt.",
        "feedback_order": "⚠️ <strong>Reihenfolge falsch!</strong> Die Logik stimmt nicht.",
        "feedback_fail": "❌ <strong>Das reicht nicht.</strong> Der Agent konnte das Ziel nicht erreichen.",
        "feedback_success": "✅ <strong>Erfolg!</strong> Mission erfüllt!",
        "missions": {
            "L1": {
                "name": "Level 1: Das vergessene Geschenk",
                "briefing": "Der Weihnachtsmann hat vergessen, was die kleine Mia sich gewünscht hat! Er steht auf ihrem Dach. Finde heraus, was sie will, und ob es im Sack ist.",
                "solution_ids": ["db", "inv"],
                "budget": 30,
                "explanation": "Datenbank -> Inventar. Simpel und effizient."
            },
            "L2": {
                "name": "Level 2: Die Routen-Planung",
                "briefing": "Der Weihnachtsmann muss nach Berlin fliegen. Aber wir dürfen nur starten, wenn das Wetter gut ist UND wir genug Rentiermoos für den Weg haben.",
                "solution_ids": ["weather", "fuel", "nav"],
                "budget": 45,
                "explanation": "Erst die Bedingungen (Wetter/Tank) prüfen, dann handeln."
            },
            "L3": {
                "name": "Level 3: Der Notfall (Advanced)",
                "briefing": "Rudolph hat Schnupfen! Der Agent muss einen Tierarzt in der Nähe finden, der JETZT offen hat, und dort einen Termin buchen.",
                "solution_ids": ["gps", "maps", "cal", "book"],
                "budget": 60,
                "explanation": "Kontext (GPS) -> Suche (Maps) -> Verfügbarkeit (Kalender) -> Aktion (Buchen)."
            },
            "L4": {
                "name": "Level 4: Der Smart-Chimney (EXPERT)",
                "briefing": "🔥HÄRTEFALL: Der Weihnachtsmann versucht ein Geschenk zu überbringen aber ein Smart-Home Schornstein ist versperrt. Du musst ihn öffnen, ohne den Alarm auszulösen.",
                "solution_ids": ["scan", "think", "manual", "auth"],
                "budget": 55,
                "explanation": "<strong>Chain of Thought:</strong> Erst Scannen. Dann NACHDENKEN (Think), um zu verstehen, welches Modell es ist. Dann Handbuch lesen (Manual) und korrekt authentifizieren. 'Hacken' löst Alarm aus!"
            }
        },
        "tools": {
            "weather": {"name": "Wetter_Radar", "cost": 10},
            "db": {"name": "Wunschzettel_Datenbank", "cost": 10},
            "inv": {"name": "Geschenksack_Inventar", "cost": 10},
            "nav": {"name": "Navigations_System", "cost": 15},
            "fuel": {"name": "Tankanzeige_Rentiere", "cost": 5},
            "gps": {"name": "GPS_Ortung", "cost": 5},
            "maps": {"name": "Google_Maps_Suche", "cost": 15},
            "cal": {"name": "Kalender", "cost": 10},
            "book": {"name": "Buchungs_Tool", "cost": 20},
            "spotify": {"name": "Spotify_Weihnachtsplaylist", "cost": 5},
            "scan": {"name": "Modell_Scanner", "cost": 10},
            "hack": {"name": "Brute_Force_Hack", "cost": 20},
            "manual": {"name": "Technische_Handbücher", "cost": 15},
            "think": {"name": "Nachdenken", "cost": 5},
            "guess": {"name": "Passwort_Raten", "cost": 5},
            "auth": {"name": "Security_Protokoll", "cost": 20}
        }
    },
    "English": {
        "title": "🤖 Elf-OS: Agent Simulator",
        "intro": "An **AI agent** is like a clever elf: it doesn't just give advice, it uses **tools** to solve tasks. Your job is to tell the agent in which ORDER it should use the tools. AI Agents consume energy. Watch your **Magic Dust (Tokens)**. Each tool costs dust. If you run out, the agent crashes!",
        "help_title": "📜 Instructions: How to Play",
        "help_text": """
**Your Goal:** Build a chain of tools to solve the active mission.
1. **Understand the Mission:** Read the briefing at the top. What is the goal? What data do you need first?
2. **Build the Chain:** Click tools on the left (`➕`) to add them to your sequence.
- *Tip:* Order matters! (Just like cooking: Find ingredients first, then cook).
3. **Watch the Budget:** Each tool costs "Magic Dust". If the bar turns red, you are using too much compute power. It is always good to use as little of the budget as possible.
4. **Execute:** Click `🚀 Run Agent` to test your logic.
""",
        "sidebar_title": "Choose Mission",
        "tools_header": "🧰 Available Tools",
        "chain_header": "🔗 Your Thought Chain",
        "empty_chain": "*No tools selected...*",
        "btn_add": "➕",
        "btn_clear": "🗑️ Clear Chain",
        "btn_run": "🚀 Run Agent",
        "cost_label": "✨ Magic Dust Usage:",
        "feedback_spotify": "❌ <strong>Error:</strong> Music is a distraction! (Hallucination)",
        "feedback_trap": "🚨 <strong>SECURITY ALERT:</strong> You used an unsafe tool! The 'Hack' triggered the alarm. Agents must follow protocol.",
        "feedback_cost": "🪫 <strong>Out of Magic!</strong> You used too many unnecessary tools. Efficiency matters for LLMs.",
        "feedback_think": "🧠 <strong>Agent Confused:</strong> For this complex task, the Agent must THINK (CoT) before acting.",
        "feedback_order": "⚠️ <strong>Wrong Order!</strong> The logic flow is incorrect.",
        "feedback_fail": "❌ <strong>Not enough.</strong> The Agent failed to reach the goal.",
        "feedback_success": "✅ <strong>Success!</strong> Mission accomplished!",
        "missions": {
            "L1": {
                "name": "Level 1: The Forgotten Gift",
                "briefing": "Santa Claus forgot what little Mia wanted for Christmas! He's standing on her roof. Find out what she wants and whether it's in his sack.",
                "solution_ids": ["db", "inv"],
                "budget": 30,
                "explanation": "Database -> Inventory. Simple and efficient."
            },
            "L2": {
                "name": "Level 2: Route Planning",
                "briefing": "Santa Claus has to fly to Berlin. But we can only take off if the weather is good AND we have enough reindeer moss for the journey.",
                "solution_ids": ["weather", "fuel", "nav"],
                "budget": 45,
                "explanation": "Check conditions (Weather/Fuel) before Action (Nav)."
            },
            "L3": {
                "name": "Level 3: The Emergency (Advanced)",
                "briefing": "Rudolph has a cold! The agent must find a nearby veterinarian who is open NOW and make an appointment there.",
                "solution_ids": ["gps", "maps", "cal", "book"],
                "budget": 60,
                "explanation": "Context (GPS) -> Search (Maps) -> Availability (Cal) -> Action (Book)."
            },
            "L4": {
                "name": "Level 4: The Smart Chimney (EXPERT)",
                "briefing": "🔥 HARD MODE: Santa Claus is trying to deliver a present, but the chimney of the smart home is blocked. You have to open it without triggering the alarm.",
                "solution_ids": ["scan", "think", "manual", "auth"],
                "budget": 55,
                "explanation": "<strong>Chain of Thought:</strong> Scan first. Then THINK to process the data. Then read the Manual. Finally, use the Security Protocol. 'Hacking' triggers the alarm!"
            }
        },
        "tools": {
            "weather": {"name": "Weather_Radar", "cost": 10},
            "db": {"name": "Wishlist_DB", "cost": 10},
            "inv": {"name": "Sack_Inventory", "cost": 10},
            "nav": {"name": "Nav_System", "cost": 15},
            "fuel": {"name": "Fuel_Gauge", "cost": 5},
            "gps": {"name": "GPS_Location", "cost": 5},
            "maps": {"name": "Google_Maps", "cost": 15},
            "cal": {"name": "Calendar", "cost": 10},
            "book": {"name": "Booking_Tool", "cost": 20},
            "spotify": {"name": "Spotify_Playlist", "cost": 5},
            "scan": {"name": "Model_Scanner", "cost": 10},
            "hack": {"name": "Brute_Force_Hack", "cost": 20},
            "manual": {"name": "Tech_Manuals", "cost": 15},
            "think": {"name": "Thinking", "cost": 5},
            "guess": {"name": "Password_Guesser", "cost": 5},
            "auth": {"name": "Security_Protocol", "cost": 20}
        }
    }
}
# --- CUSTOM CSS ---
# Stylesheet for the page: full-width rounded buttons plus the three helper
# classes (.success-box, .fail-box, .metric-box) used by HTML snippets that
# the app renders through st.markdown(..., unsafe_allow_html=True).
CUSTOM_CSS = """
<style>
.stButton>button {
width: 100%;
border-radius: 8px;
height: 45px;
font-weight: 500;
border: 1px solid #ddd;
}
.success-box { padding: 15px; background-color: #d4edda; color: #155724; border-radius: 10px; border: 1px solid #c3e6cb; }
.fail-box { padding: 15px; background-color: #f8d7da; color: #721c24; border-radius: 10px; border: 1px solid #f5c6cb; }
.metric-box { text-align: center; font-size: 1.2rem; font-weight: bold; padding: 10px; background: #f0f2f6; border-radius: 8px; }
</style>
"""
st.markdown(CUSTOM_CSS, unsafe_allow_html=True)
# --- APP LOGIC ---
# 1. Setup
# The tool chain lives in session state so it survives Streamlit reruns;
# create it on the first run only.
if "chain" not in st.session_state:
    st.session_state["chain"] = []

# The chosen language selects which CONTENT sub-dict supplies every string.
lang_key = st.sidebar.radio("Language / Sprache", ["English", "Deutsch"])
text = CONTENT[lang_key]

# Page header: title, intro paragraph, and a collapsible how-to-play panel.
st.title(text["title"])
st.markdown(text["intro"])
help_panel = st.expander(text["help_title"])
help_panel.markdown(text["help_text"])
# 2. Mission Selection
# The sidebar lists localized mission names; map each name back to its
# level ID so the rest of the script can work with the ID.
mission_options = {}
for m_key, m_data in text["missions"].items():
    mission_options[m_data["name"]] = m_key

selected_mission_name = st.sidebar.selectbox(
    text["sidebar_title"],
    list(mission_options.keys()),
)
selected_mission_key = mission_options[selected_mission_name]
mission_data = text["missions"][selected_mission_key]

# Start with an empty chain whenever the active mission differs from the one
# remembered in session state (including the very first run).
same_mission = (
    'current_mission' in st.session_state
    and st.session_state.current_mission == selected_mission_key
)
if not same_mission:
    st.session_state.chain = []
    st.session_state.current_mission = selected_mission_key

st.info(f"📋 **MISSION:** {mission_data['briefing']}")
# 3. Calculate Cost
# Total cost of the tools currently in the chain, measured against the
# budget of the selected mission.
tool_costs = (text["tools"][t_id]["cost"] for t_id in st.session_state.chain)
current_cost = sum(tool_costs)
max_budget = mission_data["budget"]

# st.progress takes a fraction; cap it at 1.0 when the chain is over budget.
budget_percent = min(current_cost / max_budget, 1.0)

# Colour of the "cost / budget" figure: red once the budget is exceeded.
if current_cost <= max_budget:
    bar_color = "green"
else:
    bar_color = "red"

# Display Progress/Cost Bar
col_m1, col_m2 = st.columns([3, 1])
col_m1.write(text["cost_label"])
col_m1.progress(budget_percent)
col_m2.markdown(
    f"<div class='metric-box' style='color:{bar_color}'>{current_cost} / {max_budget}</div>",
    unsafe_allow_html=True,
)
st.write("---")
# 4. Interface
# Chain mutations are done in on_click callbacks rather than inline after
# st.button(...) returns True. Streamlit runs a callback *before* the rerun
# triggered by the click, so everything rendered earlier in the script
# (cost figure, progress bar) and the chain list below already reflect the
# new state instead of lagging one interaction behind.


def _add_tool(tool_id):
    """Append *tool_id* to the chain stored in session state."""
    st.session_state.chain.append(tool_id)


def _clear_chain():
    """Reset the chain stored in session state to an empty list."""
    st.session_state.chain = []


col1, col2 = st.columns([1, 1])

# Left column: one "add" button per available tool, labelled with its cost.
with col1:
    st.subheader(text["tools_header"])
    for tool_id, tool_data in text["tools"].items():
        cost_display = f" ({tool_data['cost']})"
        st.button(
            f"{text['btn_add']} {tool_data['name']} {cost_display}",
            key=f"btn_{tool_id}",
            on_click=_add_tool,
            args=(tool_id,),
        )

# Right column: the chain built so far (numbered from 1) and a clear button.
with col2:
    st.subheader(text["chain_header"])
    if not st.session_state.chain:
        st.markdown(text["empty_chain"])
    else:
        for position, tool_id in enumerate(st.session_state.chain, start=1):
            tool_data = text["tools"][tool_id]
            st.markdown(
                f"**{position}.** `{tool_data['name']}` <small>({tool_data['cost']})</small>",
                unsafe_allow_html=True,
            )
    st.write("")
    st.button(text["btn_clear"], type="secondary", on_click=_clear_chain)

st.write("---")
# 5. Execution Logic


def _evaluate_chain(chain_ids, solution_ids, cost, budget, mission_key):
    """Return the CONTENT feedback key describing how the chain performed.

    Checks run in a fixed priority order; the first one that matches wins:

    1. trap tools ("hack" / "guess") anywhere in the chain
    2. the "spotify" distraction tool anywhere in the chain
    3. total cost above the mission budget
    4. mission "L4" attempted without the "think" tool
    5. chain identical to the solution            -> "feedback_success"
    6. same tools and same length, other order    -> "feedback_order"
    7. anything else (including an empty chain)   -> "feedback_fail"

    Args:
        chain_ids: Tool IDs chosen by the player, in order.
        solution_ids: Tool IDs of the expected solution, in order.
        cost: Total cost of the player's chain.
        budget: Maximum cost allowed for the mission.
        mission_key: Level ID of the active mission (e.g. "L4").
    """
    if "hack" in chain_ids or "guess" in chain_ids:
        return "feedback_trap"
    if "spotify" in chain_ids:
        return "feedback_spotify"
    if cost > budget:
        return "feedback_cost"
    if mission_key == "L4" and "think" not in chain_ids:
        return "feedback_think"
    if chain_ids == solution_ids:
        return "feedback_success"
    if set(chain_ids) == set(solution_ids) and len(chain_ids) == len(solution_ids):
        return "feedback_order"
    return "feedback_fail"


if st.button(text["btn_run"], type="primary"):
    # Short artificial pause so the run feels like the agent is working.
    with st.spinner("Agent running..."):
        time.sleep(1.2)

    verdict = _evaluate_chain(
        st.session_state.chain,
        mission_data['solution_ids'],
        current_cost,
        max_budget,
        selected_mission_key,
    )

    if verdict == "feedback_success":
        # Success shows balloons plus the mission's explanation text.
        st.balloons()
        st.markdown(
            f'<div class="success-box">{text["feedback_success"]}<br><br><em>💡 {mission_data["explanation"]}</em></div>',
            unsafe_allow_html=True,
        )
    else:
        # Every failure kind uses the same red box with its own message.
        st.markdown(
            f'<div class="fail-box">{text[verdict]}</div>',
            unsafe_allow_html=True,
        )