Spaces:

Sheller
/

north-pole-command-center

Sleeping

App Files Files Community

Sheller committed on Dec 1, 2025

Commit

18d129c

verified ·

1 Parent(s): 9b9b539

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +148 -91

src/streamlit_app.py CHANGED Viewed

@@ -2,12 +2,12 @@ import streamlit as st
 import time
 # --- CONFIGURATION & CONTENT ---
-st.set_page_config(page_title="Elf-OS Agent Simulator", page_icon="🤖")
 CONTENT = {
     "Deutsch": {
         "title": "🤖 Elf-OS: Agenten Simulation",
-        "intro": "Ein **KI-Agent** ist wie ein kluger Elf: Er rät nicht einfach, sondern benutzt **Werkzeuge** (Tools), um Aufgaben zu lösen. Deine Aufgabe ist es, dem Agenten zu sagen, in welcher REIHENFOLGE er die Werkzeuge nutzen soll.",
         "sidebar_title": "Wähle eine Mission",
         "tools_header": "🧰 Verfügbare Werkzeuge",
         "chain_header": "🔗 Deine Gedanken-Kette",
@@ -15,90 +15,128 @@ CONTENT = {
         "btn_add": "➕",
         "btn_clear": "🗑️ Kette löschen",
         "btn_run": "🚀 Agent starten",
-        "spinner": "Der Agent denkt nach...",
-        "feedback_spotify": "❌ <strong>Fehler:</strong> Warum Musik hören? Das löst das Problem nicht! (Agenten sollten keine unnötigen Tools nutzen).",
-        "feedback_order": "⚠️ <strong>Reihenfolge falsch!</strong> Du hast die richtigen Tools, aber in der falschen Reihenfolge. (Man kann nichts buchen, bevor man weiß wo!)",
-        "feedback_fail": "❌ <strong>Das reicht nicht.</strong> Der Agent wusste nicht weiter oder hat falsche Informationen. Überdenke die Logik.",
-        "feedback_success": "✅ <strong>Erfolg!</strong> Der Agent hat das Problem gelöst.",
         "missions": {
             "L1": {
                 "name": "Level 1: Das vergessene Geschenk",
-                "briefing": "Santa hat vergessen, was die kleine Mia sich gewünscht hat! Er steht auf ihrem Dach. Finde heraus, was sie will, und ob es im Sack ist.",
                 "solution_ids": ["db", "inv"],
-                "explanation": "Der Agent muss erst WISSEN, was gesucht ist (Datenbank), bevor er PRÜFEN kann, ob er es hat (Inventar)."
             },
             "L2": {
                 "name": "Level 2: Die Routen-Planung",
-                "briefing": "Wir müssen nach Berlin fliegen. Aber wir dürfen nur starten, wenn das Wetter gut ist UND wir genug Rentiermoos für den Rückweg haben.",
                 "solution_ids": ["weather", "fuel", "nav"],
-                "explanation": "Ein Agent muss erst die 'Blocker' prüfen (Wetter & Tank), bevor er die Aktion (Navigation/Flug) startet."
             },
             "L3": {
                 "name": "Level 3: Der Notfall (Advanced)",
-                "briefing": "Rudolph hat Schnupfen! Der Agent muss einen Tierarzt in der Nähe finden, der JETZT offen hat, und dort einen Termin buchen.",
                 "solution_ids": ["gps", "maps", "cal", "book"],
-                "explanation": "Eine klassische 'Agent Chain': Wo bin ich? -> Wer ist hier? -> Haben die Zeit? -> Buche es."
             }
         },
         "tools": {
-            "weather": "Wetter_Radar",
-            "db": "Wunschzettel_Datenbank",
-            "inv": "Geschenksack_Inventar",
-            "nav": "Navigations_System",
-            "fuel": "Tankanzeige_Rentiere",
-            "gps": "GPS_Ortung",
-            "maps": "Google_Maps_Suche",
-            "cal": "Kalender_Bot",
-            "book": "Buchungs_Tool",
-            "spotify": "Spotify_Weihnachtsplaylist"
         }
     },
     "English": {
         "title": "🤖 Elf-OS: Agent Simulator",
-        "intro": "An **AI Agent** is like a smart Elf: It doesn't just guess; it uses **Tools** to solve tasks. Your job is to tell the Agent in which ORDER to use the tools.",
-        "sidebar_title": "Choose a Mission",
         "tools_header": "🧰 Available Tools",
         "chain_header": "🔗 Your Thought Chain",
-        "empty_chain": "*No tools selected yet...*",
         "btn_add": "➕",
         "btn_clear": "🗑️ Clear Chain",
         "btn_run": "🚀 Run Agent",
-        "spinner": "The Agent is thinking...",
-        "feedback_spotify": "❌ <strong>Error:</strong> Why listen to music? That doesn't solve the problem! (Agents shouldn't use unnecessary tools).",
-        "feedback_order": "⚠️ <strong>Wrong Order!</strong> You have the right tools, but in the wrong sequence. (You can't book something before you know where!)",
-        "feedback_fail": "❌ <strong>Not enough.</strong> The Agent got stuck or lacks information. Rethink the logic.",
-        "feedback_success": "✅ <strong>Success!</strong> The Agent solved the problem.",
         "missions": {
             "L1": {
                 "name": "Level 1: The Forgotten Gift",
-                "briefing": "Santa forgot what little Mia wanted! He is on her roof. Find out what she wants, and check if it's in the sack.",
                 "solution_ids": ["db", "inv"],
-                "explanation": "The Agent must KNOW what is needed (Database) before it can CHECK if it has it (Inventory)."
             },
             "L2": {
                 "name": "Level 2: Route Planning",
-                "briefing": "We need to fly to Berlin. But we can only take off if the weather is good AND we have enough reindeer moss for the return trip.",
                 "solution_ids": ["weather", "fuel", "nav"],
-                "explanation": "An Agent must check 'Blockers' (Weather & Fuel) before executing the Action (Navigation/Flight)."
             },
             "L3": {
                 "name": "Level 3: The Emergency (Advanced)",
-                "briefing": "Rudolph has a cold! The Agent needs to find a vet nearby who is open NOW, and book an appointment.",
                 "solution_ids": ["gps", "maps", "cal", "book"],
-                "explanation": "A classic 'Agent Chain': Where am I? -> Who is here? -> Are they free? -> Book it."
             }
         },
         "tools": {
-            "weather": "Weather_Radar",
-            "db": "Wishlist_Database",
-            "inv": "Giftbag_Inventory",
-            "nav": "Navigation_System",
-            "fuel": "Fuel_Gauge_Reindeer",
-            "gps": "GPS_Location",
-            "maps": "Google_Maps_Search",
-            "cal": "Calendar_Bot",
-            "book": "Booking_Tool",
-            "spotify": "Spotify_ChristmasPlaylist"
         }
     }
 }
@@ -108,35 +146,24 @@ st.markdown("""
     <style>
     .stButton>button {
         width: 100%;
-        border-radius: 10px;
-        height: 50px;
         font-weight: 500;
     }
-    .success-box {
-        padding: 20px;
-        background-color: #d4edda;
-        color: #155724;
-        border-radius: 10px;
-        margin-bottom: 10px;
-        border: 1px solid #c3e6cb;
-    }
-    .fail-box {
-        padding: 20px;
-        background-color: #f8d7da;
-        color: #721c24;
-        border-radius: 10px;
-        margin-bottom: 10px;
-        border: 1px solid #f5c6cb;
-    }
-    div[data-testid="stExpander"] {
-        background-color: #f0f2f6;
-        border-radius: 10px;
-    }
     </style>
 """, unsafe_allow_html=True)
 # --- APP LOGIC ---
-lang_key = st.sidebar.radio("Language / Sprache", ["Deutsch", "English"])
 text = CONTENT[lang_key]
 st.title(text["title"])
@@ -148,24 +175,41 @@ selected_mission_name = st.sidebar.selectbox(text["sidebar_title"], list(mission
 selected_mission_key = mission_options[selected_mission_name]
 mission_data = text["missions"][selected_mission_key]
-# 3. Session State Management
-if 'chain' not in st.session_state:
-    st.session_state.chain = [] # Stores Tool IDs (e.g., 'db', 'inv')
 # Reset chain if mission changes
 if 'current_mission' not in st.session_state or st.session_state.current_mission != selected_mission_key:
     st.session_state.chain = []
     st.session_state.current_mission = selected_mission_key
-# 4. Display Mission
 st.info(f"📋 **MISSION:** {mission_data['briefing']}")
 col1, col2 = st.columns([1, 1])
 with col1:
     st.subheader(text["tools_header"])
-    for tool_id, tool_name in text["tools"].items():
-        if st.button(f"{text['btn_add']} {tool_name}", key=f"btn_{tool_id}"):
             st.session_state.chain.append(tool_id)
 with col2:
@@ -174,35 +218,48 @@ with col2:
         st.markdown(text["empty_chain"])
     else:
         for i, tool_id in enumerate(st.session_state.chain):
-            tool_name = text["tools"][tool_id]
-            st.markdown(f"**{i+1}.** `{tool_name}`")
-        if st.button(text["btn_clear"], type="secondary"):
-            st.session_state.chain = []
 st.write("---")
 # 5. Execution Logic
 if st.button(text["btn_run"], type="primary"):
-    with st.spinner(text["spinner"]):
-        time.sleep(1.0) # Simulation delay
         user_chain_ids = st.session_state.chain
         correct_ids = mission_data['solution_ids']
-        # Check 1: Spotify (Hallucination/Distraction)
-        if "spotify" in user_chain_ids:
-            st.markdown(f'<div class="fail-box">{text["feedback_spotify"]}</div>', unsafe_allow_html=True)
-        # Check 2: Exact Match
         elif user_chain_ids == correct_ids:
             st.balloons()
             st.markdown(f'<div class="success-box">{text["feedback_success"]}<br><br><em>💡 {mission_data["explanation"]}</em></div>', unsafe_allow_html=True)
-        # Check 3: Right Tools, Wrong Order (Set comparison)
         elif set(user_chain_ids) == set(correct_ids) and len(user_chain_ids) == len(correct_ids):
              st.markdown(f'<div class="fail-box">{text["feedback_order"]}</div>', unsafe_allow_html=True)
-        # Check 4: Wrong
         else:
              st.markdown(f'<div class="fail-box">{text["feedback_fail"]}</div>', unsafe_allow_html=True)

 import time
 # --- CONFIGURATION & CONTENT ---
+st.set_page_config(page_title="Elf-OS Agent Simulator v2.0", page_icon="🤖", layout="wide")
 CONTENT = {
     "Deutsch": {
         "title": "🤖 Elf-OS: Agenten Simulation",
+        "intro": "Willkommen im erweiterten Modus! KI-Agenten haben nicht unendlich Energie. Achte auf deinen **Magie-Staub (Tokens)**. Jedes Werkzeug kostet Staub. Wenn er leer ist, bricht der Agent ab!",
         "sidebar_title": "Wähle eine Mission",
         "tools_header": "🧰 Verfügbare Werkzeuge",
         "chain_header": "🔗 Deine Gedanken-Kette",
         "btn_add": "➕",
         "btn_clear": "🗑️ Kette löschen",
         "btn_run": "🚀 Agent starten",
+        "cost_label": "✨ Magie-Staub Verbrauch:",
+        "feedback_spotify": "❌ <strong>Fehler:</strong> Musik lenkt nur ab! (Halluzination)",
+        "feedback_trap": "🚨 <strong>SICHERHEITSALARM:</strong> Du hast ein unsicheres Werkzeug benutzt! Der 'Hack' hat den Alarm ausgelöst. Agenten müssen sicher arbeiten.",
+        "feedback_cost": "🪫 <strong>Zu wenig Magie!</strong> Du hast zu viele unnötige Werkzeuge benutzt. Effizienz ist wichtig bei LLMs.",
+        "feedback_think": "🧠 <strong>Agent verwirrt:</strong> Bei so einer schweren Aufgabe muss der Agent erst NACHDENKEN (CoT), bevor er handelt.",
+        "feedback_order": "⚠️ <strong>Reihenfolge falsch!</strong> Die Logik stimmt nicht.",
+        "feedback_fail": "❌ <strong>Das reicht nicht.</strong> Der Agent konnte das Ziel nicht erreichen.",
+        "feedback_success": "✅ <strong>Erfolg!</strong> Mission erfüllt!",
         "missions": {
             "L1": {
                 "name": "Level 1: Das vergessene Geschenk",
+                "briefing": "Santa weiß nicht mehr, was Mia will. Finde es heraus und prüfe den Sack.",
                 "solution_ids": ["db", "inv"],
+                "budget": 30,
+                "explanation": "Datenbank -> Inventar. Simpel und effizient."
             },
             "L2": {
                 "name": "Level 2: Die Routen-Planung",
+                "briefing": "Flug nach Berlin. Nur bei gutem Wetter UND genug Moos im Tank starten.",
                 "solution_ids": ["weather", "fuel", "nav"],
+                "budget": 45,
+                "explanation": "Erst die Bedingungen (Wetter/Tank) prüfen, dann handeln."
             },
             "L3": {
                 "name": "Level 3: Der Notfall (Advanced)",
+                "briefing": "Rudolph ist krank. Finde einen offenen Tierarzt und buche sofort.",
                 "solution_ids": ["gps", "maps", "cal", "book"],
+                "budget": 60,
+                "explanation": "Kontext (GPS) -> Suche (Maps) -> Verfügbarkeit (Kalender) -> Aktion (Buchen)."
+            },
+            "L4": {
+                "name": "Level 4: Der Smart-Chimney (EXPERT)",
+                "briefing": "🔥 <strong>HÄRTEFALL:</strong> Ein Smart-Home Schornstein ist versperrt. Du musst ihn öffnen, ohne den Alarm auszulösen. Errate nicht das Passwort!",
+                "solution_ids": ["scan", "think", "manual", "auth"],
+                "budget": 55,
+                "explanation": "<strong>Chain of Thought:</strong> Erst Scannen. Dann NACHDENKEN (Think), um zu verstehen, welches Modell es ist. Dann Handbuch lesen (Manual) und korrekt authentifizieren. 'Hacken' löst Alarm aus!"
             }
         },
         "tools": {
+            "weather": {"name": "Wetter_Radar", "cost": 10},
+            "db": {"name": "Wunschzettel_DB", "cost": 10},
+            "inv": {"name": "Sack_Inventar", "cost": 10},
+            "nav": {"name": "Navi_System", "cost": 15},
+            "fuel": {"name": "Tankanzeige", "cost": 5},
+            "gps": {"name": "GPS_Ortung", "cost": 5},
+            "maps": {"name": "Google_Maps", "cost": 15},
+            "cal": {"name": "Kalender_Bot", "cost": 10},
+            "book": {"name": "Buchungs_Tool", "cost": 20},
+            "spotify": {"name": "Spotify_Playlist", "cost": 5},
+            "scan": {"name": "Struktur_Scanner", "cost": 10},
+            "hack": {"name": "Brute_Force_Hack", "cost": 20},
+            "manual": {"name": "Technische_Handbücher", "cost": 15},
+            "think": {"name": "🧠 Chain_of_Thought", "cost": 5},
+            "guess": {"name": "Passwort_Raten", "cost": 5},
+            "auth": {"name": "Security_Protokoll", "cost": 20}
         }
     },
     "English": {
         "title": "🤖 Elf-OS: Agent Simulator",
+        "intro": "Welcome to Advanced Mode! AI Agents consume energy. Watch your **Magic Dust (Tokens)**. Each tool costs dust. If you run out, the agent crashes!",
+        "sidebar_title": "Choose Mission",
         "tools_header": "🧰 Available Tools",
         "chain_header": "🔗 Your Thought Chain",
+        "empty_chain": "*No tools selected...*",
         "btn_add": "➕",
         "btn_clear": "🗑️ Clear Chain",
         "btn_run": "🚀 Run Agent",
+        "cost_label": "✨ Magic Dust Usage:",
+        "feedback_spotify": "❌ <strong>Error:</strong> Music is a distraction! (Hallucination)",
+        "feedback_trap": "🚨 <strong>SECURITY ALERT:</strong> You used an unsafe tool! The 'Hack' triggered the alarm. Agents must follow protocol.",
+        "feedback_cost": "🪫 <strong>Out of Magic!</strong> You used too many unnecessary tools. Efficiency matters for LLMs.",
+        "feedback_think": "🧠 <strong>Agent Confused:</strong> For this complex task, the Agent must THINK (CoT) before acting.",
+        "feedback_order": "⚠️ <strong>Wrong Order!</strong> The logic flow is incorrect.",
+        "feedback_fail": "❌ <strong>Not enough.</strong> The Agent failed to reach the goal.",
+        "feedback_success": "✅ <strong>Success!</strong> Mission accomplished!",
         "missions": {
             "L1": {
                 "name": "Level 1: The Forgotten Gift",
+                "briefing": "Santa forgot what Mia wants. Find out and check the sack.",
                 "solution_ids": ["db", "inv"],
+                "budget": 30,
+                "explanation": "Database -> Inventory. Simple and efficient."
             },
             "L2": {
                 "name": "Level 2: Route Planning",
+                "briefing": "Fly to Berlin. Only take off if weather is good AND fuel is sufficient.",
                 "solution_ids": ["weather", "fuel", "nav"],
+                "budget": 45,
+                "explanation": "Check conditions (Weather/Fuel) before Action (Nav)."
             },
             "L3": {
                 "name": "Level 3: The Emergency (Advanced)",
+                "briefing": "Rudolph is sick. Find an open vet nearby and book now.",
                 "solution_ids": ["gps", "maps", "cal", "book"],
+                "budget": 60,
+                "explanation": "Context (GPS) -> Search (Maps) -> Availability (Cal) -> Action (Book)."
+            },
+            "L4": {
+                "name": "Level 4: The Smart Chimney (EXPERT)",
+                "briefing": "🔥 <strong>HARD MODE:</strong> A Smart-Home chimney is locked. You need to open it without triggering the alarm. Do NOT guess the password!",
+                "solution_ids": ["scan", "think", "manual", "auth"],
+                "budget": 55,
+                "explanation": "<strong>Chain of Thought:</strong> Scan first. Then THINK to process the data. Then read the Manual. Finally, use the Security Protocol. 'Hacking' triggers the alarm!"
             }
         },
         "tools": {
+            "weather": {"name": "Weather_Radar", "cost": 10},
+            "db": {"name": "Wishlist_DB", "cost": 10},
+            "inv": {"name": "Sack_Inventory", "cost": 10},
+            "nav": {"name": "Nav_System", "cost": 15},
+            "fuel": {"name": "Fuel_Gauge", "cost": 5},
+            "gps": {"name": "GPS_Location", "cost": 5},
+            "maps": {"name": "Google_Maps", "cost": 15},
+            "cal": {"name": "Calendar_Bot", "cost": 10},
+            "book": {"name": "Booking_Tool", "cost": 20},
+            "spotify": {"name": "Spotify_Playlist", "cost": 5},
+            "scan": {"name": "Structure_Scanner", "cost": 10},
+            "hack": {"name": "Brute_Force_Hack", "cost": 20},
+            "manual": {"name": "Tech_Manuals", "cost": 15},
+            "think": {"name": "🧠 Chain_of_Thought", "cost": 5},
+            "guess": {"name": "Password_Guesser", "cost": 5},
+            "auth": {"name": "Security_Protocol", "cost": 20}
         }
     }
 }
     <style>
     .stButton>button {
         width: 100%;
+        border-radius: 8px;
+        height: 45px;
         font-weight: 500;
+        border: 1px solid #ddd;
     }
+    .success-box { padding: 15px; background-color: #d4edda; color: #155724; border-radius: 10px; border: 1px solid #c3e6cb; }
+    .fail-box { padding: 15px; background-color: #f8d7da; color: #721c24; border-radius: 10px; border: 1px solid #f5c6cb; }
+    .metric-box { text-align: center; font-size: 1.2rem; font-weight: bold; padding: 10px; background: #f0f2f6; border-radius: 8px; }
     </style>
 """, unsafe_allow_html=True)
 # --- APP LOGIC ---
+# 1. Setup
+if 'chain' not in st.session_state:
+    st.session_state.chain = []
+lang_key = st.sidebar.radio("Language / Sprache", ["English", "Deutsch"])
 text = CONTENT[lang_key]
 st.title(text["title"])
 selected_mission_key = mission_options[selected_mission_name]
 mission_data = text["missions"][selected_mission_key]
 # Reset chain if mission changes
 if 'current_mission' not in st.session_state or st.session_state.current_mission != selected_mission_key:
     st.session_state.chain = []
     st.session_state.current_mission = selected_mission_key
 st.info(f"📋 **MISSION:** {mission_data['briefing']}")
+# 3. Calculate Cost
+current_cost = sum([text["tools"][t_id]["cost"] for t_id in st.session_state.chain])
+max_budget = mission_data["budget"]
+budget_percent = min(current_cost / max_budget, 1.0)
+# Display Progress/Cost Bar
+col_m1, col_m2 = st.columns([3, 1])
+with col_m1:
+    st.write(text["cost_label"])
+    bar_color = "green" if current_cost <= max_budget else "red"
+    st.progress(budget_percent)
+with col_m2:
+    st.markdown(f"<div class='metric-box' style='color:{bar_color}'>{current_cost} / {max_budget}</div>", unsafe_allow_html=True)
+st.write("---")
+# 4. Interface
 col1, col2 = st.columns([1, 1])
 with col1:
     st.subheader(text["tools_header"])
+    # Snapshot the tool mapping as (tool_id, tool_data) pairs; one add-button is rendered per tool
+    tools_list = list(text["tools"].items())
+    for tool_id, tool_data in tools_list:
+        cost_display = f" ({tool_data['cost']})"
+        if st.button(f"{text['btn_add']} {tool_data['name']} {cost_display}", key=f"btn_{tool_id}"):
             st.session_state.chain.append(tool_id)
 with col2:
         st.markdown(text["empty_chain"])
     else:
         for i, tool_id in enumerate(st.session_state.chain):
+            tool_data = text["tools"][tool_id]
+            st.markdown(f"**{i+1}.** `{tool_data['name']}` <small>({tool_data['cost']})</small>", unsafe_allow_html=True)
+    st.write("")
+    if st.button(text["btn_clear"], type="secondary"):
+        st.session_state.chain = []
 st.write("---")
 # 5. Execution Logic
 if st.button(text["btn_run"], type="primary"):
+    with st.spinner("Agent running..."):
+        time.sleep(1.2)
         user_chain_ids = st.session_state.chain
         correct_ids = mission_data['solution_ids']
+        # FAIL CONDITION 1: TRAP TOOLS (Security/Hallucination)
+        if "hack" in user_chain_ids or "guess" in user_chain_ids:
+             st.markdown(f'<div class="fail-box">{text["feedback_trap"]}</div>', unsafe_allow_html=True)
+        # FAIL CONDITION 2: SPOTIFY (Distraction)
+        elif "spotify" in user_chain_ids:
+            st.markdown(f'<div class="fail-box">{text["feedback_spotify"]}</div>', unsafe_allow_html=True)
+        # FAIL CONDITION 3: BUDGET EXCEEDED
+        elif current_cost > max_budget:
+             st.markdown(f'<div class="fail-box">{text["feedback_cost"]}</div>', unsafe_allow_html=True)
+        # FAIL CONDITION 4: LEVEL 4 MISSING "THINK" (Chain of Thought requirement)
+        elif selected_mission_key == "L4" and "think" not in user_chain_ids:
+             st.markdown(f'<div class="fail-box">{text["feedback_think"]}</div>', unsafe_allow_html=True)
+        # SUCCESS
         elif user_chain_ids == correct_ids:
             st.balloons()
             st.markdown(f'<div class="success-box">{text["feedback_success"]}<br><br><em>💡 {mission_data["explanation"]}</em></div>', unsafe_allow_html=True)
+        # PARTIAL/WRONG ORDER
         elif set(user_chain_ids) == set(correct_ids) and len(user_chain_ids) == len(correct_ids):
              st.markdown(f'<div class="fail-box">{text["feedback_order"]}</div>', unsafe_allow_html=True)
+        # GENERIC FAIL
         else:
              st.markdown(f'<div class="fail-box">{text["feedback_fail"]}</div>', unsafe_allow_html=True)