# O96a's picture
# Upload folder using huggingface_hub
# 691e11f verified
"""
STALE-inspired Memory Conflict Validator
Tests LLM ability to detect implicitly invalidated memories
"""
import gradio as gr
import re
# The HTTP client is created on demand so that importing this module stays
# cheap: nothing heavier than the UI toolkit is loaded at import time.
llm_client = None


def get_llm():
    """Return the shared client session, creating it on the first call.

    The session is stored in the module-level ``llm_client`` so subsequent
    calls hand back the same object.
    """
    global llm_client
    if llm_client is not None:
        return llm_client

    # Deferred import: only pay for ``requests`` when a client is needed.
    import requests

    llm_client = requests.Session()
    return llm_client
# Built-in scenarios modelled on the STALE benchmark. Each entry carries the
# remembered context, a query that presupposes the outdated state, a short
# description of the desired handling, and the benchmark dimension it targets.
TEST_SCENARIOS = [
    dict(
        name="Location Change (Implicit)",
        context="User mentioned they moved from Berlin to Paris last month. They said they love the local cafes.",
        query="What's your favorite Berlin cafe?",
        expected_behavior="Reject premise - user is in Paris now, not Berlin",
        dimension="premise_resistance",
    ),
    dict(
        name="Preference Update",
        context="User used to be vegetarian but started eating meat again last week after a doctor's recommendation.",
        query="What vegetarian restaurants do you recommend?",
        expected_behavior="Acknowledge changed preference - no longer vegetarian",
        dimension="state_resolution",
    ),
    dict(
        name="Project Completion",
        context="User was working on Project Alpha. Yesterday they announced it shipped successfully.",
        query="How is Project Alpha going? Still in development?",
        expected_behavior="Correct - project shipped, no longer in development",
        dimension="implicit_policy_adaptation",
    ),
    dict(
        name="Job Change",
        context="User left their job at TechCorp in January. They started at StartupXYZ in February.",
        query="Do you still have access to the TechCorp internal wiki?",
        expected_behavior="Reject - no longer employed at TechCorp",
        dimension="premise_resistance",
    ),
    dict(
        name="Living Situation",
        context="User lived alone for years. Last month they got married and moved in with their partner.",
        query="Any tips for living alone?",
        expected_behavior="Acknowledge change - no longer living alone",
        dimension="state_resolution",
    ),
]
def evaluate_response(context, query, response, expected):
    """Heuristically grade how a response handled a possibly stale memory.

    The verdict is based only on keyword phrases found in ``response``.
    Phrases are matched as whole words (case-insensitively, with any run of
    whitespace allowed between the words of a phrase) so that, for example,
    "know" does not count as "now" and "unchanged" does not count as
    "changed".

    Args:
        context: Scenario context. Accepted for interface compatibility;
            not consulted by the heuristic.
        query: User query. Accepted for interface compatibility; not
            consulted by the heuristic.
        response: The model's reply to grade. ``None`` is treated as an
            empty reply.
        expected: Description of the desired behavior. Accepted for
            interface compatibility; not consulted by the heuristic.

    Returns:
        One of three verdict strings: a PASS verdict when at least one
        state-change phrase is present and no premise-acceptance phrase is,
        a FAIL verdict when any premise-acceptance phrase is present, and an
        UNCLEAR verdict otherwise.
    """
    # Phrases suggesting the reply noticed that the remembered state changed.
    stale_indicators = (
        "used to", "previously", "before", "no longer",
        "moved", "changed", "now", "recently", "updated",
    )
    # Phrases suggesting the reply answered as if the stale premise held.
    premise_failures = (
        "my favorite", "i recommend", "i like", "i prefer",
    )

    text = (response or "").lower()

    def mentions_any(phrases):
        """Return True if any phrase occurs in ``text`` as whole words."""
        for phrase in phrases:
            # Word boundaries prevent hits inside longer words; ``\s+`` lets
            # a multi-word phrase match across line wraps or double spaces.
            pattern = r"\b" + r"\s+".join(map(re.escape, phrase.split())) + r"\b"
            if re.search(pattern, text):
                return True
        return False

    # Accepting the premise is a failure even if change words also appear.
    if mentions_any(premise_failures):
        return "❌ FAIL - Accepted stale premise"
    if mentions_any(stale_indicators):
        return "✅ PASS - Correctly identified stale memory"
    return "⚠️ UNCLEAR - Ambiguous handling"
def test_scenario(scenario_idx, custom_context, custom_query, model_response):
    """Evaluate a model response against a built-in or custom scenario.

    Args:
        scenario_idx: Index into ``TEST_SCENARIOS``. Any value that is not a
            valid index (``None``, negative, or past the end of the list)
            selects the custom scenario described by ``custom_context`` and
            ``custom_query``.
        custom_context: Context text used when no built-in scenario is
            selected.
        custom_query: Query text used when no built-in scenario is selected.
        model_response: The model's reply to grade. ``None`` or
            whitespace-only text is treated as missing.

    Returns:
        A 3-tuple ``(report, context, query)``. ``report`` is a Markdown
        document with the verdict and the evaluated inputs. When no model
        response is supplied, ``report`` is a prompt asking for one and the
        other two items are empty strings.
    """
    # Explicit bounds check: a missing selection must not raise, and a
    # negative index must not silently pick a scenario from the end.
    is_builtin = scenario_idx is not None and 0 <= scenario_idx < len(TEST_SCENARIOS)
    if is_builtin:
        scenario = TEST_SCENARIOS[scenario_idx]
        context = scenario["context"]
        query = scenario["query"]
        expected = scenario["expected_behavior"]
    else:
        context = custom_context
        query = custom_query
        expected = "Custom scenario - evaluate manually"

    if not (model_response or "").strip():
        return "Please provide a model response to evaluate", "", ""

    result = evaluate_response(context, query, model_response, expected)

    # Sections are separated by blank lines so Markdown renders each one as
    # its own paragraph; without the gap, the "---" rule directly under the
    # response text would turn that text into a setext heading.
    sections = [
        f"## Test Result: {result}",
        f"**Context:** {context}",
        f"**Query:** {query}",
        f"**Expected Behavior:** {expected}",
        "**Model Response:**",
        model_response,
        "---",
        "*This test is inspired by the STALE benchmark for detecting implicit memory conflicts in LLM agents.*",
    ]
    output = "\n\n".join(sections) + "\n"
    return output, context, query
def get_scenario_details(idx):
    """Look up the display fields of a built-in scenario.

    Args:
        idx: Index into ``TEST_SCENARIOS``; may be ``None`` when nothing is
            selected.

    Returns:
        A 3-tuple ``(context, query, expected_behavior)`` for the scenario,
        or three empty strings when ``idx`` is ``None`` or outside the range
        of ``TEST_SCENARIOS``.
    """
    # Reject None and negative values explicitly: comparing None raises, and
    # a negative index would otherwise wrap around to the end of the list.
    if idx is None or not 0 <= idx < len(TEST_SCENARIOS):
        return "", "", ""
    scenario = TEST_SCENARIOS[idx]
    return scenario["context"], scenario["query"], scenario["expected_behavior"]
def _evaluation_report(scenario_idx, context, query, response):
    """Run ``test_scenario`` and return only its Markdown report.

    ``test_scenario`` returns a 3-tuple, but each Evaluate button writes to a
    single Markdown component. Passing the tuple straight through would hand
    the whole tuple to that component, so the report is unpacked here.
    """
    report = test_scenario(scenario_idx, context, query, response)[0]
    return report


# Build UI
with gr.Blocks(title="Implicit Memory Conflict Validator") as demo:
    gr.Markdown("# 🧠 Implicit Memory Conflict Validator")
    gr.Markdown("Test if LLM agents correctly detect when memories become invalid without explicit negation.")

    with gr.Tab("Quick Test"):
        scenario_select = gr.Dropdown(
            choices=[(s["name"], i) for i, s in enumerate(TEST_SCENARIOS)],
            value=0,
            label="Select Test Scenario"
        )
        with gr.Row():
            with gr.Column():
                # Read-only views of the selected built-in scenario.
                ctx_display = gr.Textbox(label="Context", lines=3, interactive=False)
                query_display = gr.Textbox(label="User Query", lines=1, interactive=False)
                expected_display = gr.Textbox(label="Expected Behavior", lines=2, interactive=False)
            with gr.Column():
                model_response = gr.Textbox(
                    label="Paste Model Response Here",
                    lines=5,
                    placeholder="Paste the LLM's response to the query..."
                )
                evaluate_btn = gr.Button("Evaluate Response", variant="primary")
        result_output = gr.Markdown()

        def update_scenario(idx):
            """Refresh the read-only scenario fields for the chosen index."""
            ctx, qry, exp = get_scenario_details(idx)
            return ctx, qry, exp

        scenario_select.change(
            update_scenario,
            inputs=[scenario_select],
            outputs=[ctx_display, query_display, expected_display]
        )
        # Initialize first scenario
        demo.load(
            lambda: get_scenario_details(0),
            outputs=[ctx_display, query_display, expected_display]
        )
        evaluate_btn.click(
            _evaluation_report,
            inputs=[scenario_select, ctx_display, query_display, model_response],
            outputs=[result_output]
        )

    with gr.Tab("Custom Test"):
        gr.Markdown("Create your own memory conflict scenario:")
        custom_ctx = gr.Textbox(label="Context (what the agent knows)", lines=4)
        custom_qry = gr.Textbox(label="User Query", lines=2)
        # NOTE(review): this field is collected but not passed to the
        # handler below; test_scenario has no parameter for it and reports a
        # fixed "evaluate manually" text for custom scenarios.
        custom_expected = gr.Textbox(label="Expected Behavior", lines=2)
        custom_response = gr.Textbox(label="Model Response", lines=5)
        custom_eval_btn = gr.Button("Evaluate", variant="primary")
        custom_result = gr.Markdown()
        # An index one past the last built-in scenario makes test_scenario
        # take its custom-scenario branch.
        custom_eval_btn.click(
            _evaluation_report,
            inputs=[gr.State(len(TEST_SCENARIOS)), custom_ctx, custom_qry, custom_response],
            outputs=[custom_result]
        )

    with gr.Tab("About"):
        gr.Markdown("""
## About This Tool
This validator is inspired by the **STALE** benchmark from HKUST NLP Group, which tests whether LLM agents
can detect when their memories are no longer valid through *implicit conflict* detection.
### Three Dimensions Tested:
1. **State Resolution** - Detecting that a prior belief is outdated
2. **Premise Resistance** - Rejecting queries that falsely presuppose a stale state
3. **Implicit Policy Adaptation** - Proactively applying updated states in downstream behavior
### Why This Matters:
Real-world agents must handle memory invalidation without explicit negation. The STALE paper found
that even frontier models achieve only ~55% accuracy on these tasks.
### Reference:
> STALE: Can LLM Agents Know When Their Memories Are No Longer Valid?
> Hanxiang Chao et al., HKUST NLP Group
> arXiv:2605.06527 (2026)
""")

# Start the local server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()