Spaces:

osunlp
/

QUEST

Running

App Files Files Community

Lzy01241010 commited on 20 days ago

Commit

67a025a

1 Parent(s): 49586ec

Memory strategies: rename to upstream (condenser/discard_all/hide_tool_result), describe per inference code; quest-name letter-spacing matches paper microsite

Browse files

Files changed (1) hide show

app.py +28 -19

app.py CHANGED Viewed

@@ -343,12 +343,13 @@ gradio-app > div {
   text-transform: none !important;
   color: var(--q-text) !important;
 }
 .quest-name {
   font-family: "Source Serif 4", "Source Serif Pro", ui-serif, Georgia, serif !important;
   font-style: italic !important;
   font-weight: 700 !important;
   color: inherit !important;
-  letter-spacing: 0.005em;
   margin: 4px 0 14px 0 !important;
 }
 .hero-subtitle {
@@ -1361,26 +1362,34 @@ def _trace_to_json(state: "AgentState", used_model: str) -> str:
     )
-MEMORY_STRATEGIES = ("vanilla", "condenser", "discard-all", "hide-tool-results")
 def _normalize_memory_strategy(strategy: str) -> str:
-    s = (strategy or "condenser").strip().lower().replace("_", "-")
     return s if s in MEMORY_STRATEGIES else "condenser"
 def _apply_memory_strategy(messages: List[Dict[str, str]], strategy: str, turn: int) -> None:
-    """Keep the message history inside a manageable context budget.
-    - condenser: no-op (the main loop also injects a periodic trusted-note
-      summary; that is the light "condenser" this Space ships with).
-    - discard-all: every 8 turns, reset history to [system, user question]
-      so the model pays for fresh context rather than replaying old tool
-      results.
-    - hide-tool-results: cap the number of surviving tool-response user
-      messages at 3 — older ones get their content replaced with a stub.
     """
-    if strategy == "discard-all":
         if turn > 1 and turn % 8 == 0 and len(messages) > 2:
             system_msg = messages[0]
             question_msg = messages[1]
@@ -1394,8 +1403,8 @@ def _apply_memory_strategy(messages: List[Dict[str, str]], strategy: str, turn:
                     f"{turn} — continue the research from the original question]",
                 }
             )
-    elif strategy == "hide-tool-results":
-        keep_tail = 3
         tool_indices = [
             i for i, m in enumerate(messages)
             if m.get("role") == "user" and str(m.get("content", "")).startswith("<tool_response>")
@@ -1805,10 +1814,10 @@ with gr.Blocks(
                 )
                 gr.HTML(
                     '<div class="memory-help">'
-                    '<b>vanilla</b> — full history kept every turn, no management.<br>'
-                    '<b>condenser</b> — keep history, inject a research-state summary every 3 turns.<br>'
-                    '<b>discard-all</b> — every 8 turns, reset to system prompt + original question only.<br>'
-                    '<b>hide-tool-results</b> — keep at most the 3 most recent tool responses; older ones are stubbed out.'
                     '</div>'
                 )
                 max_turns = gr.Slider(

   text-transform: none !important;
   color: var(--q-text) !important;
 }
+/* Match the .brand mark from the Quest microsite (github-page branch). */
 .quest-name {
   font-family: "Source Serif 4", "Source Serif Pro", ui-serif, Georgia, serif !important;
   font-style: italic !important;
   font-weight: 700 !important;
   color: inherit !important;
+  letter-spacing: -0.005em;
   margin: 4px 0 14px 0 !important;
 }
 .hero-subtitle {
     )
+MEMORY_STRATEGIES = ("vanilla", "condenser", "discard_all", "hide_tool_result")
 def _normalize_memory_strategy(strategy: str) -> str:
+    s = (strategy or "condenser").strip().lower().replace("-", "_")
+    if s == "hide_tool_results":
+        s = "hide_tool_result"
     return s if s in MEMORY_STRATEGIES else "condenser"
 def _apply_memory_strategy(messages: List[Dict[str, str]], strategy: str, turn: int) -> None:
+    """Lightweight port of the strategies defined in the Quest inference
+    code (`inference/react_agent.py`). Upstream is token-threshold-driven;
+    this Space approximates each strategy on a turn-count basis for demo
+    purposes.
+    - vanilla: no-op (matches MEMORY_ENABLED=false upstream).
+    - condenser: no-op here; the main loop injects a compact research-state
+      summary every few turns (a poor-man's stand-in for the upstream
+      State Summarizer LLM that emits a structured trusted/untrusted/
+      uncertain JSON when the token threshold is hit).
+    - discard_all: every 8 turns, reset history to [system, user question]
+      (upstream resets when token_count crosses the threshold).
+    - hide_tool_result: keep only the most recent tool-response user
+      message; older ones get their content replaced with a stub
+      (mirrors upstream behavior).
     """
+    if strategy == "discard_all":
         if turn > 1 and turn % 8 == 0 and len(messages) > 2:
             system_msg = messages[0]
             question_msg = messages[1]
                     f"{turn} — continue the research from the original question]",
                 }
             )
+    elif strategy == "hide_tool_result":
+        keep_tail = 1
         tool_indices = [
             i for i, m in enumerate(messages)
             if m.get("role") == "user" and str(m.get("content", "")).startswith("<tool_response>")
                 )
                 gr.HTML(
                     '<div class="memory-help">'
+                    '<b>vanilla</b> — memory management disabled; the full conversation history is kept.<br>'
+                    '<b>condenser</b> (default) — when context grows large, a State Summarizer LLM compresses earlier turns into a structured JSON of trusted/untrusted/uncertain claims, visited sources, and prior search queries; the agent continues with that compact state.<br>'
+                    '<b>discard_all</b> — when context grows large, the entire message history is reset, restarting the agent from the original question with no accumulated context.<br>'
+                    '<b>hide_tool_result</b> — when context grows large, older tool responses are pruned; only the most recent tool result is kept.'
                     '</div>'
                 )
                 max_turns = gr.Slider(