Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="utf-8"> | |
| <meta name="description" | |
| content="PRINCIPLES: Synthetic Strategy Memory for Proactive Dialogue Agents"> | |
| <meta name="keywords" content="Conversational Agents, Proactive Dialogue"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1"> | |
| <title>PRINCIPLES: Synthetic Strategy Memory for Proactive Dialogue Agents</title> | |
| <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" | |
| rel="stylesheet"> | |
| <link rel="stylesheet" href="./static/css/bulma.min.css"> | |
| <link rel="stylesheet" href="./static/css/bulma-carousel.min.css"> | |
| <link rel="stylesheet" href="./static/css/bulma-slider.min.css"> | |
| <link rel="stylesheet" href="./static/css/fontawesome.all.min.css"> | |
| <link rel="stylesheet" | |
| href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css"> | |
| <link rel="stylesheet" href="./static/css/index.css"> | |
| <link rel="icon" href="./static/images/favicon.svg"> | |
| <!-- MathJax for LaTeX support. | |
| The text/x-mathjax-config block is placed before the tag that loads MathJax.js, | |
| as MathJax v2 requires; because the loader is async it could otherwise start | |
| before this configuration has been parsed. --> | |
| <script type="text/x-mathjax-config"> | |
| MathJax.Hub.Config({ | |
| tex2jax: { | |
| // Inline math is delimited by $...$ or \(...\); display math by $$...$$ or \[...\]. | |
| inlineMath: [['$','$'], ['\\(','\\)']], | |
| displayMath: [['$$','$$'], ['\\[','\\]']], | |
| processEscapes: true | |
| } | |
| }); | |
| </script> | |
| <script type="text/javascript" async | |
| src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-MML-AM_CHTML"> | |
| </script> | |
| <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script> | |
| <script defer src="./static/js/fontawesome.all.min.js"></script> | |
| <script src="./static/js/bulma-carousel.min.js"></script> | |
| <script src="./static/js/bulma-slider.min.js"></script> | |
| <script src="./static/js/index.js"></script> | |
| <script> | |
| function showStep(stepNumber) { | |
| // Hide all step contents | |
| var stepContents = document.querySelectorAll('.step-content'); | |
| for (var i = 0; i < stepContents.length; i++) { | |
| stepContents[i].style.display = 'none'; | |
| } | |
| // Remove active class from all tabs | |
| var tabs = document.querySelectorAll('.tabs li'); | |
| for (var i = 0; i < tabs.length; i++) { | |
| tabs[i].classList.remove('is-active'); | |
| } | |
| // Show the selected step content and activate its tab | |
| document.getElementById('step' + stepNumber + '-content').style.display = 'block'; | |
| document.getElementById('step' + stepNumber + '-tab').classList.add('is-active'); | |
| } | |
| // Initialize when DOM is fully loaded | |
| document.addEventListener('DOMContentLoaded', function() { | |
| showStep(1); | |
| }); | |
| </script> | |
| </head> | |
| <body> | |
| <section class="hero"> | |
| <div class="hero-body"> | |
| <div class="container is-max-desktop"> | |
| <div class="columns is-centered"> | |
| <div class="column has-text-centered"> | |
| <div style="display: flex; align-items: center; justify-content: center; margin-bottom: 20px;"> | |
| <h1 class="title is-1 publication-title" style="margin: 0 0 0 20px;"><img src="./static/images/principles.png" alt="" style="height: 60px; vertical-align: middle; margin-right: 5px; margin-bottom: 10px;">PRINCIPLES: Synthetic Strategy Memory for Proactive Dialogue Agents</h1> | |
| </div> | |
| <div class="is-size-5 publication-authors"> | |
| <span class="author-block"> | |
| <a href="#">Namyoung Kim,</a> | |
| </span> | |
| <span class="author-block"> | |
| <a href="#">Kai Tzu-iunn Ong,</a> | |
| </span> | |
| <span class="author-block"> | |
| <a href="#">Yeonjun Hwang,</a> | |
| </span> | |
| <span class="author-block"> | |
| <a href="#">Minseok Kang,</a> | |
| </span> | |
| <br> | |
| <span class="author-block"> | |
| <a href="#">Iiseo Jihn,</a> | |
| </span> | |
| <span class="author-block"> | |
| <a href="#">Gayoung Kim,</a> | |
| </span> | |
| <span class="author-block"> | |
| <a href="#">Minju Kim,</a> | |
| </span> | |
| <span class="author-block"> | |
| <a href="#">Jinyoung Yeo</a> | |
| </span> | |
| </div> | |
| <div class="is-size-5 publication-authors"> | |
| <span class="author-block">Department of Artificial Intelligence, Yonsei University</span> | |
| </div> | |
| <div class="column has-text-centered"> | |
| <div class="publication-links"> | |
| <!-- PDF Link. --> | |
| <!-- <span class="link-block"> | |
| <a href="https://arxiv.org/pdf/2011.12948" target="_blank" | |
| class="external-link button is-normal is-rounded is-dark"> | |
| <span class="icon"> | |
| <i class="fas fa-file-pdf"></i> | |
| </span> | |
| <span>Paper</span> | |
| </a> | |
| </span> --> | |
| <span class="link-block"> | |
| <a href="https://arxiv.org/abs/2509.17459" target="_blank" | |
| class="external-link button is-normal is-rounded is-dark"> | |
| <span class="icon"> | |
| <i class="ai ai-arxiv"></i> | |
| </span> | |
| <span>arXiv</span> | |
| </a> | |
| </span> | |
| <!-- Video Link. | |
| <span class="link-block"> | |
| <a href="https://www.youtube.com/watch?v=MrKrnHhk8IA" target="_blank" | |
| class="external-link button is-normal is-rounded is-dark"> | |
| <span class="icon"> | |
| <i class="fab fa-youtube"></i> | |
| </span> | |
| <span>Video</span> | |
| </a> | |
| </span> --> | |
| <!-- Code Link. --> | |
| <span class="link-block"> | |
| <a href="https://github.com/kimnamssya/Principles" target="_blank" | |
| class="external-link button is-normal is-rounded is-dark"> | |
| <span class="icon"> | |
| <i class="fab fa-github"></i> | |
| </span> | |
| <span>Code</span> | |
| </a> | |
| </span> | |
| <!-- Dataset Link. --> | |
| <span class="link-block"> | |
| <a href="https://huggingface.co/datasets/LangAGI-Lab/P4GPlus" target="_blank" | |
| class="external-link button is-normal is-rounded is-dark"> | |
| <span class="icon"> | |
| <i class="far fa-images"></i> | |
| </span> | |
| <span>Data</span> | |
| </a> | |
| </span> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <section class="section"> | |
| <div class="container is-max-desktop"> | |
| <!-- Abstract. --> | |
| <div class="columns is-centered has-text-centered"> | |
| <div class="column is-four-fifths"> | |
| <h2 class="title is-3">Abstract</h2> | |
| <div class="content has-text-justified"> | |
| <p> | |
| Dialogue agents based on large language models (LLMs) have shown promising performance in proactive dialogue, which requires effective strategy planning. | |
| However, existing approaches to strategy planning for proactive dialogue face several limitations: | |
| limited strategy coverage, preference bias in planning, and reliance on costly additional training. | |
| To address these, we propose <img src="static/images/principles.png" alt="" style="height: 1em; vertical-align: middle;"><b>PRINCIPLES</b>: a synthetic strategy memory for proactive dialogue agents. | |
| PRINCIPLES is derived through offline self-play simulations and serves as reusable knowledge that guides strategy planning during inference, eliminating the need for additional training and data annotation. | |
| We evaluate PRINCIPLES in both emotional support and persuasion domains, demonstrating consistent improvements over strong baselines. | |
| Furthermore, PRINCIPLES maintains its robustness across extended and more diverse evaluation settings. | |
| </p> | |
| <p> | |
| <b>🏆 Accepted at EMNLP 2025 Findings</b> | |
| </p> | |
| </div> | |
| </div> | |
| </div> | |
| <!--/ Abstract. --> | |
| </div> | |
| </section> | |
| <section class="section"> | |
| <div class="container is-max-desktop"> | |
| <div class="columns is-centered"> | |
| <div class="column"> | |
| <h2 class="title is-2">Methodology</h2> | |
| <div class="tabs"> | |
| <ul> | |
| <li id="step1-tab" class="is-active"><a href="#" onclick="showStep(1); return false;">Phase I: Principles Construction</a></li> | |
| <li id="step2-tab"><a href="#" onclick="showStep(2); return false;">Phase II: Principles-driven Strategy Planning</a></li> | |
| </ul> | |
| </div> | |
| <div id="step1-content" class="content step-content"> | |
| <div class="content has-text-centered"> | |
| <img src="static/images/main_1.png" alt="Principles Construction"> | |
| </div> | |
| <h3 class="title is-4">Step I: Success and Failure Detection</h3> | |
| <p> | |
| At each turn \( t \), the agent and the user simulator generate their responses, and a critic model assigns a scalar reward \( r_t \). | |
| We determine the <code>status</code> as either success or failure by evaluating whether the reward is higher than the previous turn: | |
| \begin{equation} | |
| \text{status}(s_t, a_t, u_t) = | |
| \begin{cases} | |
| \text{1} & \text{if } r_t > r_{t-1} \\ | |
| \text{0} & \text{otherwise} | |
| \end{cases} | |
| \end{equation} | |
| </p> | |
| <h3 class="title is-4">Step II: Strategy Revision</h3> | |
| <p> | |
| Upon detecting a failure, the simulation invokes a revision step to refine the previously failed strategic decision. | |
| It then generates a revised strategy \(\sigma_t^{\prime}\) to re-simulate from the failure point, leveraging prior failed attempts at turn \(t\). Formally, the revised strategy is generated as: | |
| \begin{equation} | |
| \sigma_t^{\prime} = \texttt{LLM}_{\theta}(\rho_{r}; s_t, \mathcal{F}_t) | |
| \end{equation} | |
| where \(\rho_{r}\) is the revision prompt and \(\mathcal{F}_t\) denotes the set of previously failed trials at turn \(t\), defined as \( \mathcal{F}_t = \{ (\sigma_t^{1}, a_t^{1}, u_t^{1}), \dots, (\sigma_t^{n}, a_t^{n}, u_t^{n}) \} \) | |
| where \(n\) is the maximum number of failed attempts. This failure history guides the model to avoid previously ineffective strategies. | |
| </p> | |
| <h3 class="title is-4">Step III: Re-simulation via Backtracking</h3> | |
| <p> | |
| After generating a revised strategy \(\sigma_t^{\prime}\), the simulation backtracks to the original state \(s_t\) preceding the failure and re-simulates turn \(t\) using \(\sigma_t^{\prime}\). The agent generates a revised response \(a_t^{\prime}\), and the user simulator produces a new reply \(u_t^{\prime}\) based on the updated context. | |
| \begin{equation} | |
| a_t^{\prime} = \texttt{LLM}_{\theta}(\rho_{a}; s_t, \sigma_t^{\prime}) | |
| \end{equation} | |
| \begin{equation} | |
| u_t^{\prime} = \texttt{LLM}_{\theta}(\rho_{u}; s_t, a_t^{\prime}) | |
| \end{equation} | |
| </p> | |
| <h3 class="title is-4">Step IV: Principle Derivation</h3> | |
| <p> | |
| If the corrected turn is re-evaluated as successful (<code>status</code> == 1), indicating a transition from failure to success, we derive a principle \( \tilde{p_t} \) as a result of overcoming the failure: | |
| \begin{equation} | |
| \tilde{p_t} = \texttt{LLM}_{\theta}(\rho_{\psi}; s_{t}, \mathcal{T}_t^{*}, \mathcal{F}_{t}) | |
| \end{equation} | |
| where \(\rho_{\psi}\) is a prompt designed to extract a principle from failure, and the successful revised interaction is denoted as \(\mathcal{T}_t^{*} = (\sigma_t^{*}, a_t^{*}, u_t^{*})\). The extracted principle is then added to the principle set \(\mathcal{P}\): | |
| \begin{equation} | |
| \mathcal{P} \leftarrow \mathcal{P} \cup \{ \tilde{p_t} \} | |
| \end{equation} | |
| </p> | |
| </div> | |
| <div id="step2-content" class="content step-content" style="display: none;"> | |
| <div class="content has-text-centered"> | |
| <img src="static/images/main_2.png" alt="Principles-driven Strategy Planning"> | |
| </div> | |
| <p> | |
| To apply the extracted PRINCIPLES at inference time, we first identify candidate principles that closely match the current context. Since the <code>When</code> clause captures the core situation, we retrieve relevant top-\(k\) principles by comparing the current state \(s_t\) and the <code>When</code> clause using L2 distance between embedding vectors. | |
| Only the <code>When</code> component of each principle is used to compute similarity, allowing us to identify contextually analogous dialogue situations across diverse scenarios. We denote the set of top-\(k\) retrieved principles as \( \Sigma_t = \{\sigma_1, \dots, \sigma_k\} \subset \mathcal{P} \). Since even within the same domain, retrieved principles may not directly align with the dialogue context, we perform a reinterpretation step. Formally, the reinterpreted principles \(\tilde{\Sigma}_t\) are generated as: | |
| \begin{equation} | |
| \tilde{\Sigma}_t = \texttt{LLM}_{\theta}(\rho_{\nu}; s_t, \Sigma_t) | |
| \end{equation} | |
| where \(\rho_{\nu}\) is a reinterpretation prompt designed to adapt retrieved principles \(\Sigma_t\) to the current context. This aligns each principle with the context. | |
| </p> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| </section> | |
| <section class="section"> | |
| <div class="container is-max-desktop"> | |
| <!-- <div class="columns is-centered has-text-centered"> --> | |
| <!-- <div class="column is-four-fifths"> --> | |
| <h2 class="title is-2">Qualitative Example</h2> | |
| <div class="content has-text-justified"> | |
| <p> | |
| Qualitative example comparing AnE, PPDPP, and our approach based on PRINCIPLES. Our | |
| approach tended to combine logical coherence and emotional empathy (i.e., Balanced Support). | |
| </p> | |
| <div class="content has-text-centered"> | |
| <img src="static/images/figure_case.png" alt="Qualitative example" style="width: 100%;"> | |
| </div> | |
| </div> | |
| <!-- </div> --> | |
| <!-- </div> --> | |
| <!--/ Qualitative Example. --> | |
| </div> | |
| </section> | |
| <section class="section"> | |
| <div class="container is-max-desktop"> | |
| <!-- <div class="columns is-centered has-text-centered"> --> | |
| <!-- <div class="column is-four-fifths"> --> | |
| <h2 class="title is-2">Main Results</h2> | |
| <div class="content has-text-justified"> | |
| <p> | |
| We investigate our method’s effectiveness in addressing three key challenges in strategy planning: coverage, bias, and training. For more details, please refer to our paper. | |
| </p> | |
| <div class="content has-text-centered"> | |
| <img src="static/images/main_results.png" alt="Main results" style="width: 100%;"> | |
| </div> | |
| </div> | |
| <!-- </div> --> | |
| <!-- </div> --> | |
| <!--/ Main Results. --> | |
| </div> | |
| </section> | |
| <!-- <section class="section" id="BibTeX"> | |
| <div class="container is-max-desktop content"> | |
| <h2 class="title">BibTeX</h2> | |
| <pre><code>@article{park2021nerfies, | |
| author = {Park, Keunhong and Sinha, Utkarsh and Barron, Jonathan T. and Bouaziz, Sofien and Goldman, Dan B and Seitz, Steven M. and Martin-Brualla, Ricardo}, | |
| title = {Nerfies: Deformable Neural Radiance Fields}, | |
| journal = {<EMNLP>}, | |
| year = {2025}, | |
| }</code></pre> | |
| </div> | |
| </section> --> | |