feat: Include clean final answer in OpenAI stream payload
- Updated `ZIPRCSampler.openai()` to calculate the best-performing trajectory (Top-1) at the end of generation.
- The final stream chunk (action="finished") now includes a `final_text` field within the `zip_rc` payload.
- This allows clients to capture and display the coherent "winning" answer, separating it from the noisy, branching thought process visible during the stream.
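
As a rough usage sketch (assuming `sampler` is an initialized `ZIPRCSampler` and that `OpenAIObject` supports dict-style `.get()` access — neither is shown in this diff), a client could capture the winning answer like so:

```python
# Hypothetical consumer sketch: the sampler construction and dict-style
# access on OpenAIObject are assumptions, not confirmed by this commit.
final_text = None
for chunk in sampler.openai(prompt="Why is the sky blue?"):
    meta = chunk.get("zip_rc") or {}
    if meta.get("action") == "finished":
        final_text = meta.get("final_text")

print(final_text)  # the coherent Top-1 answer, free of branch/backtrack noise
```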
ziprc.py
CHANGED
@@ -342,13 +342,27 @@ class ZIPRCSampler:
             }
             yield OpenAIObject(chunk_dict)
 
+        # Calculate Final Best Answer (clean from swaps/backtracks)
+        # Include running candidates in case max_tokens was hit before EOS
+        all_trajs = finished_trajectories + candidates
+        best_traj = self.select_best_trajectory(all_trajs)
+        final_answer = ""
+        if best_traj:
+            # Decode only the generated response (exclude prompt)
+            prompt_len = input_ids.shape[1]
+            final_ids = best_traj['ids'][0][prompt_len:]
+            final_answer = self.model.tokenizer.decode(final_ids, skip_special_tokens=True)
+
         yield OpenAIObject({
             "id": chat_id,
             "object": "chat.completion.chunk",
             "created": created_ts,
             "model": model_name,
             "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
-            "zip_rc": {"action": "finished"}
+            "zip_rc": {
+                "action": "finished",
+                "final_text": final_answer
+            }
         })
 
     def generate_stream(self, prompt, max_new_tokens=512, initial_samples=2):
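
`select_best_trajectory` is not part of this hunk. A minimal sketch of the Top-1 selection it implies — assuming each trajectory dict carries a `score` (e.g. a length-normalized cumulative log-prob) alongside its `ids`, which this diff does not confirm — might look like:

```python
# Minimal sketch only: assumes each trajectory is a dict with a 'score'
# field; the real selection criterion lives elsewhere in ziprc.py.
def select_best_trajectory(self, trajectories):
    if not trajectories:
        return None  # matches the `if best_traj:` guard above
    return max(trajectories, key=lambda t: t["score"])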