File size: 4,431 Bytes
e7fd66f
 
81da2d5
 
196a48f
81da2d5
 
 
e7fd66f
 
 
 
 
 
 
 
 
 
 
 
196a48f
 
 
 
 
 
e7fd66f
 
 
 
 
81da2d5
 
 
 
 
e7fd66f
 
 
 
 
 
 
 
 
 
 
 
 
 
7a28b9f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196a48f
 
 
 
 
7a28b9f
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
from __future__ import annotations

from inference.response_clean import (
    prepare_display_reply,
    reply_ends_complete_sentence,
    strip_reasoning_output,
    strip_thinking_blocks,
)

# Opening/closing markers used by the tests below. Each one is assembled from
# a tag name at import time rather than written as a single literal
# (presumably so the raw markers never appear verbatim in this file -- confirm).
_RT_OPEN = "<{}>".format("redacted_thinking")
_RT_CLOSE = "</{}>".format("redacted_thinking")
_THINK_OPEN = "<{}>".format("think")
_THINK_CLOSE = "</{}>".format("think")


def test_strips_redacted_thinking_block():
    """A closed redacted-thinking block is removed, leaving only the answer."""
    answer = "The capital of France is Paris."
    wrapped = _RT_OPEN + "\nplanning...\n" + _RT_CLOSE + "\n\n" + answer
    assert strip_reasoning_output(wrapped) == answer


def test_strips_unclosed_redacted_thinking_block():
    """An opening marker with no closing marker yields "" from both strippers."""
    unclosed = _RT_OPEN + "\nplanning without a final answer that never closes"
    # Same expectation for both helpers, checked in the original order.
    for strip in (strip_reasoning_output, strip_thinking_blocks):
        assert strip(unclosed) == ""


def test_strips_think_block():
    """A closed think block is removed, leaving only the cited answer."""
    answer = "Agents use memory [1]."
    wrapped = _THINK_OPEN + "\nplanning...\n" + _THINK_CLOSE + "\n\n" + answer
    assert strip_reasoning_output(wrapped) == answer


def test_strip_thinking_blocks_preserves_json_payload():
    """A JSON object following a think block comes back unchanged."""
    payload = '{"title": "T"}'
    wrapped = _THINK_OPEN + "\nplanning...\n" + _THINK_CLOSE + "\n\n" + payload
    assert strip_thinking_blocks(wrapped) == payload


def test_strips_malformed_think_prefix_and_extracts_summary():
    """With a bare ``think>`` prefix, the output starts at the text after ``Summary:``."""
    malformed = (
        "think> We need to summarize the document. First, identify sources.\n"
        "\n"
        "Let's draft:\n"
        "\n"
        "Summary: This review covers AI agent applications, evaluation, and future work [1]."
    )
    cleaned = strip_reasoning_output(malformed)
    assert cleaned.startswith("This review covers")
    # The planning text must not leak into the cleaned reply.
    assert "We need to summarize" not in cleaned


def test_preserves_normal_answer():
    """A reply with no reasoning markers passes through unchanged."""
    plain = "AI agents combine perception, planning, and action [1]."
    cleaned = strip_reasoning_output(plain)
    assert cleaned == plain


def test_extracts_final_answer_from_plain_chain_of_thought():
    """Untagged reasoning is dropped; output starts at the text after ``Final answer:``."""
    transcript = (
        "First, I need to explain finetuning in plain language. I should keep it concise.\n"
        "\n"
        "Let me draft:\n"
        "1. Finetuning adjusts a model for a task.\n"
        "2. Best practices include good data.\n"
        "\n"
        "Final answer:\n"
        "\n"
        "Finetuning small model adjusts a model to improve its performance on a specific task.\n"
        "For example, fine-tuning a language model can enhance its ability to understand complex queries.\n"
        "Best practices include using diverse and high-quality data.\n"
        "\n"
        "That's about 3 sentences. I think it covers it.\n"
        "\n"
        "Let me write:\n"
        "\n"
        "Finetuning small model involves training the model with additional data to specialize in a task.\n"
        "For instance, fine-tuning a computer vision model can improve its object"
    )
    cleaned = strip_reasoning_output(transcript)
    assert cleaned.startswith("Finetuning small model adjusts")
    # None of the planning or self-commentary phrases may survive.
    for leaked in ("First, I need", "Let me draft", "That's about 3 sentences"):
        assert leaked not in cleaned


def test_prepare_display_reply_collapses_plain_chain_of_thought():
    """The display reply opens with a think marker and keeps both plan and answer text."""
    transcript = (
        "First, I need to plan the answer.\n"
        "\n"
        "Final answer:\n"
        "\n"
        "Finetuning teaches a small model to specialize on your task using extra training data."
    )
    rendered = prepare_display_reply(transcript)
    assert rendered.startswith(_THINK_OPEN)
    # The closing marker, the answer, and the reasoning must all be present.
    for expected in (
        _THINK_CLOSE,
        "Finetuning teaches a small model",
        "First, I need to plan",
    ):
        assert expected in rendered


def test_extracts_labeled_sentence_draft():
    """Text of ``Sentence N:`` drafts is kept; the labels and planning text are not."""
    transcript = (
        "First, the user wants me to explain finetuning.\n"
        "\n"
        "Let me outline my response:\n"
        "1. Start with a simple definition.\n"
        "\n"
        "Now, write the response:\n"
        "\n"
        "Sentence 1: Finetuning is training a small model to improve its performance on a specific task, like recognizing objects in photos.\n"
        "\n"
        "Sentence 2: For example, a model might be fine-tuned on a dataset of medical scans to detect tumors more accurately.\n"
        "\n"
        "That's two sentences. I can add one more if needed.\n"
        "\n"
        "Sentence 3: This process enhances efficiency and reduces overfitting.\n"
        "\n"
        "So, three"
    )
    cleaned = strip_reasoning_output(transcript)
    # Content from all three labeled sentences must be retained.
    for kept in (
        "Finetuning is training a small model",
        "medical scans",
        "enhances efficiency",
    ):
        assert kept in cleaned
    # The planning preamble and the draft label must be gone.
    for dropped in ("First, the user", "Sentence 1:"):
        assert dropped not in cleaned


def test_reply_ends_complete_sentence():
    """A period-terminated reply is truthy; a reply cut off mid-phrase is falsy."""
    finished = "Finetuning teaches a small model to specialize."
    truncated = "The lesson aims to teach how to fine-tune small"
    assert reply_ends_complete_sentence(finished)
    assert not reply_ends_complete_sentence(truncated)


def test_prepare_display_reply_wraps_malformed_think_prefix():
    """A bare ``think>`` prefix is rendered with think markers and its reasoning text kept."""
    malformed = "think> We need to plan the answer.\n\nThe answer is 42."
    rendered = prepare_display_reply(malformed)
    assert rendered.startswith(_THINK_OPEN)
    for expected in (_THINK_CLOSE, "We need to plan the answer."):
        assert expected in rendered