dev-strender committed on
Commit
c421c84
·
1 Parent(s): ed17f2a

Single-tab feedback UI (Good/NotBad/Critical); FT dedup tighter

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__/
app.py CHANGED
@@ -1,22 +1,17 @@
1
- """Chosun proofreading pipeline comparator — Gradio UI.
2
 
3
- Tabs:
4
- 1. 비교 (Comparator): 2-column A (Baseline 251231) vs B (🏆 v16)
5
- 2. 블라인드 테스트: label-hidden pairwise preference voting, Supabase-backed
6
  """
7
 
8
  import os
9
- from collections.abc import Iterator
10
- from concurrent.futures import ThreadPoolExecutor, as_completed
11
 
12
  import gradio as gr
13
- from diff_utils import highlight_diff
14
  from dotenv import load_dotenv
15
  from openai import OpenAI
16
- from pipelines import PIPELINES, list_prompts, run_pipeline
17
  from postprocess import load_vocabulary
18
 
19
- from blindtest import build_blindtest_tab
20
 
21
  load_dotenv()
22
 
@@ -27,163 +22,27 @@ _vocab_path = os.path.join(os.path.dirname(__file__), "data", "vocabulary.csv")
27
  vocabulary = load_vocabulary(_vocab_path)
28
 
29
 
30
- _PENDING_TEXT = "⏳ 실행 중..."
31
- _SLOTS = ("A", "B")
32
-
33
-
34
- def _build_state(
35
- input_text: str,
36
- results: dict[str, dict | None],
37
- ) -> tuple:
38
- """Build 2-pipeline output tuple: 2 outputs + 2 diffs + meta."""
39
- outs = [
40
- results[s]["output"] if results[s] else _PENDING_TEXT for s in _SLOTS
41
- ]
42
- diffs = [
43
- highlight_diff(input_text, results[s]["output"]) if results[s] else ""
44
- for s in _SLOTS
45
- ]
46
-
47
- def _label(result: dict | None, default: str) -> str:
48
- if not result:
49
- return default
50
- t = f"{result['processing_time']:.1f}s"
51
- errs = result.get("step_errors") or []
52
- if errs:
53
- t += f" ({len(errs)} err: {', '.join(errs[:2])}{'...' if len(errs) > 2 else ''})"
54
- return t
55
-
56
- times = [_label(results[s], "실행 중...") for s in _SLOTS]
57
- meta = " | ".join(f"Pipeline {s}: {t}" for s, t in zip(_SLOTS, times))
58
-
59
- return (*outs, *diffs, meta)
60
-
61
-
62
- def compare(
63
- input_text: str,
64
- pipe_a: str, model_a: str, prompt_a: str,
65
- pipe_b: str, model_b: str, prompt_b: str,
66
- ) -> Iterator[tuple]:
67
- """Run 2 pipelines concurrently and yield partial results as each finishes."""
68
- empty = tuple([""] * 4) + ("입력 텍스트를 입력해주세요.",)
69
-
70
- if not input_text or not input_text.strip():
71
- yield empty
72
- return
73
-
74
- if not client:
75
- yield tuple([""] * 4) + (
76
- "UPSTAGE_API_KEY 환경변수가 설정되지 않았습니다. .env 파일을 확인해주세요.",
77
- )
78
- return
79
-
80
- results: dict[str, dict | None] = {s: None for s in _SLOTS}
81
- yield _build_state(input_text, results)
82
-
83
- configs = {
84
- "A": (pipe_a, model_a, prompt_a),
85
- "B": (pipe_b, model_b, prompt_b),
86
- }
87
-
88
- with ThreadPoolExecutor(max_workers=2) as executor:
89
- futures = {
90
- executor.submit(
91
- run_pipeline, input_text, pipe, model, prompt, client, vocabulary
92
- ): slot
93
- for slot, (pipe, model, prompt) in configs.items()
94
- }
95
-
96
- for fut in as_completed(futures):
97
- slot = futures[fut]
98
- try:
99
- result = fut.result()
100
- except Exception as exc:
101
- result = {"output": f"에러: {exc}", "processing_time": 0.0}
102
- results[slot] = result
103
- yield _build_state(input_text, results)
104
-
105
-
106
- # --- UI ---
107
-
108
- pipeline_choices = list(PIPELINES.keys())
109
- model_choices = ["solar-pro2", "solar-pro3"]
110
- prompt_choices = list_prompts() or ["prod_251231"]
111
 
112
 
113
- def _default_prompt(preferred_prefix: str, fallback_index: int) -> str:
114
  matches = [p for p in prompt_choices if p.startswith(preferred_prefix)]
115
  if matches:
116
  return matches[-1]
117
- return prompt_choices[fallback_index]
118
 
119
 
120
- _default_prompt_a = _default_prompt("prod_251231", 0)
121
- _default_prompt_b = _default_prompt("dev_260408_v16", -1)
122
 
123
  with gr.Blocks(title="Chosun 교정교열 데모") as demo:
124
  gr.Markdown("# Chosun 교정교열 데모")
 
125
 
126
- with gr.Tabs():
127
- with gr.Tab("블라인드 테스트"):
128
- build_blindtest_tab(
129
- client=client,
130
- vocabulary=vocabulary,
131
- baseline_config=("251231_default", "solar-pro2", _default_prompt_a),
132
- candidate_config=("260408_v16", "solar-pro3", _default_prompt_b),
133
- )
134
-
135
- with gr.Tab("파이프라인 비교"):
136
- gr.Markdown("Baseline(251231 Pro2×3)과 신규 v16(Pro3×1)을 나란히 비교합니다.")
137
-
138
- input_text = gr.Textbox(
139
- label="원문 입력",
140
- lines=8,
141
- placeholder="교정할 텍스트를 입력하세요.",
142
- )
143
-
144
- with gr.Row():
145
- with gr.Column():
146
- gr.Markdown("### A (Baseline 251231)")
147
- pipe_a = gr.Dropdown(pipeline_choices, value="251231_default", label="파이프라인")
148
- model_a = gr.Dropdown(model_choices, value="solar-pro2", label="모델")
149
- prompt_a = gr.Dropdown(prompt_choices, value=_default_prompt_a, label="프롬프트")
150
- with gr.Column():
151
- gr.Markdown("### B (🏆 v16)")
152
- pipe_b = gr.Dropdown(pipeline_choices, value="260408_v16", label="파이프라인")
153
- model_b = gr.Dropdown(model_choices, value="solar-pro3", label="모델")
154
- prompt_b = gr.Dropdown(prompt_choices, value=_default_prompt_b, label="프롬프트")
155
-
156
- btn = gr.Button(
157
- "비교 실행 (⌘+Enter / Ctrl+Enter)",
158
- variant="primary",
159
- elem_id="compare-run-btn",
160
- )
161
-
162
- with gr.Row():
163
- output_a = gr.Textbox(label="A 결과", lines=10)
164
- output_b = gr.Textbox(label="B 결과", lines=10)
165
-
166
- with gr.Accordion("Diff 상세 비교", open=True):
167
- gr.Markdown("#### 원문 vs A")
168
- diff_orig_a_html = gr.HTML()
169
- gr.Markdown("#### 원문 vs B")
170
- diff_orig_b_html = gr.HTML()
171
-
172
- meta_info = gr.Textbox(label="실행 정보", interactive=False)
173
-
174
- btn.click(
175
- fn=compare,
176
- inputs=[
177
- input_text,
178
- pipe_a, model_a, prompt_a,
179
- pipe_b, model_b, prompt_b,
180
- ],
181
- outputs=[
182
- output_a, output_b,
183
- diff_orig_a_html, diff_orig_b_html,
184
- meta_info,
185
- ],
186
- )
187
 
188
  _SHORTCUT_JS = """
189
  () => {
 
1
+ """Chosun proofreading demo — Gradio UI.
2
 
3
+ Single tab: solar-pro3 v16 pipeline + Good/Not Bad/Critical feedback.
 
 
4
  """
5
 
6
  import os
 
 
7
 
8
  import gradio as gr
 
9
  from dotenv import load_dotenv
10
  from openai import OpenAI
11
+ from pipelines import list_prompts
12
  from postprocess import load_vocabulary
13
 
14
+ from blindtest import build_feedback_tab
15
 
16
  load_dotenv()
17
 
 
22
  vocabulary = load_vocabulary(_vocab_path)
23
 
24
 
25
+ prompt_choices = list_prompts() or ["dev_260408_v16"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
 
28
+ def _default_prompt(preferred_prefix: str, fallback: str) -> str:
29
  matches = [p for p in prompt_choices if p.startswith(preferred_prefix)]
30
  if matches:
31
  return matches[-1]
32
+ return fallback
33
 
34
 
35
+ _v16_prompt = _default_prompt("dev_260408_v16", prompt_choices[-1])
 
36
 
37
  with gr.Blocks(title="Chosun 교정교열 데모") as demo:
38
  gr.Markdown("# Chosun 교정교열 데모")
39
+ gr.Markdown("solar-pro3 기반 교정 결과를 확인하고 피드백을 남겨주세요.")
40
 
41
+ build_feedback_tab(
42
+ client=client,
43
+ vocabulary=vocabulary,
44
+ pipeline_config=("260408_v16", "solar-pro3", _v16_prompt),
45
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  _SHORTCUT_JS = """
48
  () => {
blindtest/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
- """Blind test module — pairwise preference voting, Supabase-backed."""
2
 
3
- from .ui import build_blindtest_tab
4
 
5
- __all__ = ["build_blindtest_tab"]
 
1
+ """Feedback module — single-output Good/Not Bad/Critical rating, Supabase-backed."""
2
 
3
+ from .ui import build_feedback_tab
4
 
5
+ __all__ = ["build_feedback_tab"]
blindtest/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (407 Bytes)
 
blindtest/__pycache__/db.cpython-312.pyc DELETED
Binary file (6.09 kB)
 
blindtest/__pycache__/ui.cpython-312.pyc DELETED
Binary file (12 kB)
 
blindtest/db.py CHANGED
@@ -147,6 +147,39 @@ def save_task(
147
  return None
148
 
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def save_vote(task_id: int | None, choice: str, comment: str) -> bool:
151
  if not is_configured() or task_id is None:
152
  return False
 
147
  return None
148
 
149
 
150
+ def save_rating(pipeline_run_id: int | None, rating: str, comment: str) -> bool:
151
+ """Save Good / Not Bad / Critical rating for a single pipeline run."""
152
+ if not is_configured() or pipeline_run_id is None:
153
+ return False
154
+ try:
155
+ _post(
156
+ "ratings",
157
+ {
158
+ "pipeline_run_id": pipeline_run_id,
159
+ "rating": rating,
160
+ "comment": comment or None,
161
+ },
162
+ )
163
+ return True
164
+ except Exception as exc:
165
+ _record_error("save_rating", exc)
166
+ return False
167
+
168
+
169
+ def fetch_rating_counts() -> dict[str, int]:
170
+ if not is_configured():
171
+ return {}
172
+ try:
173
+ rows = _get("ratings", {"select": "rating"})
174
+ counts: dict[str, int] = {}
175
+ for row in rows:
176
+ counts[row["rating"]] = counts.get(row["rating"], 0) + 1
177
+ return counts
178
+ except Exception as exc:
179
+ _record_error("fetch_rating_counts", exc)
180
+ return {}
181
+
182
+
183
  def save_vote(task_id: int | None, choice: str, comment: str) -> bool:
184
  if not is_configured() or task_id is None:
185
  return False
blindtest/schema.sql CHANGED
@@ -38,12 +38,24 @@ create table if not exists votes (
38
 
39
  create index if not exists votes_task_idx on votes (task_id);
40
 
 
 
 
 
 
 
 
 
 
 
 
41
  -- Allow anon key (used by the Gradio app) to insert/select.
42
  -- RLS is enabled by default on public tables; without policies, anon writes silently fail.
43
  alter table articles enable row level security;
44
  alter table pipeline_runs enable row level security;
45
  alter table tasks enable row level security;
46
  alter table votes enable row level security;
 
47
 
48
  create policy "anon insert articles" on articles for insert to anon with check (true);
49
  create policy "anon select articles" on articles for select to anon using (true);
@@ -53,6 +65,8 @@ create policy "anon insert tasks" on tasks for insert to anon wi
53
  create policy "anon select tasks" on tasks for select to anon using (true);
54
  create policy "anon insert votes" on votes for insert to anon with check (true);
55
  create policy "anon select votes" on votes for select to anon using (true);
 
 
56
 
57
  -- Aggregation view: winner is identified by pipeline_key (un-blinded).
58
  create or replace view vote_summary as
 
38
 
39
  create index if not exists votes_task_idx on votes (task_id);
40
 
41
+ -- Single-output rating table (current UI: Good / Not Bad / Critical).
42
+ create table if not exists ratings (
43
+ id bigserial primary key,
44
+ pipeline_run_id bigint references pipeline_runs(id) on delete cascade,
45
+ rating text not null check (rating in ('good','not_bad','critical')),
46
+ comment text,
47
+ rated_at timestamptz default now()
48
+ );
49
+
50
+ create index if not exists ratings_run_idx on ratings (pipeline_run_id);
51
+
52
  -- Allow anon key (used by the Gradio app) to insert/select.
53
  -- RLS is enabled by default on public tables; without policies, anon writes silently fail.
54
  alter table articles enable row level security;
55
  alter table pipeline_runs enable row level security;
56
  alter table tasks enable row level security;
57
  alter table votes enable row level security;
58
+ alter table ratings enable row level security;
59
 
60
  create policy "anon insert articles" on articles for insert to anon with check (true);
61
  create policy "anon select articles" on articles for select to anon using (true);
 
65
  create policy "anon select tasks" on tasks for select to anon using (true);
66
  create policy "anon insert votes" on votes for insert to anon with check (true);
67
  create policy "anon select votes" on votes for select to anon using (true);
68
+ create policy "anon insert ratings" on ratings for insert to anon with check (true);
69
+ create policy "anon select ratings" on ratings for select to anon using (true);
70
 
71
  -- Aggregation view: winner is identified by pipeline_key (un-blinded).
72
  create or replace view vote_summary as
blindtest/ui.py CHANGED
@@ -1,20 +1,16 @@
1
- """Gradio UI for pairwise blind-test voting.
2
 
3
  Flow:
4
- 1. User enters source text and clicks "생성".
5
- 2. Both pipelines run in parallel; randomized assignment decides which
6
- becomes slot A vs slot B in the UI.
7
- 3. Outputs are shown with neutral labels and a diff against the source.
8
- 4. User picks A / B / 비슷 / 둘 다 나쁨 (+ optional comment) and submits.
9
- 5. On submit: vote is saved, the real pipeline identity is revealed, and
10
- the running tally is refreshed.
11
  """
12
 
13
  from __future__ import annotations
14
 
15
- import random
16
  import time
17
- from concurrent.futures import ThreadPoolExecutor
18
  from typing import Any
19
 
20
  import gradio as gr
@@ -26,202 +22,142 @@ from . import db
26
  PipelineConfig = tuple[str, str, str] # (pipeline_key, model, prompt_key)
27
 
28
 
29
- def _run_one(
30
- client: Any,
31
- vocabulary: list[dict],
32
- text: str,
33
- config: PipelineConfig,
34
- ) -> dict:
35
- pipeline_key, model, prompt_key = config
36
- start = time.time()
37
- try:
38
- result = run_pipeline(text, pipeline_key, model, prompt_key, client, vocabulary)
39
- result.setdefault("processing_time", time.time() - start)
40
- return result
41
- except Exception as exc: # pragma: no cover
42
- return {"output": f"에러: {exc}", "processing_time": time.time() - start}
43
-
44
-
45
- def _format_summary(summary: list[dict], counts: dict[str, int]) -> str:
46
  total = sum(counts.values())
47
  if total == 0:
48
- return "아직 투표가 없습니다."
49
- lines = [f"**총 투표**: {total}"]
50
- if summary:
51
- lines.append("\n**파이프라인별 승수 (A/B 선택만 집계)**")
52
- for row in sorted(summary, key=lambda r: -r.get("wins", 0)):
53
- lines.append(f"- `{row['winner_pipeline']}`: {row['wins']}")
54
- tie = counts.get("tie", 0)
55
- bad = counts.get("both_bad", 0)
56
- if tie or bad:
57
- lines.append(f"\n비슷: {tie} · 둘 다 나쁨: {bad}")
58
- return "\n".join(lines)
59
-
60
-
61
- def build_blindtest_tab(
62
  client: Any,
63
  vocabulary: list[dict],
64
- baseline_config: PipelineConfig,
65
- candidate_config: PipelineConfig,
66
  ) -> None:
67
- """Build the blind-test Gradio tab. Must be called inside a gr.Blocks/Tab."""
68
-
69
- configured = db.is_configured()
70
 
71
- gr.Markdown(
72
- "두 교정 결과 중 어느 쪽이 더 낫다고 느끼는지 익명으로 투표합니다. "
73
- "라벨은 제출 후 공개됩니다."
74
- )
75
 
76
- # Hidden state: which config is in slot A (True = baseline, False = candidate)
77
- slot_a_is_baseline = gr.State(True)
78
- task_id_state = gr.State(None)
79
- run_a_id_state = gr.State(None)
80
- run_b_id_state = gr.State(None)
81
 
82
  input_text = gr.Textbox(
83
  label="원문 입력",
84
- lines=6,
85
  placeholder="교정할 텍스트를 입력하세요.",
86
  )
87
- generate_btn = gr.Button("A / B 생성", variant="primary")
 
 
 
 
 
88
 
89
  status = gr.Markdown("")
90
 
 
 
 
 
91
  with gr.Row():
92
- with gr.Column():
93
- gr.Markdown("### 결과 A")
94
- output_a = gr.Textbox(label="교정 결과 A", lines=10, interactive=False)
95
- diff_a = gr.HTML(label="원문 대비 diff A")
96
- with gr.Column():
97
- gr.Markdown("### 결과 B")
98
- output_b = gr.Textbox(label="교정 결과 B", lines=10, interactive=False)
99
- diff_b = gr.HTML(label="원문 대비 diff B")
100
-
101
- gr.Markdown("### 어느 쪽이 더 낫나요?")
102
- with gr.Row():
103
- vote_a = gr.Button("A 가 낫다", variant="primary")
104
- vote_b = gr.Button("B 가 낫다", variant="primary")
105
- vote_tie = gr.Button("비슷하다")
106
- vote_bad = gr.Button("둘 다 나쁘다")
107
 
108
  comment = gr.Textbox(label="코멘트 (선택)", lines=2)
109
-
110
- reveal = gr.Markdown("")
111
  summary_md = gr.Markdown("")
112
 
113
- def _on_generate(text: str):
114
  if not text or not text.strip():
115
  return (
116
  gr.update(value="입력 텍스트가 비어있습니다."),
117
- gr.update(), gr.update(), gr.update(), gr.update(),
118
- True, None, None, None,
 
119
  gr.update(value=""),
120
  )
121
  if client is None:
122
  return (
123
  gr.update(value="UPSTAGE_API_KEY 미설정."),
124
- gr.update(), gr.update(), gr.update(), gr.update(),
125
- True, None, None, None,
 
126
  gr.update(value=""),
127
  )
128
 
129
- # Randomize which config gets slot A
130
- a_is_baseline = random.random() < 0.5
131
- cfg_slot_a = baseline_config if a_is_baseline else candidate_config
132
- cfg_slot_b = candidate_config if a_is_baseline else baseline_config
 
 
 
 
 
 
 
133
 
134
- with ThreadPoolExecutor(max_workers=2) as ex:
135
- fut_a = ex.submit(_run_one, client, vocabulary, text, cfg_slot_a)
136
- fut_b = ex.submit(_run_one, client, vocabulary, text, cfg_slot_b)
137
- res_a = fut_a.result()
138
- res_b = fut_b.result()
139
 
140
- article_id = db.save_article(text) if configured else None
141
- # Persist runs using the TRUE pipeline key (not slot label)
142
- run_slot_a_id = db.save_pipeline_run(
143
- article_id,
144
- pipeline_key=cfg_slot_a[0],
145
- prompt_key=cfg_slot_a[2],
146
- model=cfg_slot_a[1],
147
- output=res_a["output"],
148
- processing_time_s=float(res_a.get("processing_time", 0.0)),
149
- ) if configured else None
150
- run_slot_b_id = db.save_pipeline_run(
151
  article_id,
152
- pipeline_key=cfg_slot_b[0],
153
- prompt_key=cfg_slot_b[2],
154
- model=cfg_slot_b[1],
155
- output=res_b["output"],
156
- processing_time_s=float(res_b.get("processing_time", 0.0)),
157
- ) if configured else None
158
- task_id = db.save_task(article_id, run_slot_a_id, run_slot_b_id) if configured else None
159
-
160
- diff_a_html = highlight_diff(text, res_a["output"])
161
- diff_b_html = highlight_diff(text, res_b["output"])
162
 
163
  return (
164
- gr.update(value=f"생성 완료 · A {res_a.get('processing_time', 0):.1f}s · B {res_b.get('processing_time', 0):.1f}s"),
165
- gr.update(value=res_a["output"]),
166
- gr.update(value=res_b["output"]),
167
- gr.update(value=diff_a_html),
168
- gr.update(value=diff_b_html),
169
- a_is_baseline, task_id, run_slot_a_id, run_slot_b_id,
170
- gr.update(value=""), # clear reveal
171
  )
172
 
173
- generate_btn.click(
174
- _on_generate,
175
  inputs=[input_text],
176
- outputs=[
177
- status,
178
- output_a, output_b,
179
- diff_a, diff_b,
180
- slot_a_is_baseline, task_id_state, run_a_id_state, run_b_id_state,
181
- reveal,
182
- ],
183
  )
184
 
185
- def _make_vote_handler(choice: str):
186
- def handler(task_id, a_is_baseline, comment_text):
187
- saved = db.save_vote(task_id, choice, comment_text) if configured else False
188
- if a_is_baseline:
189
- label_a = baseline_config[0]
190
- label_b = candidate_config[0]
191
- else:
192
- label_a = candidate_config[0]
193
- label_b = baseline_config[0]
194
- reveal_md = (
195
- f"**공개** — A: `{label_a}` · B: `{label_b}`\n\n"
196
- f"선택: **{choice}**"
197
- )
198
- summary = _format_summary(db.fetch_summary(), db.fetch_vote_counts())
199
- return gr.update(value=reveal_md), gr.update(value=summary)
200
  return handler
201
 
202
- vote_a.click(
203
- _make_vote_handler("A"),
204
- inputs=[task_id_state, slot_a_is_baseline, comment],
205
- outputs=[reveal, summary_md],
206
- )
207
- vote_b.click(
208
- _make_vote_handler("B"),
209
- inputs=[task_id_state, slot_a_is_baseline, comment],
210
- outputs=[reveal, summary_md],
211
  )
212
- vote_tie.click(
213
- _make_vote_handler("tie"),
214
- inputs=[task_id_state, slot_a_is_baseline, comment],
215
- outputs=[reveal, summary_md],
216
  )
217
- vote_bad.click(
218
- _make_vote_handler("both_bad"),
219
- inputs=[task_id_state, slot_a_is_baseline, comment],
220
- outputs=[reveal, summary_md],
221
  )
222
 
223
  refresh_btn = gr.Button("집계 새로고침", size="sm")
224
  refresh_btn.click(
225
- lambda: gr.update(value=_format_summary(db.fetch_summary(), db.fetch_vote_counts())),
226
  outputs=[summary_md],
227
  )
 
1
+ """Gradio UI for single-output proofreading feedback.
2
 
3
  Flow:
4
+ 1. User enters source text and clicks "교정 실행".
5
+ 2. The configured pipeline runs.
6
+ 3. Output + diff are shown.
7
+ 4. User picks Good / Not Bad / Critical (+ optional comment).
8
+ 5. On submit: rating is saved to Supabase ratings table.
 
 
9
  """
10
 
11
  from __future__ import annotations
12
 
 
13
  import time
 
14
  from typing import Any
15
 
16
  import gradio as gr
 
22
  PipelineConfig = tuple[str, str, str] # (pipeline_key, model, prompt_key)
23
 
24
 
25
+ def _format_summary(counts: dict[str, int]) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  total = sum(counts.values())
27
  if total == 0:
28
+ return "아직 피드백이 없습니다."
29
+ g = counts.get("good", 0)
30
+ n = counts.get("not_bad", 0)
31
+ c = counts.get("critical", 0)
32
+ return (
33
+ f"**총 피드백**: {total}\n\n"
34
+ f"- Good: **{g}**\n"
35
+ f"- Not Bad: **{n}**\n"
36
+ f"- Critical: **{c}**"
37
+ )
38
+
39
+
40
+ def build_feedback_tab(
 
41
  client: Any,
42
  vocabulary: list[dict],
43
+ pipeline_config: PipelineConfig,
 
44
  ) -> None:
45
+ """Build the single-pipeline feedback UI. Call inside a gr.Blocks/Tab."""
 
 
46
 
47
+ pipeline_key, model, prompt_key = pipeline_config
 
 
 
48
 
49
+ pipeline_run_id_state = gr.State(None)
 
 
 
 
50
 
51
  input_text = gr.Textbox(
52
  label="원문 입력",
53
+ lines=8,
54
  placeholder="교정할 텍스트를 입력하세요.",
55
  )
56
+
57
+ run_btn = gr.Button(
58
+ "교정 실행 (⌘+Enter / Ctrl+Enter)",
59
+ variant="primary",
60
+ elem_id="compare-run-btn",
61
+ )
62
 
63
  status = gr.Markdown("")
64
 
65
+ output = gr.Textbox(label="교정 결과", lines=12, interactive=False)
66
+ diff_html = gr.HTML(label="원문 대비 diff")
67
+
68
+ gr.Markdown("### 피드백")
69
  with gr.Row():
70
+ rate_good = gr.Button("👍 Good", variant="primary")
71
+ rate_notbad = gr.Button("🆗 Not Bad")
72
+ rate_critical = gr.Button("🚨 Critical", variant="stop")
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  comment = gr.Textbox(label="코멘트 (선택)", lines=2)
75
+ rating_status = gr.Markdown("")
 
76
  summary_md = gr.Markdown("")
77
 
78
+ def _on_run(text: str):
79
  if not text or not text.strip():
80
  return (
81
  gr.update(value="입력 텍스트가 비어있습니다."),
82
+ gr.update(value=""),
83
+ gr.update(value=""),
84
+ None,
85
  gr.update(value=""),
86
  )
87
  if client is None:
88
  return (
89
  gr.update(value="UPSTAGE_API_KEY 미설정."),
90
+ gr.update(value=""),
91
+ gr.update(value=""),
92
+ None,
93
  gr.update(value=""),
94
  )
95
 
96
+ start = time.time()
97
+ try:
98
+ result = run_pipeline(text, pipeline_key, model, prompt_key, client, vocabulary)
99
+ except Exception as exc:
100
+ return (
101
+ gr.update(value=f"에러: {exc}"),
102
+ gr.update(value=""),
103
+ gr.update(value=""),
104
+ None,
105
+ gr.update(value=""),
106
+ )
107
 
108
+ elapsed = result.get("processing_time", time.time() - start)
109
+ out_text = result.get("output", "")
 
 
 
110
 
111
+ article_id = db.save_article(text)
112
+ run_id = db.save_pipeline_run(
 
 
 
 
 
 
 
 
 
113
  article_id,
114
+ pipeline_key=pipeline_key,
115
+ prompt_key=prompt_key,
116
+ model=model,
117
+ output=out_text,
118
+ processing_time_s=float(elapsed),
119
+ )
 
 
 
 
120
 
121
  return (
122
+ gr.update(value=f"완료 · {elapsed:.1f}s"),
123
+ gr.update(value=out_text),
124
+ gr.update(value=highlight_diff(text, out_text)),
125
+ run_id,
126
+ gr.update(value=""),
 
 
127
  )
128
 
129
+ run_btn.click(
130
+ _on_run,
131
  inputs=[input_text],
132
+ outputs=[status, output, diff_html, pipeline_run_id_state, rating_status],
 
 
 
 
 
 
133
  )
134
 
135
+ def _make_rating_handler(rating: str):
136
+ def handler(run_id, comment_text):
137
+ saved = db.save_rating(run_id, rating, comment_text)
138
+ note = "✅ 피드백 저장됨" if saved else "⚠️ 저장되지 않았습니다 (먼저 교정 실행 후 피드백을 남겨주세요)"
139
+ summary = _format_summary(db.fetch_rating_counts())
140
+ return gr.update(value=note), gr.update(value=summary)
 
 
 
 
 
 
 
 
 
141
  return handler
142
 
143
+ rate_good.click(
144
+ _make_rating_handler("good"),
145
+ inputs=[pipeline_run_id_state, comment],
146
+ outputs=[rating_status, summary_md],
 
 
 
 
 
147
  )
148
+ rate_notbad.click(
149
+ _make_rating_handler("not_bad"),
150
+ inputs=[pipeline_run_id_state, comment],
151
+ outputs=[rating_status, summary_md],
152
  )
153
+ rate_critical.click(
154
+ _make_rating_handler("critical"),
155
+ inputs=[pipeline_run_id_state, comment],
156
+ outputs=[rating_status, summary_md],
157
  )
158
 
159
  refresh_btn = gr.Button("집계 새로고침", size="sm")
160
  refresh_btn.click(
161
+ lambda: gr.update(value=_format_summary(db.fetch_rating_counts())),
162
  outputs=[summary_md],
163
  )
pipelines.py CHANGED
@@ -857,23 +857,33 @@ def _process_single_bulk(
857
  )
858
  # FT model returns raw text (no JSON). Other steps return JSON with "output" field.
859
  if is_ft_model:
860
- # FT duplication guard: certain short inputs (e.g. those starting with
861
- # a dash `-` or bullet) trigger the FT model to echo its input twice
862
- # in sequence. If the response is ≥1.5× the input length and the input
863
- # appears as the leading prefix, strip everything after the first
864
- # occurrence to recover the intended single-pass output.
865
- if response and len(response) >= len(bulk) * 1.5:
866
- # Look for the input (or a light variant) appearing twice
 
867
  stripped = response.strip()
 
 
 
 
 
 
 
 
 
 
 
868
  first_end = len(stripped) // 2
869
  head = stripped[:first_end].rstrip()
870
  tail = stripped[first_end:].strip()
871
- # If the two halves are ~identical (>=0.8 char-level equality),
872
- # or one is a substring of the other, keep only the first half.
873
  if head and (head == tail or head in tail or tail in head):
874
  return head
875
- # Fallback: if length-duplicated but halves differ, return input
876
- # unchanged rather than propagating a duplicated mess downstream.
877
  return bulk
878
  return response
879
  # Fallback to original_text when LLM returns non-JSON hallucination — matches
 
857
  )
858
  # FT model returns raw text (no JSON). Other steps return JSON with "output" field.
859
  if is_ft_model:
860
+ # FT duplication guard. Symptoms seen in the wild:
861
+ # (a) Half-half echo: model returns input twice (length ~2×).
862
+ # (b) Prefix echo: model returns full input verbatim then appends a
863
+ # partial re-correction tail (length ~1.2–1.5×).
864
+ # Both yield a downstream "duplicated paragraph" bug, so we trigger the
865
+ # guard at 1.25× and try several recovery patterns before falling back
866
+ # to the original input.
867
+ if response and len(response) >= len(bulk) * 1.25:
868
  stripped = response.strip()
869
+ bulk_stripped = bulk.strip()
870
+
871
+ # (b) Output starts with full input verbatim → strip the appended tail.
872
+ if (
873
+ bulk_stripped
874
+ and stripped.startswith(bulk_stripped)
875
+ and len(stripped) > len(bulk_stripped) * 1.05
876
+ ):
877
+ return bulk_stripped
878
+
879
+ # (a) Half-half echo.
880
  first_end = len(stripped) // 2
881
  head = stripped[:first_end].rstrip()
882
  tail = stripped[first_end:].strip()
 
 
883
  if head and (head == tail or head in tail or tail in head):
884
  return head
885
+
886
+ # Unknown duplication pattern; safer to return the input.
887
  return bulk
888
  return response
889
  # Fallback to original_text when LLM returns non-JSON hallucination — matches