#!/usr/bin/env python3
"""Build a Google-Docs-ready comparison of 5 models' input/output from JSONL files."""

import html
import json
import os

BASE = os.path.dirname(os.path.abspath(__file__))

# Placeholder shown when a model's file has no row for a requested index.
_NO_ROW = "— (no row for this index)"
# Placeholder shown when the reference (first) model has no row to take the input from.
_NO_SOURCE = "— (no source row for this index)"


def load_by_row_indices(path, indices_wanted):
    """Return ``{row_index: row}`` for the wanted indices found in a JSONL file.

    Args:
        path: Path to a JSONL file; each non-blank line is one JSON object.
        indices_wanted: Iterable of ``row_index`` values to collect.

    Returns:
        A dict mapping each found ``row_index`` to its parsed row. When an
        index occurs several times in the file, the first occurrence wins.
        Indices that never occur are simply absent from the result.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    wanted = list(indices_wanted)
    # Count distinct indices so duplicates in the request don't defeat the
    # early exit below.
    target = len(set(wanted))
    out = {}
    with open(path, encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            row = json.loads(line)
            ri = row.get("row_index")
            if ri in wanted and ri not in out:
                out[ri] = row
            if len(out) >= target:
                # Everything requested has been found; skip the rest of the file.
                break
    return out


def get_input(row):
    """Return the stripped source text of a row.

    The first non-empty value among the keys ``input_text``, ``prompt`` and
    ``summary_text`` is used; an empty string is returned if none is set.
    """
    return (row.get("input_text") or row.get("prompt") or row.get("summary_text") or "").strip()


def get_output(row):
    """Return the stripped model output of a row.

    The first non-empty value among the keys ``generated_text`` and
    ``prediction`` is used; an empty string is returned if none is set.
    """
    return (row.get("generated_text") or row.get("prediction") or "").strip()


def _html_text(text):
    """Escape text for HTML body content and render newlines as ``<br>``."""
    return html.escape(text, quote=False).replace("\n", "<br>")


def _source_text(reference, ri):
    """Return the input text for index ``ri`` from the reference rows, or a placeholder."""
    row = reference.get(ri)
    return get_input(row) if row is not None else _NO_SOURCE


def _build_html_lines(indices, models, reference):
    """Build the HTML document (as a list of lines) for pasting into Google Docs."""
    # NOTE(review): the markup below (h1/h2/h3/p/hr/br) is a reconstruction;
    # confirm it matches the styling intended for the Google Doc.
    lines = [
        "<h1>Model input/output examples: five models comparison</h1>",
        "<p>Models: (1) vllm_inference_320 — your trained RL model; (2) gpt-5; (3) gpt-5-mini; (4) gpt-5-nano; (5) qwen3-4B-instruct — base without RL.</p>",
        "<p>Task: simplified medical/summary text (low health literacy style).</p>",
        "<p>Note: Example 3 — GPT-5-mini and GPT-5-nano were run on a subset; their row_index 3 may refer to a different case than the other three models.</p>",
        "<hr>",
    ]
    for ex_num, ri in enumerate(indices, 1):
        lines.append(f"<h2>Example {ex_num}</h2>")
        lines.append("<p>Input (source text):</p>")
        lines.append(f"<p>{_html_text(_source_text(reference, ri))}</p>")
        lines.append("<p>Outputs by model:</p>")
        for label, data in models:
            if ri not in data:
                lines.append(f"<p>{_html_text(label)}: {_NO_ROW}</p>")
                continue
            lines.append(f"<h3>{_html_text(label)}</h3>")
            lines.append(f"<p>{_html_text(get_output(data[ri]))}</p>")
        lines.append("<hr>")
    return lines


def _build_md_lines(indices, models, reference):
    """Build the Markdown document (as a list of lines)."""
    lines = [
        "# Model input/output examples: five models comparison",
        "",
        "**Models:** (1) vllm_inference_320 — trained RL model; (2) gpt-5; (3) gpt-5-mini; (4) gpt-5-nano; (5) qwen3-4B-instruct — base without RL.",
        "",
        "Task: simplified medical/summary text (low health literacy style).",
        "",
        "*Note: Example 3 — GPT-5-mini and GPT-5-nano were run on a subset; their row_index 3 may refer to a different case.*",
        "",
        "---",
        "",
    ]
    for ex_num, ri in enumerate(indices, 1):
        lines.append(f"## Example {ex_num}")
        lines.append("")
        lines.append("**Input (source text):**")
        lines.append("")
        lines.append(_source_text(reference, ri))
        lines.append("")
        lines.append("**Outputs by model:**")
        lines.append("")
        for label, data in models:
            if ri in data:
                lines.append(f"- **{label}:**")
                # Flatten the output onto one line so it stays with its bullet.
                lines.append(" " + get_output(data[ri]).replace("\n", " "))
            else:
                lines.append(f"- **{label}:** {_NO_ROW}")
            # Blank line after every entry keeps the following "---" a rule.
            lines.append("")
        lines.append("---")
        lines.append("")
    return lines


def _write_lines(path, lines):
    """Write ``lines`` to ``path`` as UTF-8, newline-joined, and report the path."""
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print("Wrote:", path)


def main():
    """Load the selected rows for each model and write the HTML and Markdown reports.

    Both reports are written next to this script. The first model in the list
    (the trained RL model) is the reference whose rows supply the input text
    shown for each example.
    """
    indices = [0, 2, 3]
    sources = [
        ("vllm_inference_320 (trained RL)",
         "vllm_model_result/vllm_inference_320_en_only_srcCov_v5.jsonl"),
        ("gpt-5",
         "gpt5mini-nano_inference/gpt5_inference_gpt-5_20260302_201653.jsonl"),
        ("gpt-5-mini",
         "gpt5mini-nano_inference/gpt5_inference_gpt-5-mini_20260213_025254_cleaned_by_verified_combined_0-80_clean200.jsonl"),
        ("gpt-5-nano",
         "gpt5mini-nano_inference/gpt5_inference_gpt-5-nano_20260213_025254_cleaned_by_verified_combined_0-80_clean200.jsonl"),
        ("qwen3-4B-instruct (base, no RL)",
         "vllm_model_result/qwen3-4b-instruct-base-result.jsonl"),
    ]
    models = [
        (label, load_by_row_indices(os.path.join(BASE, rel_path), indices))
        for label, rel_path in sources
    ]
    # The reference rows may lack an index; the builders then emit a
    # placeholder instead of raising KeyError.
    reference = models[0][1]

    # HTML for Google Docs (paste into doc)
    _write_lines(
        os.path.join(BASE, "model_comparison_for_google_doc.html"),
        _build_html_lines(indices, models, reference),
    )

    # Markdown version
    _write_lines(
        os.path.join(BASE, "model_comparison_for_google_doc.md"),
        _build_md_lines(indices, models, reference),
    )


if __name__ == "__main__":
    main()