# Model input/output examples: five models comparison

#!/usr/bin/env python3
"""Build a Google-Docs-ready comparison of 5 models' input/output from JSONL files."""

import html
import json
import os

# Absolute directory containing this script; every input and output path
# used below is joined onto it.
BASE = os.path.split(os.path.abspath(__file__))[0]

def load_by_row_indices(path, indices_wanted):
    """Load the first JSONL row for each requested ``row_index`` value.

    Args:
        path: Path to a JSONL file (one JSON object per line), read as UTF-8.
        indices_wanted: Sized collection of ``row_index`` values to collect.

    Returns:
        A dict mapping each requested ``row_index`` that was found to the
        first parsed row carrying it. Indices absent from the file are simply
        missing from the result.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Count distinct indices so the early exit is still reachable when the
    # caller repeats an index.
    target = len(set(indices_wanted))
    out = {}
    # Read as UTF-8 explicitly: the reports are written as UTF-8, and the
    # platform default encoding may differ.
    with open(path, encoding="utf-8") as f:
        if not target:
            # Nothing requested; the file is still opened above so that a bad
            # path is reported rather than silently ignored.
            return out
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            row = json.loads(line)
            ri = row.get("row_index")
            if ri in indices_wanted and ri not in out:
                out[ri] = row
                if len(out) >= target:
                    # Every requested index has been found; stop reading.
                    break
    return out

def get_input(row):
    """Return the stripped source text of *row*.

    The keys ``input_text``, ``prompt`` and ``summary_text`` are tried in
    that order; the first one holding a truthy value wins. An empty string
    is returned when none of them does.
    """
    for key in ("input_text", "prompt", "summary_text"):
        value = row.get(key)
        if value:
            return value.strip()
    return ""

def get_output(row):
    """Return the stripped model output of *row*.

    ``generated_text`` takes precedence over ``prediction``; an empty string
    is returned when neither key holds a truthy value.
    """
    text = row.get("generated_text")
    if not text:
        text = row.get("prediction")
    if not text:
        return ""
    return text.strip()

# Placeholder shown wherever a file has no row for the requested index.
_NO_ROW = "— (no row for this index)"


def _html_paragraph(text):
    """Return *text* as an HTML ``<p>``: escaped, with newlines as ``<br>``."""
    # Escape first so that `<`, `>` and `&` in model text cannot break the
    # markup; quotes are harmless in element content and are left readable.
    escaped = html.escape(text, quote=False)
    return "<p>" + escaped.replace("\n", "<br>") + "</p>"


def _render_html(indices, reference, models):
    """Build the lines of the HTML report (meant to be pasted into a doc)."""
    lines = [
        # Declare the encoding: the file is written as UTF-8 and contains
        # non-ASCII characters (e.g. em dashes).
        '<meta charset="utf-8">',
        "<h1>Model input/output examples: five models comparison</h1>",
        "<p><strong>Models:</strong> (1) vllm_inference_320 — your trained RL model; (2) gpt-5; (3) gpt-5-mini; (4) gpt-5-nano; (5) qwen3-4B-instruct — base without RL.</p>",
        "<p>Task: simplified medical/summary text (low health literacy style).</p>",
        "<p><em>Note: Example 3 — GPT-5-mini and GPT-5-nano were run on a subset; their row_index 3 may refer to a different case than the other three models.</em></p>",
        "<hr>",
    ]
    for ex_num, ri in enumerate(indices, 1):
        lines.append(f"<h2>Example {ex_num}</h2>")
        lines.append("<p><strong>Input (source text):</strong></p>")
        # The source text comes from the reference rows; show the placeholder
        # instead of failing when that file lacks the row.
        source_text = get_input(reference[ri]) if ri in reference else _NO_ROW
        lines.append(_html_paragraph(source_text))
        lines.append("<p><strong>Outputs by model:</strong></p>")
        for label, data in models:
            safe_label = html.escape(label, quote=False)
            if ri not in data:
                lines.append(f"<p><strong>{safe_label}</strong>: {_NO_ROW}</p>")
                continue
            lines.append(f"<p><strong>{safe_label}</strong></p>")
            lines.append(_html_paragraph(get_output(data[ri])))
        lines.append("<hr>")
    return lines


def _render_markdown(indices, reference, models):
    """Build the lines of the Markdown report."""
    lines = [
        "# Model input/output examples: five models comparison",
        "",
        "**Models:** (1) vllm_inference_320 — trained RL model; (2) gpt-5; (3) gpt-5-mini; (4) gpt-5-nano; (5) qwen3-4B-instruct — base without RL.",
        "",
        "Task: simplified medical/summary text (low health literacy style).",
        "",
        "*Note: Example 3 — GPT-5-mini and GPT-5-nano were run on a subset; their row_index 3 may refer to a different case.*",
        "",
        "---",
        "",
    ]
    for ex_num, ri in enumerate(indices, 1):
        source_text = get_input(reference[ri]) if ri in reference else _NO_ROW
        lines += [
            f"## Example {ex_num}",
            "",
            "**Input (source text):**",
            "",
            source_text,
            "",
            "**Outputs by model:**",
            "",
        ]
        for label, data in models:
            if ri not in data:
                lines.append(f"- **{label}:** {_NO_ROW}")
                continue
            lines.append(f"- **{label}:**")
            # Flatten newlines so the output stays inside its list item.
            lines.append("  " + get_output(data[ri]).replace("\n", " "))
            lines.append("")
        lines.append("---")
        lines.append("")
    return lines


def _write_lines(path, lines):
    """Write *lines* to *path* as UTF-8, newline-joined, and report the path."""
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines))
    print("Wrote:", path)


def main():
    """Load rows 0, 2 and 3 from five model result files and write the reports.

    Each file is read with ``load_by_row_indices``. The source text of every
    example is taken from the first model (vllm_inference_320). Two files are
    written next to this script: ``model_comparison_for_google_doc.html`` and
    ``model_comparison_for_google_doc.md``.
    """
    indices = [0, 2, 3]

    # (display label, result file relative to BASE), in report order.
    sources = [
        ("vllm_inference_320 (trained RL)", "vllm_model_result/vllm_inference_320_en_only_srcCov_v5.jsonl"),
        ("gpt-5", "gpt5mini-nano_inference/gpt5_inference_gpt-5_20260302_201653.jsonl"),
        ("gpt-5-mini", "gpt5mini-nano_inference/gpt5_inference_gpt-5-mini_20260213_025254_cleaned_by_verified_combined_0-80_clean200.jsonl"),
        ("gpt-5-nano", "gpt5mini-nano_inference/gpt5_inference_gpt-5-nano_20260213_025254_cleaned_by_verified_combined_0-80_clean200.jsonl"),
        ("qwen3-4B-instruct (base, no RL)", "vllm_model_result/qwen3-4b-instruct-base-result.jsonl"),
    ]
    models = [
        (label, load_by_row_indices(os.path.join(BASE, rel_path), indices))
        for label, rel_path in sources
    ]
    # Rows of the first model supply the input text shown for each example.
    reference = models[0][1]

    _write_lines(
        os.path.join(BASE, "model_comparison_for_google_doc.html"),
        _render_html(indices, reference, models),
    )
    _write_lines(
        os.path.join(BASE, "model_comparison_for_google_doc.md"),
        _render_markdown(indices, reference, models),
    )

if __name__ == "__main__":
    main()