# agents.py
import os, json, textwrap, pathlib
from typing import Dict, Any, Tuple
from openai import OpenAI

from files_process import load_input_text

# ---------- Instructions ----------

def build_case_study_instructions() -> str:
    """Return the shared system prompt for the case-study writer agents.

    The prompt restricts the model to the caller-supplied INPUT, mandates
    established reporting guidance (CARE, CONSORT-AI / SPIRIT-AI, TRIPOD+AI,
    HIPAA Safe Harbor, FDA SaMD), and fixes the exact Markdown headings the
    draft must contain.
    """
    prompt = """\
    You are a senior medical writer specializing in AI in healthcare.
    Using ONLY the provided INPUT (no fabrication), produce a professional case study in **Markdown**.

    STRICT REQUIREMENTS (cite inline in prose, not as links):
    - Use established reporting guidance:
      • CARE case reports for completeness/transparency (CARE, 2013/2017).
      • CONSORT-AI (trial reports) and SPIRIT-AI (protocols) where applicable (2020).
      • TRIPOD+AI (2024) for prediction model reporting (discrimination, calibration, validation).
      • HIPAA de-identification Safe Harbor: state that 18 identifiers are removed or avoided; avoid re-identification risk.
      • FDA AI/ML SaMD perspective: risk controls, monitoring, change management.
    - If a section lacks data, write “Not specified.”
    - Use a neutral, clinical tone (avoid marketing fluff).
    - Prefer short paragraphs and tables where appropriate.

    MANDATORY SECTIONS (use these exact headings):
    # Title
    ## Executive Summary
    - 3–6 bullets

    ## Clinical Context & Problem
    ## Patient/Population & Setting
    ## Data Sources & Governance
    - Provenance; access; quality checks; de-identification approach (HIPAA Safe Harbor)
    - Security/compliance: HIPAA (and GDPR if applicable)

    ## AI/ML Approach
    - Task definition; target(s)
    - Features/data preparation
    - Model(s); training/validation split; external validation if any
    - Fairness/bias checks

    ## Evaluation & Metrics if provided
    - Classification/regression metrics
    - Calibration; confidence estimation
    - Clinical outcomes (if available)
    - Reference TRIPOD+AI for what to report

    ## Workflow Integration & Safety
    - Human oversight; failure modes; alerting
    - Monitoring & model updates (FDA SaMD AI/ML perspective)

    ## Results & Impact
    - Clinical impact; operational efficiency; ROI/costs where applicable

    ## Ethics & Bias Mitigation
    ## Regulatory, Privacy & Security
    - HIPAA/GDPR; access controls; audit
    ## Limitations & Generalizability
    ## Conclusion

    LENGTH: aim 4,000–5,800 words.

    OUTPUT: Valid Markdown only.
    DO NOT include code fences around the Markdown.
    """
    return textwrap.dedent(prompt)

def build_manager_instructions() -> str:
    """Return the system prompt for the manager/reviewer agent.

    The prompt asks the model to score three drafts on five 1–10 criteria,
    justify each score, pick a single winner, and reply with strict JSON
    only (schema embedded in the prompt).
    """
    prompt = """\
    You are the manager reviewing three case study drafts on AI in healthcare.

    TASKS:
    1) Rate each draft on a 1–10 scale for:
       - Clinical completeness (CARE)
       - AI reporting rigor (TRIPOD+AI)
       - Trial/protocol framing where relevant (CONSORT-AI / SPIRIT-AI)
       - Privacy & regulatory correctness (HIPAA Safe Harbor; FDA SaMD)
       - Clarity & structure
    2) Briefly justify each rating (1–3 sentences).
    3) Pick a **single winner** among the three drafts (best overall).

    OUTPUT JSON (strict):
    {
      "scores": [
        {"agent": "agent1", "clinical_completeness": int, "ai_rigor": int, "trial_framing": int, "privacy_regulatory": int, "clarity_structure": int, "justification": "..."},
        {"agent": "agent2", "clinical_completeness": int, "ai_rigor": int, "trial_framing": int, "privacy_regulatory": int, "clarity_structure": int, "justification": "..."},
        {"agent": "agent3", "clinical_completeness": int, "ai_rigor": int, "trial_framing": int, "privacy_regulatory": int, "clarity_structure": int, "justification": "..."}
      ],
      "winner": "agent1|agent2|agent3"
    }

    IMPORTANT:
    - Return only JSON.
    """
    return textwrap.dedent(prompt)

# ---------- Agent calls (OpenAI SDK) ----------

def _openai_client() -> OpenAI:
    """Build an OpenAI client keyed by the OPENAI_API_KEY environment variable.

    If the variable is unset, ``api_key=None`` is passed through and the SDK
    applies its own credential resolution / error handling.
    """
    api_key = os.environ.get("OPENAI_API_KEY")
    return OpenAI(api_key=api_key)

def call_openai_case_study(input_text: str, model: str = "gpt-4o-mini") -> str:
    """Generate the first agent's case-study draft via the OpenAI Responses API.

    Args:
        input_text: Source material the case study must be grounded in.
        model: OpenAI model identifier to use.

    Returns:
        The generated Markdown draft, stripped of surrounding whitespace.
    """
    response = _openai_client().responses.create(
        model=model,
        instructions=build_case_study_instructions(),
        input=f"INPUT:\n{input_text}\n\nFollow the instructions strictly.",
        temperature=0.3,
    )
    return response.output_text.strip()

def call_gemini_case_study(input_text: str, model: str = "gpt-4.1-nano") -> str:
    """Generate the second agent's case-study draft.

    NOTE(review): despite the name, this calls the OpenAI Responses API with
    an OpenAI model (not Gemini) — presumably a leftover from an earlier
    multi-provider design; confirm before renaming, as callers use this name.

    Args:
        input_text: Source material the case study must be grounded in.
        model: OpenAI model identifier to use.

    Returns:
        The generated Markdown draft, stripped of surrounding whitespace.
    """
    response = _openai_client().responses.create(
        model=model,
        instructions=build_case_study_instructions(),
        input=f"INPUT:\n{input_text}\n\nFollow the instructions strictly.",
        temperature=0.3,
    )
    return response.output_text.strip()

def call_deepseek_case_study(input_text: str, model: str = "gpt-4.1-mini") -> str:
    """Generate the third agent's case-study draft.

    NOTE(review): despite the name, this calls the OpenAI Responses API with
    an OpenAI model (not DeepSeek) — presumably a leftover from an earlier
    multi-provider design; confirm before renaming, as callers use this name.

    Args:
        input_text: Source material the case study must be grounded in.
        model: OpenAI model identifier to use.

    Returns:
        The generated Markdown draft, stripped of surrounding whitespace.
    """
    response = _openai_client().responses.create(
        model=model,
        instructions=build_case_study_instructions(),
        input=f"INPUT:\n{input_text}\n\nFollow the instructions strictly.",
        temperature=0.3,
    )
    return response.output_text.strip()

def call_openai_manager(agent1: str, agent2: str, agent3: str,
                        model: str = "gpt-4o") -> Dict[str, Any]:
    """Ask a manager model to score the three drafts and pick a winner.

    Args:
        agent1: Markdown draft produced by the first writer agent.
        agent2: Markdown draft produced by the second writer agent.
        agent3: Markdown draft produced by the third writer agent.
        model: OpenAI model identifier used for the review.

    Returns:
        The manager's parsed verdict — per the prompt schema, a dict with
        "scores" (one entry per agent) and "winner".

    Raises:
        RuntimeError: If the manager response contains no parseable JSON.
    """
    client = _openai_client()
    manager_instr = build_manager_instructions()
    payload = {
        "agent1_draft": agent1,
        "agent2_draft": agent2,
        "agent3_draft": agent3,
    }
    resp = client.responses.create(
        model=model,
        instructions=manager_instr,
        input=json.dumps(payload),
        temperature=0.2,
    )
    raw = resp.output_text
    try:
        return json.loads(raw)
    except json.JSONDecodeError as err:
        # The model occasionally wraps the JSON in prose despite the
        # "Return only JSON" instruction; salvage the outermost {...} span
        # before giving up.
        start, end = raw.find("{"), raw.rfind("}")
        if start != -1 and end > start:
            try:
                return json.loads(raw[start:end + 1])
            except json.JSONDecodeError:
                pass
        raise RuntimeError(f"Manager returned non-JSON: {raw}") from err

# ---------- Orchestration ----------

def run_pipeline(file: str,
                 oai_model: str = "gpt-4o-mini",
                 gem_model: str = "gpt-4.1-nano",
                 ds_model: str = "gpt-4.1-mini") -> Dict[str, Any]:
    """Run the full three-agent case-study pipeline.

    Steps:
      1. Load the input text (string or path) with ``load_input_text``.
      2. Generate one draft per agent (three Responses API calls).
      3. Persist the drafts to agent1.md / agent2.md / agent3.md (UTF-8).
      4. Ask the manager model to score the drafts and pick a winner.

    Args:
        file: Input text or a path accepted by ``load_input_text``.
        oai_model: Model for agent 1.
        gem_model: Model for agent 2.
        ds_model: Model for agent 3.

    Returns:
        The manager's JSON verdict (scores + winner).
    """
    source_text = load_input_text(file)

    print("Generating case studies with three agents...")
    print("Generating Agent 1 Output...")
    draft1 = call_openai_case_study(source_text, model=oai_model)
    print("Generating Agent 2 Output...")
    draft2 = call_gemini_case_study(source_text, model=gem_model)
    print("Generating Agent 3 Output...")
    draft3 = call_deepseek_case_study(source_text, model=ds_model)

    for out_name, draft in zip(("agent1.md", "agent2.md", "agent3.md"),
                               (draft1, draft2, draft3)):
        pathlib.Path(out_name).write_text(draft, encoding="utf-8")
    print("Saved agent outputs to agent1.md, agent2.md, agent3.md")

    print("Manager evaluating...")
    return call_openai_manager(draft1, draft2, draft3)