"""Judge whether subclaims omitted from readability-controlled medical summaries
are *reasonable* omissions, using an OpenAI model as the evaluator.

Reads the synthetic summaries and the subclaim-verifier results (aligned by
index), prompts the model once per (case, readability version), and
checkpoints the accumulated judgments to JSON so an interrupted run can be
resumed.
"""

import json
import os

import tqdm
from openai import OpenAI

# One judgment is appended to the results list per (case, version) pair.
READABILITY_VERSIONS = ["easy", "intermediate", "hard"]
NUM_CASES = 100


def return_promptst(reference_summary, generated_summary, subclaims_json, difficulty_level):
    """Build the evaluator prompt for one (case, readability version) pair.

    Args:
        reference_summary: Reference summary text for the case.
        generated_summary: The generated summary at the given readability level.
        subclaims_json: Stringified subclaims-and-results structure from the
            subclaim verifier (entries with result 0 are the omitted ones).
        difficulty_level: One of "easy", "intermediate", "hard".

    Returns:
        The full prompt string, instructing the model to emit strict JSON.
    """
    prompt = f'''
**SYSTEM / ROLE INSTRUCTION:**
You are a **medical readability evaluator**. Your task is to judge whether omitted subclaims (those with `"result": 0`) from a generated summary are *reasonably omitted* based on the intended **readability level**: *easy*, *intermediate*, or *hard*. You evaluate this from the standpoint of clarity, faithfulness, and readability goals.

---

### **READABILITY GUIDELINES**

| Level | Target Audience | Content Expectation | Technical Detail Allowed |
| :--------------- | :--------------------------------------- | :-------------------------------------------------------------- | :--------------------------------------------------------------- |
| **Easy** | General public | Focus on main events, outcomes, and diagnoses in plain Spanish. | Minimal — avoid measurements, anatomy, and test results. |
| **Intermediate** | Educated lay readers or medical students | Include key findings and procedures in simplified form. | Moderate — basic terms and causes allowed. |
| **Hard** | Medical professionals | Retain most technical information and precision. | High — measurements, anatomy, and test interpretations expected. |

---

### **INPUT FIELDS**

**Reference summary:**
{reference_summary}

**Generated summary ({difficulty_level}):**
{generated_summary}

**Subclaims and results:**
{subclaims_json}

---

### **TASK INSTRUCTIONS**

1. Focus on subclaims with `"result": 0` (not supported by the generated summary).
2. For each omitted subclaim:
   * Decide whether omission is **reasonable** given the readability level.
   * Label as: `"yes"`, `"no"`, or `"borderline"`.
   * Write a brief justification (1–2 sentences).
3. After individual evaluations, assign a **reasonableness score (0–5)** using this scale:
   * **5** = All omissions appropriate for target readability.
   * **4** = Minor omissions could improve completeness.
   * **3** = Some omissions reduce understanding or medical clarity.
   * **2** = Many important omissions harm faithfulness.
   * **1** = Major omissions misrepresent case.
   * **0** = Summary fails to reflect key medical information.
4. End with an **overall explanation (3–5 sentences)** describing:
   * The main reasoning behind the score.
   * Whether the summary fits its intended readability level.
   * Suggestions for improvement if needed.

---

### **OUTPUT FORMAT (strict JSON)**

```json
{{
  "evaluation_table": [
    {{
      "id": ,
      "subclaim": "",
      "reasonable_omission": "",
      "explanation": ""
    }}
  ],
  "reasonableness_score": <0-5>,
  "overall_explanation": ""
}}
```
'''
    return prompt


# Load the OpenAI API key from a local credentials file.
file_path = "/home/mshahidul/api_new.json"
with open(file_path, "r") as file:
    api_keys = json.load(file)
openai_api_key = api_keys.get("openai")
client = OpenAI(api_key=openai_api_key)


def openai_return(prompt):
    """Send *prompt* to the chat model and return its reply parsed as JSON.

    The model frequently wraps its JSON in ```json fences, so those are
    stripped before parsing. Raises json.JSONDecodeError if the cleaned
    reply is still not valid JSON (caught by the caller's per-item handler).
    """
    response = client.chat.completions.create(
        model="gpt-5-mini",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    cleaned_response = (
        response.choices[0].message.content.strip()
        .replace("```json", "")
        .replace("```", "")
    )
    return json.loads(cleaned_response)


# Input data: synthetic summaries and subclaim-verifier output, aligned by index.
# dict_keys(['id', 'full_text', 'ref_summary', 'readability_versions'])
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
with open(file_path, 'r') as f:
    synthetic_data = json.load(f)

file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
with open(file_path_qwen3_32B, 'r') as f:
    qwen3_32B_results = json.load(f)

res = []
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/resonability_check_100_gpt5.json"
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
    print(f"Resuming from {len(res)} entries")

# BUG FIX: one entry is appended per (case, version), i.e. 3 entries per case,
# so the resume point is len(res) // 3 cases. The original range(len(res), 100)
# treated the entry count as a case index and silently skipped unprocessed
# cases after a resume.
start_case = len(res) // len(READABILITY_VERSIONS)
for ind in tqdm.tqdm(range(start_case, NUM_CASES)):
    print(f"Processing index: {ind}")
    for version in READABILITY_VERSIONS:
        # f-strings coerce the underlying values to str, matching what the
        # prompt template expects (the verifier results are a list/dict).
        ref_summary = (f"{synthetic_data[ind]['ref_summary']['text']}")
        generated_summary = (f"{synthetic_data[ind]['readability_versions'][version]['text']}")
        subclaims_results = (f"{qwen3_32B_results[ind]['completeness']['results']}")
        try:
            prompt = return_promptst(ref_summary, generated_summary, subclaims_results, version)
            res.append({
                "id": synthetic_data[ind]['id'],
                "difficulty_level": version,
                "prompt": openai_return(prompt),
            })
            # Periodic checkpoint so a crash loses at most a couple of entries.
            if len(res) % 2 == 0:
                print(f"Completed {len(res)} out of {NUM_CASES * len(READABILITY_VERSIONS)}")
                with open(save_path, 'w') as outfile:
                    json.dump(res, outfile, indent=2)
        except Exception as e:
            # Best-effort: log and continue so one bad case / malformed model
            # reply does not abort the whole run.
            print(f"Error at {ind} {version}: {e}")

# Final write captures any entries appended since the last checkpoint.
with open(save_path, 'w') as outfile:
    json.dump(res, outfile, indent=2)