{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "311c1d16", "metadata": {}, "outputs": [], "source": [ "# /home/mshahidul/readctrl/data/annotators_validate_data\n", "import os\n", "print(os.listdir('/home/mshahidul/readctrl/data/annotators_validate_data')[:3])\n", "all_folders = os.listdir('/home/mshahidul/readctrl/data/annotators_validate_data')\n", "print(os.listdir(f'/home/mshahidul/readctrl/data/annotators_validate_data/{all_folders[0]}'))\n", "file_path = f'/home/mshahidul/readctrl/data/annotators_validate_data/{all_folders[0]}/annotation_results.json'\n", "import json\n", "with open(file_path, 'r') as f:\n", " data = json.load(f)\n", "print(data[0].keys())" ] }, { "cell_type": "code", "execution_count": null, "id": "ea2f9f3b", "metadata": {}, "outputs": [], "source": [ "(all_folders)" ] }, { "cell_type": "code", "execution_count": null, "id": "08ae6eaa", "metadata": {}, "outputs": [], "source": [ "import os\n", "import json\n", "import pandas as pd\n", "from collections import Counter\n", "\n", "# Configuration\n", "input_dir = '/home/mshahidul/readctrl/data/annotators_validate_data'\n", "output_dir = '/home/mshahidul/readctrl/data/final_result'\n", "output_file = os.path.join(output_dir, 'consolidated_ratings.json')\n", "\n", "# 1. Create the output directory if it doesn't exist\n", "if not os.path.exists(output_dir):\n", " os.makedirs(output_dir, exist_ok=True)\n", "\n", "all_data = []\n", "\n", "# 2. Collect data from all folders\n", "folders = [f for f in os.listdir(input_dir) if os.path.isdir(os.path.join(input_dir, f))]\n", "avg = []\n", "for folder in folders:\n", " json_path = os.path.join(input_dir, folder, 'annotation_results.json')\n", " if os.path.exists(json_path):\n", " with open(json_path, 'r') as f:\n", " try:\n", " entries = json.load(f)\n", " if len(entries) <=3 :\n", " # print(f\"No entries found in {json_path}, skipping.\")\n", " avg.append(len(entries))\n", " avg\n", " for item in entries:\n", " all_data.append({\n", " 'doc_id': item.get('doc_id'),\n", " 'health_literacy_label': item.get('health_literacy_label'),\n", " 'rating': item.get('doc_rating')\n", " })\n", " except Exception as e:\n", " print(f\"Skipping error in {json_path}: {e}\")\n", "\n", "# 3. Process data\n", "df = pd.DataFrame(all_data)\n", "\n", "# Ensure we drop rows where any of our keys or the rating are missing\n", "df = df.dropna(subset=['doc_id', 'health_literacy_label', 'rating'])\n", "\n", "# 4. Aggregation Logic using both doc_id and health_literacy_label\n", "def get_mode(series):\n", " # Returns the most common rating for this specific doc + literacy level\n", " return Counter(series).most_common(1)[0][0]\n", "\n", "# Grouping by the composite key\n", "summary = df.groupby(['doc_id', 'health_literacy_label'])['rating'].agg([\n", " ('num_annotations', 'count'),\n", " ('mean_rating', 'mean'),\n", " ('consensus_rating', get_mode),\n", " ('rating_distribution', lambda x: list(x))\n", "]).reset_index()\n", "\n", "# 5. Save to JSON\n", "# orient='records' creates a list of dictionaries\n", "summary.to_json(output_file, orient='records', indent=4)\n", "\n", "print(f\"Success! Processed {len(summary)} unique (doc_id, literacy_label) pairs.\")\n", "print(f\"File saved at: {output_file}\")\n", "\n", "# Preview the first few entries\n", "print(summary.head())" ] }, { "cell_type": "code", "execution_count": null, "id": "75197961", "metadata": {}, "outputs": [], "source": [ "import os\n", "import json\n", "import pandas as pd\n", "from collections import Counter\n", "\n", "# Configuration\n", "input_dir = '/home/mshahidul/readctrl/data/annotators_validate_data'\n", "output_dir = '/home/mshahidul/readctrl/data/final_result'\n", "output_file_match = os.path.join(output_dir, 'consolidated_ratings.json')\n", "output_file_mismatch = os.path.join(output_dir, 'mismatched_ratings.json')\n", "\n", "if not os.path.exists(output_dir):\n", " os.makedirs(output_dir, exist_ok=True)\n", "\n", "all_data = []\n", "folders = [f for f in os.listdir(input_dir) if os.path.isdir(os.path.join(input_dir, f))]\n", "\n", "# 1. Collect data\n", "for folder in folders:\n", " json_path = os.path.join(input_dir, folder, 'annotation_results.json')\n", " if os.path.exists(json_path):\n", " with open(json_path, 'r') as f:\n", " try:\n", " entries = json.load(f)\n", " for item in entries:\n", " all_data.append({\n", " 'doc_id': item.get('doc_id'),\n", " 'health_literacy_label': item.get('health_literacy_label'),\n", " 'rating': item.get('doc_rating')\n", " })\n", " except Exception as e:\n", " print(f\"Skipping error in {json_path}: {e}\")\n", "\n", "df = pd.DataFrame(all_data).dropna(subset=['doc_id', 'health_literacy_label', 'rating'])\n", "\n", "# 2. Aggregation Logic\n", "def get_mode(series):\n", " return Counter(series).most_common(1)[0][0]\n", "\n", "summary = df.groupby(['doc_id', 'health_literacy_label'])['rating'].agg([\n", " ('num_annotations', 'count'),\n", " ('mean_rating', 'mean'),\n", " ('consensus_rating', get_mode),\n", " ('rating_distribution', lambda x: list(x))\n", "]).reset_index()\n", "\n", "# 3. Validation Logic\n", "def check_match(row):\n", " label = row['health_literacy_label']\n", " rating = row['consensus_rating']\n", " \n", " if label == \"low_health_literacy\":\n", " return rating in [1, 2]\n", " elif label == \"intermediate_health_literacy\":\n", " return rating == 3\n", " elif label == \"proficient_health_literacy\":\n", " return rating in [4, 5]\n", " return False\n", "\n", "# Apply the check\n", "summary['is_match'] = summary.apply(check_match, axis=1)\n", "\n", "# 4. Split and Save\n", "matches = summary[summary['is_match'] == True].drop(columns=['is_match'])\n", "mismatches = summary[summary['is_match'] == False].drop(columns=['is_match'])\n", "\n", "matches.to_json(output_file_match, orient='records', indent=4)\n", "mismatches.to_json(output_file_mismatch, orient='records', indent=4)\n", "\n", "print(f\"Success!\")\n", "print(f\"Matching entries saved: {len(matches)} -> {output_file_match}\")\n", "print(f\"Mismatched entries saved: {len(mismatches)} -> {output_file_mismatch}\")\n", "\n", "if not mismatches.empty:\n", " print(\"\\nPreview of Mismatches:\")\n", " print(mismatches.head())" ] }, { "cell_type": "code", "execution_count": null, "id": "e8773257", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "3f1f1045", "metadata": {}, "outputs": [], "source": [ "min(avg), max(avg), sum(avg)/len(avg)" ] }, { "cell_type": "code", "execution_count": null, "id": "877ebaac", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import json\n", "\n", "# 1. Load your consolidated JSON file\n", "file_path = '/home/mshahidul/readctrl/data/final_result/consolidated_ratings.json'\n", "with open(file_path, 'r') as f:\n", " data = json.load(f)\n", "\n", "df = pd.DataFrame(data)\n", "\n", "# 2. Define the \"OK\" logic function\n", "def check_if_ok(row):\n", " label = str(row['health_literacy_label']).lower()\n", " rating = row['consensus_rating']\n", " \n", " if label == 'low_health_literacy':\n", " return 1 if rating in [1, 2] else 0\n", " elif label == 'intermediate_health_literacy':\n", " return 1 if rating == 3 else 0\n", " elif label == 'proficient_health_literacy':\n", " return 1 if rating in [4, 5] else 0\n", " return 0\n", "\n", "# 3. Apply logic and calculate stats\n", "df['is_ok'] = df.apply(check_if_ok, axis=1)\n", "\n", "# Group by literacy label to see performance\n", "stats = df.groupby('health_literacy_label')['is_ok'].agg(['count', 'sum']).reset_index()\n", "stats.columns = ['Literacy Level', 'Total Docs', 'Number OK']\n", "stats['Success Rate (%)'] = (stats['Number OK'] / stats['Total Docs'] * 100).round(2)\n", "\n", "print(\"--- Accuracy / Success Report ---\")\n", "print(stats)\n", "\n", "# 4. Total overall success\n", "total_docs = len(df)\n", "total_ok = df['is_ok'].sum()\n", "print(f\"\\nOverall Summary: {total_ok}/{total_docs} documents meet the literacy criteria ({round(total_ok/total_docs*100, 2)}%)\")" ] }, { "cell_type": "code", "execution_count": null, "id": "065399a1", "metadata": {}, "outputs": [], "source": [ "with open(\"/home/mshahidul/readctrl/data/annotators_validate_data/Sharmin Sultana_2025-12-31_14-19-30/annotation_results.json\", 'r') as f:\n", " data = json.load(f)\n", "print(data[0].keys())" ] }, { "cell_type": "code", "execution_count": null, "id": "5835ec3b", "metadata": {}, "outputs": [], "source": [ "import json\n", "import os\n", "import math\n", "\n", "# Define paths\n", "input_path = \"/home/mshahidul/readctrl/data/annotators_validate_data/Sharmin Sultana_2025-12-31_14-19-30/annotation_results.json\"\n", "output_path = \"/home/mshahidul/readctrl/data/annotators_validate_data/Sharmin Sultana_2025-12-31_14-19-30/annotation_results_rescaled.json\"\n", "\n", "def rescale_rating(val):\n", " if val is None:\n", " return None\n", " # Converts 1-10 to 1-5 (e.g., 10 becomes 5, 1 becomes 1)\n", " return math.ceil(val / 2)\n", "\n", "# Load data\n", "with open(input_path, 'r') as f:\n", " data = json.load(f)\n", "\n", "# Process ratings\n", "for entry in data:\n", " if 'doc_rating' in entry:\n", " entry['doc_rating'] = rescale_rating(entry['doc_rating'])\n", " if 'wiki_rating' in entry:\n", " entry['wiki_rating'] = rescale_rating(entry['wiki_rating'])\n", "\n", "# Save updated data\n", "with open(output_path, 'w') as f:\n", " json.dump(data, f, indent=4)\n", "\n", "print(f\"Successfully saved rescaled data to: {output_path}\")" ] }, { "cell_type": "code", "execution_count": null, "id": "f5865b65", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "de9b3b2a", "metadata": {}, "outputs": [], "source": [ "# /home/mshahidul/readctrl/data/final_result/mismatched_ratings.json\n", "with open(\"/home/mshahidul/readctrl/data/final_result/mismatched_ratings.json\", 'r') as f:\n", " data = json.load(f)\n", "id=0\n", "index=data[id]['doc_id']\n", "label=data[id]['health_literacy_label']\n", "print(data[id])" ] }, { "cell_type": "code", "execution_count": null, "id": "e307543c", "metadata": {}, "outputs": [], "source": [ "# /home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json\n", "with open(\"/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json\", 'r') as f:\n", " data2 = json.load(f)\n", "src_lang=\"English\"\n", "summary=data2[index]['summary']\n", "fulltext=data2[index]['fulltext']\n", "gen_summary=data2[index]['diff_label_texts'][label]\n", "f=open(\"/home/mshahidul/readctrl/prompts/syn_data_gen_diff_label.txt\",\"r\").read()\n", "txt=f.replace(\"<<>>\",src_lang).replace(\"<<>>\",summary).replace(\"<<>>\",fulltext)\n", "print(txt)" ] }, { "cell_type": "code", "execution_count": null, "id": "59c72954", "metadata": {}, "outputs": [], "source": [ "print(gen_summary)" ] }, { "cell_type": "code", "execution_count": null, "id": "4b2a2595", "metadata": {}, "outputs": [], "source": [ "# /home/mshahidul/readctrl/data/final_result/consolidated_ratings_edit.json\n", "import json\n", "with open(\"/home/mshahidul/readctrl/data/final_result/consolidated_ratings_edit.json\", 'r') as f:\n", " data = json.load(f)\n", "print(data[0].keys())" ] }, { "cell_type": "code", "execution_count": null, "id": "46f54097", "metadata": {}, "outputs": [], "source": [ "set([x[\"health_literacy_label\"] for x in data])" ] }, { "cell_type": "code", "execution_count": null, "id": "9b1264ea", "metadata": {}, "outputs": [], "source": [ "# /home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json\n", "with open(\"/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json\", 'r') as f:\n", " data2 = json.load(f)\n", "print(data2[0].keys())\n", "print(data2[0]['diff_label_texts'].keys())" ] }, { "cell_type": "markdown", "id": "d847270d", "metadata": {}, "source": [ "## Step 0: Prepare Your Dataset" ] }, { "cell_type": "code", "execution_count": null, "id": "44047dbb", "metadata": {}, "outputs": [], "source": [ "import json\n", "\n", "# 1. Load the datasets\n", "with open(\"/home/mshahidul/readctrl/data/final_result/consolidated_ratings_edit.json\", 'r') as f:\n", " ratings_data = json.load(f)\n", "ratings_data=ratings_data[7:]\n", "with open(\"/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json\", 'r') as f:\n", " text_data = json.load(f)\n", "\n", "# 2. Updated mapping: Store the whole item or specific keys for fulltext and summary\n", "# We map the index to a dictionary containing the variations and the original full text/summary\n", "text_map = {\n", " item['index']: {\n", " 'variations': item['diff_label_texts'],\n", " 'fulltext': item.get('fulltext', \"\"),\n", " 'summary': item.get('summary', \"\")\n", " } \n", " for item in text_data\n", "}\n", "\n", "cleaned_data = []\n", "\n", "# 3. Iterate through ratings and extract data\n", "for entry in ratings_data:\n", " doc_id = entry['doc_id']\n", " label = entry['health_literacy_label']\n", " \n", " if doc_id in text_map:\n", " source_info = text_map[doc_id]\n", " \n", " # Retrieve the specific text version based on the label\n", " # .get() handles cases where a specific label might be missing\n", " labeled_text = source_info['variations'].get(label, \"\")\n", " \n", " # Construct the expanded object\n", " cleaned_data.append({\n", " \"doc_id\": doc_id,\n", " \"label\": label,\n", " \"gen_text\": labeled_text,\n", " \"fulltext\": source_info['fulltext'],\n", " \"gs_summary\": source_info['summary']\n", " })\n", "\n", "# 4. Output the clean JSON\n", "output_path = \"/home/mshahidul/readctrl/data/new_exp/cleaned_health_literacy_data.json\"\n", "with open(output_path, 'w') as f:\n", " json.dump(cleaned_data, f, indent=4, ensure_ascii=False)\n", "\n", "print(f\"Successfully processed {len(cleaned_data)} examples.\")" ] }, { "cell_type": "markdown", "id": "a1e6b0ae", "metadata": {}, "source": [ "## Step 1: Pick Few-Shot Examples" ] }, { "cell_type": "code", "execution_count": null, "id": "71e83ac8", "metadata": {}, "outputs": [], "source": [ "import json\n", "import requests\n", "from collections import defaultdict\n", "\n", "# Configuration\n", "API_URL = \"http://172.16.34.29:8004/v1/chat/completions\"\n", "MODEL_NAME = \"Qwen/Qwen3-30B-A3B-Instruct-2507\"\n", "INPUT_FILE = \"/home/mshahidul/readctrl/data/new_exp/cleaned_health_literacy_data.json\"\n", "OUTPUT_FILE = \"/home/mshahidul/readctrl/data/new_exp/few_shot_examples.json\"\n", "\n", "def get_text_metadata(text):\n", " \"\"\"Ask the LLM to identify the topic and medical complexity of a text.\"\"\"\n", " prompt = f\"\"\"Analyze the following medical text and provide a 1-word topic (e.g., Cardiology, Nutrition, Medication) and a 1-word complexity level (Simple, Moderate, Technical).\n", " Text: {text}...\n", " Format: Topic | Complexity\"\"\"\n", " \n", " try:\n", " response = requests.post(API_URL, json={\n", " \"model\": MODEL_NAME,\n", " \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n", " \"temperature\": 0.1\n", " })\n", " return response.json()['choices'][0]['message']['content'].strip()\n", " except:\n", " return \"General | Unknown\"\n", "\n", "# 1. Load the cleaned data\n", "with open(INPUT_FILE, 'r') as f:\n", " data = json.load(f)\n", "\n", "# 2. Group data by label\n", "grouped_data = defaultdict(list)\n", "for item in data:\n", " grouped_data[item['label']].append(item)\n", "\n", "# 3. Select diverse examples for each label\n", "few_shot_selection = {}\n", "\n", "for label, examples in grouped_data.items():\n", " print(f\"Processing label: {label}...\")\n", " \n", " # Analyze a subset (or all) to find diversity\n", " scored_examples = []\n", " for ex in examples: \n", " metadata = get_text_metadata(ex['gen_text'])\n", " ex['metadata'] = metadata\n", " scored_examples.append(ex)\n", " \n", " # Heuristic: Sort by metadata to group similar topics, then pick spread-out indices\n", " scored_examples.sort(key=lambda x: x['metadata'])\n", " \n", " # Pick 5 examples spread across the sorted metadata for maximum diversity\n", " step = max(1, len(scored_examples) // 5)\n", " selected = scored_examples[::step][:5]\n", " few_shot_selection[label] = selected\n", "\n", "# 4. Save the result\n", "with open(OUTPUT_FILE, 'w') as f:\n", " json.dump(few_shot_selection, f, indent=4)\n", "\n", "print(f\"Few-shot examples saved to: {OUTPUT_FILE}\")" ] }, { "cell_type": "markdown", "id": "d48720a6", "metadata": {}, "source": [ "## Step 2: Decide on LLM(s)" ] }, { "cell_type": "markdown", "id": "4396ac94", "metadata": {}, "source": [ "### V1" ] }, { "cell_type": "code", "execution_count": null, "id": "f96d976b", "metadata": {}, "outputs": [], "source": [ "import json\n", "import requests\n", "\n", "# Configuration\n", "API_URL = \"http://172.16.34.29:8004/v1/chat/completions\"\n", "MODEL_NAME = \"Qwen/Qwen3-30B-A3B-Instruct-2507\"\n", "FEW_SHOT_FILE = \"/home/mshahidul/readctrl/data/new_exp/few_shot_examples.json\"\n", "\n", "# 1. Load the 15 selected examples\n", "with open(FEW_SHOT_FILE, 'r') as f:\n", " few_shot_data = json.load(f)\n", "\n", "def get_reasoning(fulltext, gen_text, label):\n", " \"\"\"Ask the LLM to explain why the text fits the label compared to the source context.\"\"\"\n", " prompt = f\"\"\"Compare the 'Target Text' to the 'Original Fulltext'. \n", "Explain why the Target Text fits the health literacy label: {label}.\n", "Focus on how vocabulary, jargon, and sentence structure were adapted.\n", "\n", "Original Fulltext: {fulltext}\n", "Target Text: {gen_text}\n", "Label: {label}\n", "\n", "Reasoning (1-2 sentences):\"\"\"\n", " \n", " try:\n", " response = requests.post(API_URL, json={\n", " \"model\": MODEL_NAME,\n", " \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n", " \"temperature\": 0\n", " })\n", " return response.json()['choices'][0]['message']['content'].strip()\n", " except Exception as e:\n", " return \"Reasoning could not be generated.\"\n", "\n", "# 2. Build the few-shot string\n", "few_shot_string = \"\"\n", "\n", "for label in [\"low_health_literacy\", \"intermediate_health_literacy\", \"proficient_health_literacy\"]:\n", " examples = few_shot_data.get(label, [])\n", " for ex in examples:\n", " # Pass fulltext to the reasoning generator\n", " reason = get_reasoning(ex.get('fulltext', \"\"), ex['gen_text'], label)\n", " \n", " few_shot_string += f\"Original Fulltext: \\\"{ex.get('fulltext', '')}\\\"\\n\"\n", " few_shot_string += f\"Target Text: \\\"{ex['gen_text']}\\\"\\n\"\n", " few_shot_string += f\"Reasoning: {reason}\\n\"\n", " few_shot_string += f\"Label: {label}\\n\"\n", " few_shot_string += \"-\" * 30 + \"\\n\"\n", "\n", "# 3. Define the Final Prompt Structure\n", "instruction = \"\"\"You are an expert in health communication. Your task is to judge the health literacy level of a target text based on its original medical source.\n", "\n", "Classify the text into one of three categories:\n", "1. low_health_literacy: Uses common words (everyday language), very short sentences, and eliminates all medical jargon.\n", "2. intermediate_health_literacy: Uses some medical terms with explanation, standard sentence length, requires basic health knowledge.\n", "3. proficient_health_literacy: Uses high-level medical jargon, technical language, and academic or professional structures.\n", "\n", "### Few-Shot Examples:\n", "\"\"\"\n", "\n", "# 4. Save the prompt template\n", "# The placeholder now expects both fulltext and input_text\n", "final_prompt_template = (\n", " instruction + \n", " few_shot_string + \n", " \"\\n### Now judge this text:\\n\"\n", " \"Original Fulltext: \\\"{fulltext}\\\"\\n\"\n", " \"Target Text: \\\"{input_text}\\\"\\n\"\n", " \"Reasoning:\"\n", ")\n", "\n", "output_path = \"/home/mshahidul/readctrl/data/new_exp/final_prompt_template.txt\"\n", "with open(output_path, 'w') as f:\n", " f.write(final_prompt_template)\n", "\n", "print(f\"Prompt template with fulltext context saved to {output_path}\")" ] }, { "cell_type": "code", "execution_count": 2, "id": "3bc0564f", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3396\n" ] } ], "source": [ "# /home/mshahidul/readctrl/data/synthetic_dataset_diff_labels/syn_data_with_gs_summary_en.json\n", "import json\n", "with open(\"/home/mshahidul/readctrl/data/processed_test_raw_data/multiclinsum_test_en.json\", 'r') as f:\n", " data = json.load(f)\n", "print(len(data))" ] }, { "cell_type": "code", "execution_count": 4, "id": "882507f2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dict_keys(['id', 'fulltext', 'fulltext_subclaims', 'summary', 'summary_subclaims'])\n", "3396\n" ] } ], "source": [ "# /home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json\n", "import json\n", "with open(\"/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_multiclinsum_test_en_full.json\", 'r') as f:\n", " data = json.load(f)\n", "print(data[0].keys())\n", "print(len(data))" ] }, { "cell_type": "code", "execution_count": null, "id": "b0fcc380", "metadata": {}, "outputs": [], "source": [ "LOCAL_API_URL = \"http://172.16.34.29:8004/v1\"\n", "LOCAL_MODEL_NAME = \"/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-extraction-8b_ctx_fp16\"" ] }, { "cell_type": "code", "execution_count": 1, "id": "d8b235a6", "metadata": {}, "outputs": [ { "ename": "JSONDecodeError", "evalue": "Extra data: line 2 column 1 (char 22694)", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mJSONDecodeError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 4\u001b[39m\n\u001b[32m 2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mjson\u001b[39;00m\n\u001b[32m 3\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(\u001b[33m\"\u001b[39m\u001b[33m/home/mshahidul/LLM_guard/CKA-Agent/results/single_run_20260203_213455/inter_result_sample_0.json\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mr\u001b[39m\u001b[33m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[32m----> \u001b[39m\u001b[32m4\u001b[39m data = \u001b[43mjson\u001b[49m\u001b[43m.\u001b[49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 5\u001b[39m \u001b[38;5;28mprint\u001b[39m(data[\u001b[32m0\u001b[39m].keys())\n\u001b[32m 6\u001b[39m \u001b[38;5;28mprint\u001b[39m(data[\u001b[32m0\u001b[39m][\u001b[33m'\u001b[39m\u001b[33minter_result\u001b[39m\u001b[33m'\u001b[39m])\n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/un/lib/python3.11/json/__init__.py:293\u001b[39m, in \u001b[36mload\u001b[39m\u001b[34m(fp, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[39m\n\u001b[32m 274\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mload\u001b[39m(fp, *, \u001b[38;5;28mcls\u001b[39m=\u001b[38;5;28;01mNone\u001b[39;00m, object_hook=\u001b[38;5;28;01mNone\u001b[39;00m, parse_float=\u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m 275\u001b[39m parse_int=\u001b[38;5;28;01mNone\u001b[39;00m, parse_constant=\u001b[38;5;28;01mNone\u001b[39;00m, object_pairs_hook=\u001b[38;5;28;01mNone\u001b[39;00m, **kw):\n\u001b[32m 276\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Deserialize ``fp`` (a ``.read()``-supporting file-like object containing\u001b[39;00m\n\u001b[32m 277\u001b[39m \u001b[33;03m a JSON document) to a Python object.\u001b[39;00m\n\u001b[32m 278\u001b[39m \n\u001b[32m (...)\u001b[39m\u001b[32m 291\u001b[39m \u001b[33;03m kwarg; otherwise ``JSONDecoder`` is used.\u001b[39;00m\n\u001b[32m 292\u001b[39m \u001b[33;03m \"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m293\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfp\u001b[49m\u001b[43m.\u001b[49m\u001b[43mread\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 294\u001b[39m \u001b[43m \u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobject_hook\u001b[49m\u001b[43m=\u001b[49m\u001b[43mobject_hook\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 295\u001b[39m \u001b[43m \u001b[49m\u001b[43mparse_float\u001b[49m\u001b[43m=\u001b[49m\u001b[43mparse_float\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparse_int\u001b[49m\u001b[43m=\u001b[49m\u001b[43mparse_int\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m 296\u001b[39m \u001b[43m \u001b[49m\u001b[43mparse_constant\u001b[49m\u001b[43m=\u001b[49m\u001b[43mparse_constant\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mobject_pairs_hook\u001b[49m\u001b[43m=\u001b[49m\u001b[43mobject_pairs_hook\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/un/lib/python3.11/json/__init__.py:346\u001b[39m, in \u001b[36mloads\u001b[39m\u001b[34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[39m\n\u001b[32m 341\u001b[39m s = s.decode(detect_encoding(s), \u001b[33m'\u001b[39m\u001b[33msurrogatepass\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m 343\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[32m 344\u001b[39m parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[32m 345\u001b[39m parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[32m--> \u001b[39m\u001b[32m346\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 347\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 348\u001b[39m \u001b[38;5;28mcls\u001b[39m = JSONDecoder\n", "\u001b[36mFile \u001b[39m\u001b[32m~/miniconda3/envs/un/lib/python3.11/json/decoder.py:340\u001b[39m, in \u001b[36mJSONDecoder.decode\u001b[39m\u001b[34m(self, s, _w)\u001b[39m\n\u001b[32m 338\u001b[39m end = _w(s, end).end()\n\u001b[32m 339\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m end != \u001b[38;5;28mlen\u001b[39m(s):\n\u001b[32m--> \u001b[39m\u001b[32m340\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[33m\"\u001b[39m\u001b[33mExtra data\u001b[39m\u001b[33m\"\u001b[39m, s, end)\n\u001b[32m 341\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m obj\n", "\u001b[31mJSONDecodeError\u001b[39m: Extra data: line 2 column 1 (char 22694)" ] } ], "source": [ "# /home/mshahidul/LLM_guard/CKA-Agent/results/single_run_20260203_213455/inter_result_sample_0.json\n", "import json\n", "with open(\"/home/mshahidul/LLM_guard/CKA-Agent/results/single_run_20260203_213455/inter_result_sample_0.json\", 'r') as f:\n", " data = json.load(f)\n", "print(data[0].keys())\n", "print(data[0]['inter_result'])\n", "\n" ] }, { "cell_type": "markdown", "id": "74b07429", "metadata": {}, "source": [ "## V2" ] }, { "cell_type": "code", "execution_count": null, "id": "912f3d85", "metadata": {}, "outputs": [], "source": [ "import json\n", "import requests\n", "from openai import OpenAI\n", "\n", "# --- Configuration ---\n", "LOCAL_API_URL = \"http://172.16.34.29:8004/v1/chat/completions\"\n", "LOCAL_MODEL_NAME = \"Qwen/Qwen3-30B-A3B-Instruct-2507\"\n", "\n", "api_file = \"/home/mshahidul/api_new.json\"\n", "with open(api_file, \"r\") as f:\n", " api_keys = json.load(f)\n", "\n", "openai_client = OpenAI(api_key=api_keys[\"openai\"])\n", "OPENAI_MODEL_NAME = \"gpt-5\" # Note: Ensure your model version is correct\n", "\n", "FEW_SHOT_FILE = \"/home/mshahidul/readctrl/data/new_exp/few_shot_examples.json\"\n", "OUTPUT_PATH = \"/home/mshahidul/readctrl/data/new_exp/final_prompt_template.txt\"\n", "\n", "# --- Logic ---\n", "\n", "def get_reasoning(fulltext, gen_text, label, provider=\"local\"):\n", " \"\"\"\n", " Ask an LLM to explain why the text fits the label in JSON format.\n", " \"\"\"\n", " # Explicitly asking for JSON in the prompt\n", " prompt = f\"\"\"Compare the 'Target Text' to the 'Original Fulltext'. \n", "Explain why the Target Text fits the health literacy label: {label}.\n", "Focus on how vocabulary, jargon, and sentence structure were adapted.\n", "\n", "Original Fulltext: {fulltext}\n", "Target Text: {gen_text}\n", "Label: {label}\n", "\n", "Return your response ONLY as a JSON object with the following key:\n", "\"reasoning\": \"your 1-2 sentence explanation\"\n", "\"\"\"\n", "\n", " try:\n", " if provider == \"openai\":\n", " response = openai_client.chat.completions.create(\n", " model=OPENAI_MODEL_NAME,\n", " messages=[{\"role\": \"user\", \"content\": prompt}],\n", " response_format={ \"type\": \"json_object\" } # Force JSON for OpenAI\n", " )\n", " content = response.choices[0].message.content.strip()\n", " else:\n", " response = requests.post(LOCAL_API_URL, json={\n", " \"model\": LOCAL_MODEL_NAME,\n", " \"messages\": [{\"role\": \"user\", \"content\": prompt}],\n", " \"temperature\": 0\n", " })\n", " content = response.json()['choices'][0]['message']['content'].strip()\n", " \n", " # Parse JSON and extract reasoning\n", " data = json.loads(content)\n", " return data.get(\"reasoning\", \"Reasoning key not found.\")\n", " \n", " except Exception as e:\n", " print(f\"Error with {provider}: {e}\")\n", " return \"Reasoning could not be generated.\"\n", "\n", "# 1. Load the selected examples\n", "with open(FEW_SHOT_FILE, 'r') as f:\n", " few_shot_data = json.load(f)\n", "\n", "# 2. Build the few-shot string\n", "few_shot_string = \"\"\n", "REASONING_PROVIDER = \"openai\" \n", "\n", "print(f\"Generating reasoning using: {REASONING_PROVIDER}...\")\n", "info=[]\n", "for label in [\"low_health_literacy\", \"intermediate_health_literacy\", \"proficient_health_literacy\"]:\n", " examples = few_shot_data.get(label, [])\n", " for ex in examples:\n", " reason = get_reasoning(ex.get('fulltext', \"\"), ex['gen_text'], label, provider=REASONING_PROVIDER)\n", " \n", " # Adding structured few-shot examples to the string\n", " few_shot_string += f\"Original Fulltext: \\\"{ex.get('fulltext', '')}\\\"\\n\"\n", " few_shot_string += f\"Target Text: \\\"{ex['gen_text']}\\\"\\n\"\n", " few_shot_string += f\"Reasoning: {reason}\\n\"\n", " few_shot_string += f\"Label: {label}\\n\"\n", " few_shot_string += \"-\" * 30 + \"\\n\"\n", " info.append({\n", " \"doc_id\": ex.get('doc_id', \"\"),\n", " \"fulltext\": ex.get('fulltext', \"\"),\n", " \"gen_text\": ex['gen_text'],\n", " \"reasoning\": reason,\n", " \"label\": label\n", " }) \n", "\n", "# 3. Define the Final Prompt Structure\n", "instruction = \"\"\"You are an expert in health communication. Your task is to judge the health literacy level of a target text based on its original medical source.\n", "\n", "Classify the text into one of three categories:\n", "1. low_health_literacy: Uses common words (everyday language), very short sentences, and eliminates all medical jargon.\n", "2. intermediate_health_literacy: Uses some medical terms with explanation, standard sentence length, requires basic health knowledge.\n", "3. proficient_health_literacy: Uses high-level medical jargon, technical language, and academic or professional structures.\n", "\n", "### Few-Shot Examples:\n", "\"\"\"\n", "\n", "# 4. Final Template Construction\n", "final_prompt_template = (\n", " instruction + \n", " few_shot_string + \n", " \"\\n### Now judge this text:\\n\"\n", " \"Original Fulltext: \\\"{fulltext}\\\"\\n\"\n", " \"Target Text: \\\"{input_text}\\\"\\n\"\n", " \"Reasoning:\"\n", ")\n", "\n", "with open(OUTPUT_PATH, 'w') as f:\n", " f.write(final_prompt_template)\n", "with open(OUTPUT_PATH.replace('.txt', '_info.json'), 'w') as f:\n", " json.dump(info, f, indent=4)\n", "print(f\"Structured prompt template saved to {OUTPUT_PATH}\")" ] }, { "cell_type": "code", "execution_count": null, "id": "feafa46d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dict_keys(['doc_id', 'ai_label', 'rating_plaban', 'category_plaban', 'rating_mahi', 'category_mahi', 'rating_shama', 'category_shama', 'agreement_count'])\n" ] } ], "source": [ "import json\n", "# /home/mshahidul/readctrl/data/synthetic_dataset_diff_labels/syn_data_diff_labels_en_0_80_full.json\n", "with open(\"/home/mshahidul/readctrl/data/synthetic_dataset_diff_labels/syn_data_diff_labels_en_0_80_full.json\", 'r') as f:\n", " data = json.load(f)\n", "print(data[0].keys())\n", "print(data[0]['diff_label_texts'].keys())" ] }, { "cell_type": "markdown", "id": "8c470dd5", "metadata": {}, "source": [ "## Fewshot data selection" ] }, { "cell_type": "code", "execution_count": null, "id": "06158d8d", "metadata": {}, "outputs": [], "source": [ "import json\n", "import os\n", "\n", "# --- Configuration ---\n", "# Path to your existing data (containing 'reasoning', 'gen_text', and 'label')\n", "INPUT_INFO_FILE = \"/home/mshahidul/readctrl/data/new_exp/final_prompt_template_info.json\"\n", "OUTPUT_PATH = \"/home/mshahidul/readctrl/data/new_exp/new_prompt_template.txt\"\n", "\n", "# Decide how many few-shot examples you want to include for each label\n", "FEW_SHOT_PER_LABEL = 2 # Change this to 1, 3, etc.\n", "\n", "# --- Logic ---\n", "\n", "def generate_prompt_from_json(input_json_path, num_per_label):\n", " if not os.path.exists(input_json_path):\n", " return f\"Error: File {input_json_path} not found. Please check the path.\"\n", " \n", " with open(input_json_path, 'r') as f:\n", " data = json.load(f)\n", " \n", " # Organize the data by label to ensure even distribution\n", " labeled_data = {}\n", " for entry in data:\n", " label = entry['label']\n", " if label not in labeled_data:\n", " labeled_data[label] = []\n", " labeled_data[label].append(entry)\n", " \n", " # Build the few-shot section\n", " few_shot_string = \"\"\n", " # Define labels in a logical order\n", " target_labels = [\"low_health_literacy\", \"intermediate_health_literacy\", \"proficient_health_literacy\"]\n", " \n", " for label in target_labels:\n", " examples = labeled_data.get(label, [])\n", " # Slice the list based on your variable\n", " selected_examples = examples[:num_per_label]\n", " \n", " for ex in selected_examples:\n", " # Construct the example block WITHOUT the fulltext\n", " few_shot_string += f\"Target Text: \\\"{ex['gen_text']}\\\"\\n\"\n", " few_shot_string += f\"Reasoning: {ex['reasoning']}\\n\"\n", " few_shot_string += f\"Label: {label}\\n\"\n", " few_shot_string += \"-\" * 30 + \"\\n\"\n", "\n", " # Define the final instruction structure (no mention of fulltext comparison)\n", " instruction = \"\"\"You are an expert in health communication. Your task is to judge the health literacy level of the provided text.\n", "\n", "Classify the text into one of three categories:\n", "1. low_health_literacy: Uses common words (everyday language), very short sentences, and avoids medical jargon.\n", "2. intermediate_health_literacy: Uses some medical terms with explanation, standard sentence length, requires basic health knowledge.\n", "3. proficient_health_literacy: Uses high-level medical jargon, technical language, and academic or professional structures.\n", "\n", "### Examples:\n", "\"\"\"\n", "\n", " # Final Template Construction\n", " final_template = (\n", " instruction + \n", " few_shot_string + \n", " \"\\n### Task:\\n\"\n", " \"Target Text: \\\"{input_text}\\\"\\n\"\n", " \"Reasoning:\"\n", " )\n", " \n", " return final_template\n", "\n", "# 1. Generate the string\n", "new_prompt_template = generate_prompt_from_json(INPUT_INFO_FILE, FEW_SHOT_PER_LABEL)\n", "\n", "# 2. Save to file\n", "with open(OUTPUT_PATH, 'w') as f:\n", " f.write(new_prompt_template)\n", "\n", "print(f\"Successfully created a prompt with {FEW_SHOT_PER_LABEL} examples per label.\")\n", "print(f\"Saved to: {OUTPUT_PATH}\")" ] }, { "cell_type": "code", "execution_count": null, "id": "f78d4619", "metadata": {}, "outputs": [], "source": [ "\n", "with open(\"/home/mshahidul/readctrl/data/new_exp/cleaned_health_literacy_data.json\", 'r') as f:\n", " cleaned_data = json.load(f)\n", "with open(\"/home/mshahidul/readctrl/data/new_exp/few_shot_examples.json\", 'r') as f:\n", " few_shot_examples = json.load(f)\n", "\n", "list_data = []\n", "for item in few_shot_examples:\n", " for ex in few_shot_examples[item]:\n", " list_data.append((ex['doc_id'], ex['label']))\n", "\n", "test_set = []\n", "for item in cleaned_data:\n", " if (item['doc_id'], item['label']) not in list_data:\n", " test_set.append(item)\n", "with open(\"/home/mshahidul/readctrl/data/new_exp/test_health_literacy_data.json\", 'w') as f:\n", " json.dump(test_set, f, indent=4)" ] }, { "cell_type": "markdown", "id": "9d33bb77", "metadata": {}, "source": [ "## Testing V1" ] }, { "cell_type": "code", "execution_count": null, "id": "e2e888eb", "metadata": {}, "outputs": [], "source": [ "import json\n", "import requests\n", "\n", "# --- Configuration ---\n", "TEMPLATE_PATH = \"/home/mshahidul/readctrl/data/new_exp/final_prompt_template_v3.txt\"\n", "LOCAL_API_URL = \"http://172.16.34.29:8004/v1/chat/completions\"\n", "LOCAL_MODEL_NAME = \"Qwen/Qwen3-30B-A3B-Instruct-2507\"\n", "\n", "# --- 1. Load the Template ---\n", "with open(TEMPLATE_PATH, \"r\") as f:\n", " prompt_template = f.read()\n", "\n", "# --- 2. Define Test Cases ---\n", "with open(\"/home/mshahidul/readctrl/data/new_exp/cleaned_health_literacy_data.json\", 'r') as f:\n", " cleaned_data = json.load(f)\n", "with open(\"/home/mshahidul/readctrl/data/new_exp/few_shot_examples.json\", 'r') as f:\n", " few_shot_examples = json.load(f)\n", "\n", "list_data = []\n", "for item in few_shot_examples:\n", " for ex in few_shot_examples[item]:\n", " list_data.append((ex['doc_id'], ex['label']))\n", "\n", "test_set = []\n", "for item in cleaned_data:\n", " if (item['doc_id'], item['label']) not in list_data:\n", " test_set.append(item)\n", "\n", "def run_test(fulltext, input_text):\n", " final_prompt = prompt_template.format(fulltext=fulltext, input_text=input_text)\n", " \n", " payload = {\n", " \"model\": LOCAL_MODEL_NAME,\n", " \"messages\": [{\"role\": \"user\", \"content\": final_prompt}],\n", " \"temperature\": 0 \n", " }\n", " \n", " try:\n", " response = requests.post(LOCAL_API_URL, json=payload, timeout=30)\n", " return response.json()['choices'][0]['message']['content'].strip()\n", " except Exception as e:\n", " return f\"Error: {e}\"\n", "\n", "# --- 3. Execute and Compare ---\n", "print(f\"--- Starting Template Evaluation on {len(test_set)} cases ---\\n\")\n", "\n", "correct_count = 0\n", "results_log = []\n", "\n", "def text_return(text):\n", " if \"low\" in text.lower():\n", " return \"low_health_literacy\"\n", " elif \"intermediate\" in text.lower():\n", " return \"intermediate_health_literacy\"\n", " elif \"proficient\" in text.lower():\n", " return \"proficient_health_literacy\"\n", " return \"unknown\"\n", "\n", "for i, case in enumerate(test_set):\n", " expected = str(case['label']).strip().lower()\n", " result = run_test(case['fulltext'], case['gen_text'])\n", " \n", " # Clean LLM output for comparison (case-insensitive and removing trailing periods)\n", " prediction = result.strip().lower().rstrip('.')\n", " \n", " # Check if the expected label is the primary answer in the result\n", " is_correct = (text_return(expected) == text_return(prediction) )\n", " \n", " if is_correct:\n", " correct_count += 1\n", " \n", " print(f\"Test Case {i+1}:\")\n", " print(f\"Expected: {case['label']}\")\n", " print(f\"LLM Output: {result}\")\n", " print(f\"Match: {'✅' if is_correct else '❌'}\")\n", " print(\"-\" * 50)\n", "\n", "# --- 4. Final Accuracy Calculation ---\n", "total_cases = len(test_set)\n", "if total_cases > 0:\n", " accuracy = (correct_count / total_cases) * 100\n", " print(f\"\\n--- Evaluation Summary ---\")\n", " print(f\"Total Tested: {total_cases}\")\n", " print(f\"Correct: {correct_count}\")\n", " print(f\"Accuracy: {accuracy:.2f}%\")\n", "else:\n", " print(\"No test cases found.\")" ] }, { "cell_type": "markdown", "id": "0531d7c3", "metadata": {}, "source": [ "## Testing V2" ] }, { "cell_type": "code", "execution_count": null, "id": "ab8b4c96", "metadata": {}, "outputs": [], "source": [ "import json\n", "import requests\n", "import os\n", "\n", "# --- Configuration ---\n", "DEV_SET_PATH = \"/home/mshahidul/readctrl/data/new_exp/test_health_literacy_data.json\"\n", "FEW_SHOT_SET_PATH = \"/home/mshahidul/readctrl/data/new_exp/final_prompt_template_info.json\" # Using the one with reasoning\n", "LOCAL_API_URL = \"http://172.16.34.29:8004/v1/chat/completions\"\n", "LOCAL_MODEL_NAME = \"Qwen/Qwen3-30B-A3B-Instruct-2507\"\n", "\n", "# Define the range of few-shots per label you want to test\n", "# e.g., [0, 1, 2, 3] will test 0-shot, 1-shot (3 total), 2-shot (6 total), etc.\n", "SHOTS_TO_EVALUATE = [0, 1, 2, 3]\n", "\n", "# --- Core Functions ---\n", "\n", "def build_dynamic_prompt(few_shot_data, k_per_label):\n", " \"\"\"Constructs a prompt with k examples per literacy category.\"\"\"\n", " instruction = (\n", " \"You are an expert in health communication. Your task is to judge the health literacy level of the provided text.\\n\"\n", " \"Classify the text into: low_health_literacy, intermediate_health_literacy, or proficient_health_literacy.\\n\\n\"\n", " )\n", " \n", " if k_per_label == 0:\n", " return instruction + \"### Task:\\nTarget Text: \\\"{input_text}\\\"\\nReasoning:\"\n", "\n", " # Organize few-shot data by label\n", " categorized = {}\n", " for entry in few_shot_data:\n", " label = entry['label']\n", " categorized.setdefault(label, []).append(entry)\n", "\n", " few_shot_blocks = \"### Examples:\\n\"\n", " labels = [\"low_health_literacy\", \"intermediate_health_literacy\", \"proficient_health_literacy\"]\n", " \n", " for label in labels:\n", " examples = categorized.get(label, [])[:k_per_label]\n", " for ex in examples:\n", " few_shot_blocks += f\"Target Text: \\\"{ex['gen_text']}\\\"\\n\"\n", " few_shot_blocks += f\"Reasoning: {ex['reasoning']}\\n\"\n", " few_shot_blocks += f\"Label: {label}\\n\"\n", " few_shot_blocks += \"-\" * 30 + \"\\n\"\n", " \n", " return instruction + few_shot_blocks + \"\\n### Task:\\nTarget Text: \\\"{input_text}\\\"\\nReasoning:\"\n", "\n", "def get_prediction(prompt_template, input_text):\n", " \"\"\"Sends the formatted prompt to the local LLM.\"\"\"\n", " final_prompt = prompt_template.format(input_text=input_text)\n", " payload = {\n", " \"model\": LOCAL_MODEL_NAME,\n", " \"messages\": [{\"role\": \"user\", \"content\": final_prompt}],\n", " \"temperature\": 0 \n", " }\n", " try:\n", " response = requests.post(LOCAL_API_URL, json=payload, timeout=30)\n", " return response.json()['choices'][0]['message']['content'].strip()\n", " except Exception:\n", " return \"Error\"\n", "\n", "def parse_label(text):\n", " \"\"\"Normalizes LLM output to match dataset labels.\"\"\"\n", " text = text.lower()\n", " if \"low\" in text: return \"low_health_literacy\"\n", " if \"intermediate\" in text: return \"intermediate_health_literacy\"\n", " if \"proficient\" in text: return \"proficient_health_literacy\"\n", " return \"unknown\"\n", "\n", "# --- Main Execution ---\n", "\n", "# 1. Load Data\n", "with open(DEV_SET_PATH, 'r') as f:\n", " dev_set = json.load(f)\n", "with open(FEW_SHOT_SET_PATH, 'r') as f:\n", " few_shot_pool = json.load(f)\n", "\n", "# 2. Filter Dev Set\n", "# Ensure no overlap between few-shot examples and dev set\n", "shot_ids = {item['doc_id'] for item in few_shot_pool}\n", "clean_dev_set = [item for item in dev_set if item['doc_id'] not in shot_ids]\n", "\n", "results_summary = []\n", "\n", "print(f\"Starting Evaluation on {len(clean_dev_set)} samples...\\n\")\n", "\n", "# 3. Loop through shot counts\n", "for k in SHOTS_TO_EVALUATE:\n", " print(f\"Evaluating {k}-shot per label (Total {k*3} examples)...\")\n", " \n", " current_template = build_dynamic_prompt(few_shot_pool, k)\n", " correct = 0\n", " \n", " for case in clean_dev_set:\n", " raw_output = get_prediction(current_template, case['gen_text'])\n", " pred = parse_label(raw_output)\n", " actual = parse_label(case['label'])\n", " \n", " if pred == actual:\n", " correct += 1\n", " \n", " accuracy = (correct / len(clean_dev_set)) * 100\n", " results_summary.append({\"shots_per_label\": k, \"accuracy\": accuracy})\n", " print(f\"-> Accuracy: {accuracy:.2f}%\\n\")\n", "\n", "# --- Final Report ---\n", "print(\"-\" * 30)\n", "print(f\"{'Shots/Label':<15} | {'Accuracy':<10}\")\n", "print(\"-\" * 30)\n", "for res in results_summary:\n", " print(f\"{res['shots_per_label']:<15} | {res['accuracy']:.2f}%\")" ] }, { "cell_type": "markdown", "id": "d5cd799a", "metadata": {}, "source": [ "## Step 3: Design Initial Prompt using dspy" ] }, { "cell_type": "markdown", "id": "d916470f", "metadata": {}, "source": [ "## V1" ] }, { "cell_type": "code", "execution_count": null, "id": "793a47c7", "metadata": {}, "outputs": [], "source": [ "import dspy\n", "import json\n", "from dspy.teleprompt import BootstrapFewShot\n", "\n", "# --- 1. Configure the LLM via your vLLM Endpoint ---\n", "# DSPy uses an OpenAI-compatible client for vLLM\n", "vllm_model = dspy.LM(\n", " model='openai/Qwen/Qwen3-30B-A3B-Instruct-2507', # Use 'openai/' prefix for local endpoints\n", " api_base=\"http://172.16.34.29:8004/v1\",\n", " api_key=\"EMPTY\",\n", " temperature=0.0\n", ")\n", "dspy.configure(lm=vllm_model)\n", "\n", "# --- 2. Define the Task Signature ---\n", "class HealthLiteracySignature(dspy.Signature):\n", " \"\"\"\n", " Judge the health literacy difficulty of a medical text.\n", " Classify into: low_health_literacy, intermediate_health_literacy, or proficient_health_literacy.\n", " \"\"\"\n", " text = dspy.InputField(desc=\"The medical text or patient note to analyze.\")\n", " reasoning = dspy.OutputField(desc=\"Step-by-step logic identifying jargon, sentence structure, and complexity.\")\n", " label = dspy.OutputField(desc=\"The final classification: low_health_literacy, intermediate_health_literacy, or proficient_health_literacy.\")\n", "\n", "# --- 3. Load Training Data ---\n", "with open(\"/home/mshahidul/readctrl/data/new_exp/few_shot_examples.json\", 'r') as f:\n", " raw_examples = json.load(f)\n", "\n", "# Convert your 15 examples into DSPy format\n", "trainset = []\n", "for label_key, examples in raw_examples.items():\n", " for ex in examples:\n", " trainset.append(dspy.Example(text=ex['text'], label=label_key).with_inputs('text'))\n", "\n", "# --- 4. Define the Program (Chain of Thought) ---\n", "class HealthLiteracyClassifier(dspy.Module):\n", " def __init__(self):\n", " super().__init__()\n", " # ChainOfThought automatically adds \"Reasoning\" steps to the prompt\n", " self.predictor = dspy.ChainOfThought(HealthLiteracySignature)\n", "\n", " def forward(self, text):\n", " return self.predictor(text=text)\n", "\n", "# --- 5. Define the Metric (Success = Label Match) ---\n", "def metric(gold, pred, trace=None):\n", " return gold.label == pred.label\n", "\n", "# --- 6. Run the Optimizer (Teleprompter) ---\n", "# BootstrapFewShot will test variations of the prompt to see which one works best\n", "optimizer = BootstrapFewShot(metric=metric, max_bootstrapped_demos=3, max_labeled_demos=5)\n", "optimized_program = optimizer.compile(HealthLiteracyClassifier(), trainset=trainset)\n", "\n", "# --- 7. Save the Optimized Prompt ---\n", "optimized_program.save(\"/home/mshahidul/readctrl/data/new_exp/optimized_health_classifier.json\")\n", "\n", "# Inspect the final prompt logic\n", "vllm_model.inspect_history(n=1)" ] }, { "cell_type": "markdown", "id": "06a0eb62", "metadata": {}, "source": [ "## V2" ] }, { "cell_type": "code", "execution_count": null, "id": "e3529bb0", "metadata": {}, "outputs": [], "source": [ "import dspy\n", "import json\n", "from typing import Literal\n", "from dspy.teleprompt import BootstrapFewShotWithRandomSearch\n", "from dspy.evaluate import Evaluate\n", "\n", "# --- 1. LLM Configuration ---\n", "api_file = \"/home/mshahidul/api_new.json\"\n", "with open(api_file, \"r\") as f:\n", " api_keys = json.load(f)\n", "openai_api_key = api_keys[\"openai\"]\n", "\n", "# Student: Local vLLM (Deployment Model)\n", "vllm_model = dspy.LM(\n", " model='openai/Qwen/Qwen3-30B-A3B-Instruct-2507',\n", " api_base=\"http://172.16.34.29:8004/v1\",\n", " api_key=\"EMPTY\",\n", " temperature=0.0\n", ")\n", "\n", "# Teacher: OpenAI (High-quality rationale generation)\n", "# Note: Ensure 'gpt-5' is the correct model name in your environment (usually 'gpt-4-turbo' or 'gpt-4o')\n", "openai_model_teacher = dspy.LM(model='gpt-5', api_key=openai_api_key)\n", "openai_model_student = dspy.LM(model='gpt-5-mini', api_key=openai_api_key)\n", "\n", "dspy.configure(lm=openai_model_student) # Default to OpenAI for optimization\n", "\n", "# --- 2. Data Processing & Deduplication ---\n", "\n", "# 2.1 Load Training Data (Few-Shot)\n", "with open(\"/home/mshahidul/readctrl/data/new_exp/few_shot_examples.json\", 'r') as f:\n", " few_shot_data = json.load(f)\n", "\n", "trainset = []\n", "train_identifiers = set()\n", "\n", "for label_key, examples in few_shot_data.items():\n", " for ex in examples:\n", " # Create a unique ID to prevent data leakage\n", " unique_id = f\"{ex['doc_id']}_{label_key}\"\n", " train_identifiers.add(unique_id)\n", " \n", " # In few_shot, 'gen_text' is the summary we want to judge\n", " trainset.append(dspy.Example(\n", " summary_text=ex['gen_text'], \n", " label=label_key\n", " ).with_inputs('summary_text'))\n", "\n", "# 2.2 Load Test Data as Dev Set (Updated Path)\n", "test_data_path = \"/home/mshahidul/readctrl/data/new_exp/test_health_literacy_data.json\"\n", "with open(test_data_path, 'r') as f:\n", " test_data = json.load(f)\n", "\n", "devset = []\n", "for item in test_data:\n", " unique_id = f\"{item['doc_id']}_{item['label']}\"\n", " \n", " # Filter out examples if they accidentally appear in the training set\n", " if unique_id not in train_identifiers:\n", " devset.append(dspy.Example(\n", " summary_text=item['gen_text'], \n", " label=item['label']\n", " ).with_inputs('summary_text'))\n", "\n", "print(f\"Dataset Stats: Train={len(trainset)}, Dev (Test Set)={len(devset)}\")\n", "\n", "# --- 3. Robust Signature & Module ---\n", "\n", "class HealthLiteracySignature(dspy.Signature):\n", " \"\"\"\n", " Judge the health literacy level of a generated medical summary.\n", " Identify if the language is suitable for a layperson (low) or requires medical expertise (proficient).\n", " \"\"\"\n", " summary_text: str = dspy.InputField(desc=\"The generated medical summary to be analyzed.\")\n", " reasoning: str = dspy.OutputField(desc=\"Analysis of jargon, acronyms, and sentence complexity.\")\n", " label: Literal[\"low_health_literacy\", \"intermediate_health_literacy\", \"proficient_health_literacy\"] = dspy.OutputField()\n", "\n", "class HealthLiteracyClassifier(dspy.Module):\n", " def __init__(self):\n", " super().__init__()\n", " self.predictor = dspy.ChainOfThought(HealthLiteracySignature)\n", "\n", " def forward(self, summary_text):\n", " return self.predictor(summary_text=summary_text)\n", "\n", "# --- 4. Metric and Optimization ---\n", "\n", "def health_literacy_metric(gold, pred, trace=None):\n", " if not pred or not pred.label: return False\n", " return gold.label.strip().lower() == pred.label.strip().lower()\n", "\n", "optimizer = BootstrapFewShotWithRandomSearch(\n", " metric=health_literacy_metric,\n", " max_bootstrapped_demos=3,\n", " num_candidate_programs=8, \n", " teacher_settings=dict(lm=openai_model_teacher)\n", ")\n", "\n", "# Compile the program\n", "optimized_program = optimizer.compile(HealthLiteracyClassifier(), trainset=trainset)\n", "\n", "# --- 5. Evaluation & Saving ---\n", "\n", "# Evaluate on the provided test dataset\n", "evaluator = Evaluate(devset=devset, metric=health_literacy_metric, num_threads=1, display_progress=True)\n", "accuracy_score = evaluator(optimized_program)\n", "\n", "print(f\"\\nOptimization Complete.\")\n", "print(f\"Final Accuracy on Test Set: {accuracy_score}%\")\n", "\n", "# Save the finalized prompt logic\n", "optimized_program.save(\"/home/mshahidul/readctrl/data/new_exp/optimized_health_classifier_gpt5-mini.json\")" ] }, { "cell_type": "code", "execution_count": null, "id": "96f1f99e", "metadata": {}, "outputs": [], "source": [ "print(f\"Final Accuracy on Test Set: {accuracy_score}%\")" ] }, { "cell_type": "code", "execution_count": null, "id": "814b0186", "metadata": {}, "outputs": [], "source": [ "CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 python '/home/mshahidul/readctrl/code/RL_model/finetune.py'\n", "CUDA_DEVICE_ORDER=PCI_BUS_ID CUDA_VISIBLE_DEVICES=2 python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen3-30B-A3B-Instruct-2507 --max-model-len 8192 --tensor-parallel-size 1 --port 8004 --dtype auto --trust_remote_code True" ] }, { "cell_type": "code", "execution_count": null, "id": "f0e0fbb8", "metadata": {}, "outputs": [], "source": [ "# To load and use:\n", "classifier = HealthLiteracyClassifier()\n", "classifier.load(\"/home/mshahidul/readctrl/data/new_exp/optimized_health_classifier.json\")\n", "path=\"/home/mshahidul/readctrl/data/new_exp/test_health_literacy_data.json\"\n", "with open(path,'r') as f:\n", " test_data = json.load(f)\n", "for item in test_data:\n", " expected_label = item['label']\n", " text = item['gen_text']\n", " result = classifier(summary_text=text)\n", " if (result.label == expected_label):\n", " print(f\"Correctly classified: {expected_label} ✅\")\n", " else:\n", " print(f\"Misclassified. Expected: {expected_label}, Got: {result.label} ❌\")" ] }, { "cell_type": "code", "execution_count": null, "id": "8700ac2b", "metadata": {}, "outputs": [], "source": [ "print(few_shot_data.keys())\n", "print(few_shot_data['low_health_literacy'][0].keys())" ] }, { "cell_type": "code", "execution_count": null, "id": "6b5dbe7a", "metadata": {}, "outputs": [], "source": [ "# import json\n", "# import pandas as pd\n", "# from tqdm import tqdm\n", "# import dspy\n", "# from sklearn.metrics import accuracy_score, f1_score, cohen_kappa_score, classification_report\n", "\n", "# # --- 1. Load Data and Optimized Program ---\n", "# CLEANED_DATA_PATH = \"/home/mshahidul/readctrl/data/new_exp/cleaned_health_literacy_data.json\"\n", "# FEW_SHOT_PATH = \"/home/mshahidul/readctrl/data/new_exp/few_shot_examples.json\"\n", "# MODEL_SAVE_PATH = \"/home/mshahidul/readctrl/data/new_exp/optimized_health_classifier.json\"\n", "\n", "# with open(CLEANED_DATA_PATH, 'r') as f:\n", "# full_data = json.load(f)\n", "\n", "# with open(FEW_SHOT_PATH, 'r') as f:\n", "# few_shot_data = json.load(f)\n", "\n", "# # Identify which doc_ids were used for training to ensure a clean test set\n", "# trained_ids = []\n", "# for label in few_shot_data:\n", "# trained_ids.extend([ex['doc_id'] for ex in few_shot_data[label]])\n", "\n", "# test_set = [item for item in full_data if item['doc_id'] not in trained_ids]\n", "# print(f\"Total test examples: {len(test_set)}\")\n", "# # --- 2. Initialize DSPy Program ---\n", "# vllm_model = dspy.LM(\n", "# model='openai/Qwen/Qwen3-30B-A3B-Instruct-2507',\n", "# api_base=\"http://172.16.34.29:8004/v1\",\n", "# api_key=\"EMPTY\"\n", "# )\n", "# dspy.configure(lm=vllm_model)\n", "\n", "# class HealthLiteracySignature(dspy.Signature):\n", "# \"\"\"Judge health literacy difficulty: low, intermediate, or proficient.\"\"\"\n", "# text = dspy.InputField()\n", "# reasoning = dspy.OutputField()\n", "# label = dspy.OutputField()\n", "\n", "# class HealthLiteracyClassifier(dspy.Module):\n", "# def __init__(self):\n", "# super().__init__()\n", "# self.predictor = dspy.ChainOfThought(HealthLiteracySignature)\n", "# def forward(self, text):\n", "# return self.predictor(text=text)\n", "\n", "# # Load the optimized state\n", "# classifier = HealthLiteracyClassifier()\n", "# classifier.load(MODEL_SAVE_PATH)\n", "\n", "# # --- 3. Run Inference ---\n", "# results = []\n", "# y_true = []\n", "# y_pred = []\n", "\n", "# print(f\"Starting evaluation on {len(test_set)} examples...\")\n", "\n", "# for item in tqdm(test_set):\n", "# try:\n", "# prediction = classifier(text=item['text'])\n", " \n", "# # Clean the label (sometimes LLMs add extra text or punctuation)\n", "# pred_label = prediction.label.strip().lower().replace(\" \", \"_\")\n", " \n", "# results.append({\n", "# \"doc_id\": item['doc_id'],\n", "# \"true_label\": item['label'],\n", "# \"pred_label\": pred_label,\n", "# \"reasoning\": prediction.reasoning\n", "# })\n", " \n", "# y_true.append(item['label'])\n", "# y_pred.append(pred_label)\n", "# except Exception as e:\n", "# print(f\"Error processing doc {item['doc_id']}: {e}\")\n", "\n", "# # --- 4. Calculate Metrics ---\n", "# labels = [\"low_health_literacy\", \"intermediate_health_literacy\", \"proficient_health_literacy\"]\n", "\n", "# accuracy = accuracy_score(y_true, y_pred)\n", "# f1 = f1_score(y_true, y_pred, average='weighted')\n", "# kappa = cohen_kappa_score(y_true, y_pred)\n", "\n", "# print(\"\\n--- Evaluation Results ---\")\n", "# print(f\"Accuracy: {accuracy:.4f}\")\n", "# print(f\"Cohen’s Kappa: {kappa:.4f}\")\n", "# print(f\"F1 Score (Weighted): {f1:.4f}\")\n", "# print(\"\\nClassification Report:\")\n", "# print(classification_report(y_true, y_pred, target_names=labels))\n", "\n", "# # Save results for failure analysis\n", "# output_file = \"/home/mshahidul/readctrl/data/new_exp/evaluation_results.json\"\n", "# with open(output_file, 'w') as f:\n", "# json.dump(results, f, indent=4)" ] }, { "cell_type": "code", "execution_count": null, "id": "e935e64c", "metadata": {}, "outputs": [], "source": [ "CUDA_DEVICE_ORDER=PCI_BUS_ID \\\n", "CUDA_VISIBLE_DEVICES=\"2\" \\\n", "PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True \\\n", "VLLM_USE_MODELSCOPE=True \\\n", "vllm \\\n", " serve swift/Qwen3-30B-A3B-AWQ \\\n", " --gpu-memory-utilization 0.9 \\\n", " --max-model-len 32768 \\\n", " --max-num-seqs 64 \\\n", " --served-model-name swift/Qwen3-30B-A3B-AWQ \\\n", " --host 127.0.0.1 \\\n", " --port 8004" ] }, { "cell_type": "code", "execution_count": 1, "id": "8e90b755", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Items processed: 60\n", "Max raters per item: 7\n", "---\n", "Krippendorff's Alpha (Ordinal): 0.7083\n", "\n", "Note: Fleiss' Kappa skipped because of unequal rater counts per item.\n", "Use Krippendorff's Alpha for your final report as it accounts for this.\n" ] } ], "source": [ "import json\n", "import numpy as np\n", "import krippendorff\n", "\n", "def calculate_iaa_robust(file_path):\n", " with open(file_path, 'r') as f:\n", " data = json.load(f)\n", "\n", " # 1. Prepare data for Krippendorff's Alpha\n", " # Matrix shape must be (coders, items)\n", " max_annotations = max(len(entry['rating_distribution']) for entry in data)\n", " \n", " # We create a list for each \"slot\" (rater position)\n", " # If Doc 1 has 3 ratings and Doc 2 has 5, Doc 1 gets two np.nan values\n", " reliability_data = []\n", " for i in range(max_annotations):\n", " row = []\n", " for entry in data:\n", " ratings = entry['rating_distribution']\n", " if i < len(ratings):\n", " row.append(ratings[i])\n", " else:\n", " row.append(np.nan)\n", " reliability_data.append(row)\n", " \n", " reliability_matrix = np.array(reliability_data)\n", "\n", " # 2. Calculate Krippendorff's Alpha (The primary metric for your paper)\n", " # Level of measurement 'ordinal' is best for 1-5 scales\n", " alpha = krippendorff.alpha(reliability_data=reliability_matrix, \n", " level_of_measurement='ordinal')\n", " \n", " print(f\"Items processed: {len(data)}\")\n", " print(f\"Max raters per item: {max_annotations}\")\n", " print(f\"---\")\n", " print(f\"Krippendorff's Alpha (Ordinal): {alpha:.4f}\")\n", "\n", " # 3. Handling Fleiss' Kappa (Optional/Conditional)\n", " counts_list = []\n", " rater_counts = []\n", " for entry in data:\n", " counts = [entry['rating_distribution'].count(i) for i in range(1, 6)]\n", " counts_list.append(counts)\n", " rater_counts.append(sum(counts))\n", " \n", " # Only run Fleiss if the raters are equal across all items\n", " if len(set(rater_counts)) == 1:\n", " from statsmodels.stats.inter_rater import fleiss_kappa\n", " f_kappa = fleiss_kappa(np.array(counts_list))\n", " print(f\"Fleiss' Kappa: {f_kappa:.4f}\")\n", " else:\n", " print(\"\\nNote: Fleiss' Kappa skipped because of unequal rater counts per item.\")\n", " print(\"Use Krippendorff's Alpha for your final report as it accounts for this.\")\n", "\n", "# Usage\n", "path = '/home/mshahidul/readctrl/data/final_result/consolidated_ratings_threshold_manual_edit.json'\n", "calculate_iaa_robust(path)" ] }, { "cell_type": "code", "execution_count": 3, "id": "a0776765", "metadata": {}, "outputs": [], "source": [ "# /home/mshahidul/readctrl/data/final_result/consolidated_ratings_threshold.json\n", "import json\n", "def get_expected_label(rating):\n", " if rating in [1, 2]:\n", " return \"low_health_literacy\"\n", " elif rating == 3:\n", " return \"intermediate_health_literacy\"\n", " elif rating in [4, 5]:\n", " return \"proficient_health_literacy\"\n", " return None\n", "with open(\"/home/mshahidul/readctrl/data/final_result/consolidated_ratings_threshold_manual_edit.json\", 'r') as f:\n", " few_shot_data = json.load(f)\n", "cnt=0\n", "for item in few_shot_data:\n", " expected_label = item['health_literacy_label']\n", " consensus_rating = get_expected_label(item['consensus_rating'])\n", " if expected_label == consensus_rating:\n", " cnt+=1" ] }, { "cell_type": "code", "execution_count": null, "id": "ed0a0618", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "76ed37ea", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "dict_keys(['id', 'fulltext', 'summary'])\n" ] } ], "source": [ "# /home/mshahidul/readctrl/data/thresold_finding/junaed/seq0_record3.json\n", "import json\n", "with open(\"/home/mshahidul/readctrl/data/processed_test_raw_data/multiclinsum_test_en.json\", 'r') as f:\n", " data = json.load(f)\n", "print(data[0].keys())\n" ] }, { "cell_type": "code", "execution_count": 15, "id": "eaefbfc6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Source Type | Level | Mean Threshold (%)\n", "-------------------------------------------------------\n", "Gold Summary | low | 61.07% (n=15)\n", "Gold Summary | intermediate | 81.99% (n=15)\n", "Gold Summary | proficient | 95.69% (n=2)\n", "Full Original Text | low | 37.23% (n=14)\n", "Full Original Text | intermediate | 66.11% (n=14)\n", "Full Original Text | proficient | 90.69% (n=4)\n" ] } ], "source": [ "import os\n", "import json\n", "from collections import defaultdict\n", "import numpy as np\n", "\n", "# Configuration\n", "base_path = \"/home/mshahidul/readctrl/data/thresold_finding\"\n", "levels = ['low', 'intermediate', 'proficient']\n", "source_types = [\"Gold Summary\", \"Full Original Text\"]\n", "\n", "# Dictionary to store percentages: results[source_type][level] = [list of values]\n", "results = {src: {lvl: [] for lvl in levels} for src in source_types}\n", "\n", "# Iterate through each annotator folder (e.g., 'junaed')\n", "annotator_names=['junaed','plabandas','shama']\n", "for annotator in annotator_names:\n", " annotator_path = os.path.join(base_path, annotator)\n", " \n", " if os.path.isdir(annotator_path):\n", " # Iterate through each json file in the folder\n", " for filename in os.listdir(annotator_path):\n", " if filename.endswith(\".json\"):\n", " file_path = os.path.join(annotator_path, filename)\n", " \n", " try:\n", " with open(file_path, 'r') as f:\n", " data = json.load(f)\n", " \n", " src_type = data.get('source_type')\n", " # Ensure source_type is one we are tracking\n", " if src_type in source_types:\n", " for lvl in levels:\n", " # Extract threshold percentage from the annotations\n", " # Adjust 'threshold' key name if it differs in your JSON\n", " val = data['annotations'][lvl].get('percentage').replace('%', '').strip()\n", " if val is not None:\n", " if float(val) <= 99:\n", " results[src_type][lvl].append(float(val))\n", "\n", " \n", " except Exception as e:\n", " print(f\"Error processing {file_path}: {e}\")\n", "\n", "# Calculate and display averages\n", "print(f\"{'Source Type':<20} | {'Level':<15} | {'Mean Threshold (%)'}\")\n", "print(\"-\" * 55)\n", "\n", "for src in source_types:\n", " for lvl in levels:\n", " vals = results[src][lvl]\n", " if vals:\n", " mean_val = np.mean(vals)\n", " count = len(vals)\n", " print(f\"{src:<20} | {lvl:<15} | {mean_val:>8.2f}% (n={count})\")\n", " else:\n", " print(f\"{src:<20} | {lvl:<15} | No data found\")" ] }, { "cell_type": "code", "execution_count": null, "id": "1aa3cd60", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "un", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.14" } }, "nbformat": 4, "nbformat_minor": 5 }