| | import json |
| | import requests |
| | import os |
| |
|
| | |
# --- Data locations -------------------------------------------------------
# JSON file with the samples to be scored.
DEV_SET_PATH = "/home/mshahidul/readctrl/data/new_exp/test_health_literacy_data.json"
# JSON file with the pool of labelled examples used to build few-shot prompts.
FEW_SHOT_SET_PATH = "/home/mshahidul/readctrl/data/new_exp/final_prompt_template_info.json"

# --- Model endpoint -------------------------------------------------------
# Chat-completions URL of the locally served model and the model name sent
# in each request payload.
LOCAL_API_URL = "http://172.16.34.29:8004/v1/chat/completions"
LOCAL_MODEL_NAME = "Qwen/Qwen3-30B-A3B-Instruct-2507"

# --- Experiment grid ------------------------------------------------------
# Number of few-shot examples *per label* to evaluate: 0 through 6 inclusive.
SHOTS_TO_EVALUATE = list(range(7))
| |
|
| | |
| |
|
def build_dynamic_prompt(few_shot_data, k_per_label):
    """Build a prompt template with up to ``k_per_label`` examples per label.

    The returned string is a ``str.format`` template containing exactly one
    replacement field, ``{input_text}``. Literal braces occurring in the
    example texts are escaped so that formatting the template later cannot
    fail or alter the examples.

    Args:
        few_shot_data: Iterable of dicts, each providing the keys ``label``,
            ``gen_text`` and ``reasoning``.
        k_per_label: Number of examples to include for each of the three
            literacy labels. Zero (or a negative value) yields a zero-shot
            prompt without an examples section.

    Returns:
        The prompt template as a string.
    """
    instruction = (
        "You are an expert in health communication. Your task is to judge the health literacy level of the provided text.\n"
        "Classify the text into: low_health_literacy, intermediate_health_literacy, or proficient_health_literacy.\n\n"
    )
    task_block = "### Task:\nTarget Text: \"{input_text}\"\nReasoning:"

    # A negative count would turn the slice below into "all but the last k",
    # which is never what a shot count means; treat it as zero-shot.
    if k_per_label <= 0:
        return instruction + task_block

    def _escape_braces(value):
        # The template is passed through str.format() by the caller, so any
        # literal brace in example text must be doubled to survive it.
        return str(value).replace("{", "{{").replace("}", "}}")

    # Group the pool by label, preserving the pool's original order.
    categorized = {}
    for entry in few_shot_data:
        categorized.setdefault(entry['label'], []).append(entry)

    labels = ["low_health_literacy", "intermediate_health_literacy", "proficient_health_literacy"]
    separator = "-" * 30 + "\n"

    parts = ["### Examples:\n"]
    for label in labels:
        for ex in categorized.get(label, [])[:k_per_label]:
            parts.append(f"Target Text: \"{_escape_braces(ex['gen_text'])}\"\n")
            parts.append(f"Reasoning: {_escape_braces(ex['reasoning'])}\n")
            parts.append(f"Label: {label}\n")
            parts.append(separator)

    return instruction + "".join(parts) + "\n" + task_block
| |
|
def get_prediction(prompt_template, input_text):
    """Send the formatted prompt to the local LLM and return its reply.

    Args:
        prompt_template: ``str.format`` template with an ``{input_text}``
            replacement field.
        input_text: Text substituted into the template.

    Returns:
        The stripped content of the first choice's message, or the sentinel
        string ``"Error"`` when the request fails or the response does not
        have the expected structure.
    """
    final_prompt = prompt_template.format(input_text=input_text)
    payload = {
        "model": LOCAL_MODEL_NAME,
        "messages": [{"role": "user", "content": final_prompt}],
        "temperature": 0,
    }
    try:
        response = requests.post(LOCAL_API_URL, json=payload, timeout=30)
        # Surface HTTP 4xx/5xx explicitly instead of failing later on a
        # missing 'choices' key in an error body.
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content'].strip()
    except (
        requests.RequestException,  # connection, timeout, HTTP status errors
        ValueError,                 # body is not valid JSON
        KeyError,                   # expected key missing from the response
        IndexError,                 # empty 'choices' list
        TypeError,                  # response has an unexpected shape
        AttributeError,             # message content is not a string
    ) as exc:
        # Keep the best-effort contract (callers get "Error"), but make the
        # failure visible so it is not mistaken for a wrong prediction.
        print(f"[get_prediction] request failed: {exc!r}")
        return "Error"
| |
|
def parse_label(text):
    """Normalize free-form LLM output (or a dataset label) to a dataset label.

    Matching is case-insensitive and looks for the keywords ``low``,
    ``intermediate`` and ``proficient`` as whole words, so that words which
    merely contain them (e.g. "follows", "below", "allows") are ignored.
    When the text contains a ``Label:`` marker, the part after the last
    marker is checked first, because that is where the prompt format places
    the final verdict; the full text is used as a fallback.

    Args:
        text: Raw model output or a dataset label string.

    Returns:
        One of ``"low_health_literacy"``, ``"intermediate_health_literacy"``,
        ``"proficient_health_literacy"``, or ``"unknown"`` when no keyword is
        found.
    """
    import re  # local import: the module header does not import ``re``

    lowered = text.lower()
    _, marker, verdict = lowered.rpartition("label:")
    segments = [verdict, lowered] if marker else [lowered]

    for segment in segments:
        # Same precedence as before (low, intermediate, proficient), but a
        # keyword only counts when it is not embedded in a longer word.
        # An underscore may follow, so "low_health_literacy" still matches.
        for keyword in ("low", "intermediate", "proficient"):
            if re.search(rf"(?<![a-z]){keyword}(?![a-z])", segment):
                return f"{keyword}_health_literacy"
    return "unknown"
| |
|
| | |
| |
|
| | |
# Load the evaluation samples and the few-shot example pool.
# JSON is UTF-8 by specification, so do not rely on the locale's default.
with open(DEV_SET_PATH, 'r', encoding='utf-8') as f:
    dev_set = json.load(f)
with open(FEW_SHOT_SET_PATH, 'r', encoding='utf-8') as f:
    few_shot_pool = json.load(f)

# Avoid leakage: drop every evaluation sample whose document also appears
# in the few-shot pool.
shot_ids = {item['doc_id'] for item in few_shot_pool}
clean_dev_set = [item for item in dev_set if item['doc_id'] not in shot_ids]

# Without this guard the accuracy computation below divides by zero.
if not clean_dev_set:
    raise SystemExit(
        "No evaluation samples remain after removing few-shot documents; "
        "nothing to evaluate."
    )

# Gold labels do not depend on k, so normalize them once up front.
gold_labels = [parse_label(item['label']) for item in clean_dev_set]

results_summary = []

print(f"Starting Evaluation on {len(clean_dev_set)} samples...\n")

# Run one full pass over the evaluation set for each shot count.
for k in SHOTS_TO_EVALUATE:
    print(f"Evaluating {k}-shot per label (Total {k*3} examples)...")

    current_template = build_dynamic_prompt(few_shot_pool, k)
    correct = 0

    for case, actual in zip(clean_dev_set, gold_labels):
        raw_output = get_prediction(current_template, case['gen_text'])
        pred = parse_label(raw_output)

        # An unparseable prediction must never count as a hit, even if the
        # gold label itself failed to normalize.
        if pred != "unknown" and pred == actual:
            correct += 1

    accuracy = (correct / len(clean_dev_set)) * 100
    results_summary.append({"shots_per_label": k, "accuracy": accuracy})
    print(f"-> Accuracy: {accuracy:.2f}%\n")

# Print a summary table and persist it as JSON.
print("-" * 30)
print(f"{'Shots/Label':<15} | {'Accuracy':<10}")
print("-" * 30)
for res in results_summary:
    print(f"{res['shots_per_label']:<15} | {res['accuracy']:.2f}%")
with open("/home/mshahidul/readctrl/data/new_exp/few_shot_evaluation_summary.json", 'w', encoding='utf-8') as f:
    json.dump(results_summary, f, indent=4)