import csv
import json
import os
import random
import re

import numpy as np
import requests

# --- Configuration ---
# JSON inputs: the items to evaluate on and the pool few-shot examples are drawn from.
DEV_SET_PATH = "/home/mshahidul/readctrl/data/new_exp/test_health_literacy_data.json"
FEW_SHOT_POOL_PATH = "/home/mshahidul/readctrl/data/new_exp/final_prompt_template_info.json"
# Chat-completions endpoint and the model name sent with every request.
LOCAL_API_URL = "http://172.16.34.29:8004/v1/chat/completions"
LOCAL_MODEL_NAME = "Qwen/Qwen3-30B-A3B-Instruct-2507"

# EXPERIMENT SETTINGS
# Number of few-shot examples per label to try: 1 through 6.
SHOTS_TO_EVALUATE = list(range(1, 7))
# Each shot count is repeated this many times, each with a fresh random sample.
NUM_TRIALS = 3

# --- Logic ---

def _escape_format_braces(value):
    """Double literal braces so the text survives a later ``str.format`` call."""
    return str(value).replace("{", "{{").replace("}", "}}")


def build_random_prompt(few_shot_data, k_per_label):
    """Build a few-shot prompt template from randomly sampled examples.

    Up to ``k_per_label`` entries are drawn without replacement for each of
    the three health-literacy labels, always in the order low, intermediate,
    proficient. A label with fewer than ``k_per_label`` entries contributes
    all of them; a label with no entries contributes nothing.

    Args:
        few_shot_data: Iterable of dicts providing the keys ``label``,
            ``gen_text`` and ``reasoning``.
        k_per_label: Number of examples to sample for each label.

    Returns:
        A template string with exactly one replacement field,
        ``{input_text}``, to be filled via ``template.format(input_text=...)``.
        Braces occurring in the example text are escaped so that formatting
        reproduces them literally instead of raising or being misread as
        replacement fields.

    Raises:
        KeyError: If an entry lacks one of the required keys.
        ValueError: If ``k_per_label`` is negative (from ``random.sample``).
    """
    instruction = (
        "You are an expert in health communication. Your task is to judge the health literacy level of the provided text.\n"
        "Classify the text into: low_health_literacy, intermediate_health_literacy, or proficient_health_literacy.\n\n"
    )
    labels = ("low_health_literacy", "intermediate_health_literacy", "proficient_health_literacy")
    separator = "-" * 30 + "\n"

    # Group the pool by label so each class is sampled independently.
    categorized = {}
    for entry in few_shot_data:
        categorized.setdefault(entry['label'], []).append(entry)

    parts = [instruction, "### Examples:\n"]
    for label in labels:
        pool = categorized.get(label, [])
        # Cap the sample size at the pool size; random.sample rejects k > len(pool).
        for ex in random.sample(pool, min(k_per_label, len(pool))):
            example_text = _escape_format_braces(ex['gen_text'])
            example_reasoning = _escape_format_braces(ex['reasoning'])
            parts.append(f"Target Text: \"{example_text}\"\n")
            parts.append(f"Reasoning: {example_reasoning}\n")
            parts.append(f"Label: {label}\n")
            parts.append(separator)

    # The only unescaped field left in the template is {input_text}.
    parts.append("\n### Task:\nTarget Text: \"{input_text}\"\nReasoning:")
    return "".join(parts)

def get_prediction(prompt_template, input_text):
    """Query the local chat-completions endpoint for one input text.

    Args:
        prompt_template: Template string containing an ``{input_text}``
            replacement field; it is filled with ``str.format``.
        input_text: Text to classify.

    Returns:
        The stripped content of the first returned choice, or the sentinel
        string ``"Error"`` when the request fails, the server answers with an
        error status, or the response does not have the expected structure.
    """
    final_prompt = prompt_template.format(input_text=input_text)
    payload = {
        "model": LOCAL_MODEL_NAME,
        "messages": [{"role": "user", "content": final_prompt}],
        "temperature": 0,
    }
    try:
        response = requests.post(LOCAL_API_URL, json=payload, timeout=30)
        # Turn HTTP error statuses into an explicit failure instead of relying
        # on a missing 'choices' key further down.
        response.raise_for_status()
        return response.json()['choices'][0]['message']['content'].strip()
    except (
        requests.RequestException,  # connection errors, timeouts, HTTP errors
        ValueError,                 # body is not valid JSON
        KeyError, IndexError, TypeError, AttributeError,  # unexpected response shape
    ) as exc:
        # Only expected failure modes are caught, so Ctrl-C (KeyboardInterrupt)
        # and SystemExit still stop the run.
        print(f"   [warn] prediction request failed: {exc!r}")
        return "Error"

# Marker that precedes the final answer in the few-shot prompt format.
_LABEL_MARKER = "label:"
# Full label name; words may be joined by underscores, hyphens or whitespace.
_FULL_LABEL_RE = re.compile(
    r"(?<![a-z])(low|intermediate|proficient)[\s_-]+health[\s_-]+literacy"
)
# Bare level keyword that is not embedded in a longer word ("follows", "below").
_LEVEL_KEYWORD_RE = re.compile(r"(?<![a-z])(low|intermediate|proficient)(?![a-z])")


def parse_label(text):
    """Map free-form text to one of the canonical health-literacy labels.

    The search is case-insensitive and proceeds from the most to the least
    reliable evidence:

    1. text following the first ``Label:`` marker, if there is one;
    2. the whole text.

    Within each of those, a full label name (e.g. ``low_health_literacy``)
    wins over a bare level keyword (``low``, ``intermediate``,
    ``proficient``). Keywords only count when they are not part of a longer
    word, so "follows" or "below" are not read as "low".

    Args:
        text: A model completion or a gold label string.

    Returns:
        ``"low_health_literacy"``, ``"intermediate_health_literacy"``,
        ``"proficient_health_literacy"``, or ``"unknown"`` when no level can
        be identified.
    """
    lowered = text.lower()
    _, marker, after_marker = lowered.partition(_LABEL_MARKER)
    # Fall back to the whole text when the marker is absent or nothing usable follows it.
    segments = (after_marker, lowered) if marker else (lowered,)

    for segment in segments:
        for pattern in (_FULL_LABEL_RE, _LEVEL_KEYWORD_RE):
            match = pattern.search(segment)
            if match:
                return f"{match.group(1)}_health_literacy"
    return "unknown"

# --- Execution ---

# Read the evaluation items, then the pool of candidate few-shot examples.
with open(DEV_SET_PATH, 'r') as dev_file:
    dev_set = json.load(dev_file)
with open(FEW_SHOT_POOL_PATH, 'r') as pool_file:
    few_shot_pool = json.load(pool_file)

# Ensure no data leakage: any dev item whose doc_id also appears in the
# few-shot pool is dropped from the evaluation set.
shot_ids = set(example['doc_id'] for example in few_shot_pool)
clean_dev_set = [case for case in dev_set if case['doc_id'] not in shot_ids]

final_summary = []

# Accuracy is a ratio over the evaluation set; fail with a clear message
# instead of a ZeroDivisionError deep inside the trial loop.
if not clean_dev_set:
    raise ValueError(
        "No evaluation items remain after removing few-shot examples from the dev set."
    )

# Gold labels do not change between trials, so normalise them once up front.
gold_labels = [parse_label(case['label']) for case in clean_dev_set]

for k in SHOTS_TO_EVALUATE:
    trial_accuracies = []
    print(f"\n>>> Starting evaluation for {k}-shot ({NUM_TRIALS} trials)")

    for t in range(NUM_TRIALS):
        # Create a prompt with a NEW random sample for this trial
        current_template = build_random_prompt(few_shot_pool, k)
        correct = 0

        for case, gold in zip(clean_dev_set, gold_labels):
            pred = parse_label(get_prediction(current_template, case['gen_text']))
            # A failed or unparseable prediction must never score as a hit,
            # even if the gold label itself could not be normalised.
            if pred != "unknown" and pred == gold:
                correct += 1

        acc = (correct / len(clean_dev_set)) * 100
        trial_accuracies.append(acc)
        print(f"   Trial {t+1}/{NUM_TRIALS}: Accuracy = {acc:.2f}%")

    # Calculate statistics for the shot count. np.std uses its default
    # (population) definition; values are converted to plain floats so the
    # summary holds only built-in types.
    avg_acc = float(np.mean(trial_accuracies))
    std_dev = float(np.std(trial_accuracies))

    final_summary.append({
        "shots_per_label": k,
        "average_accuracy": round(avg_acc, 2),
        "std_dev": round(std_dev, 2),
        "trial_results": trial_accuracies
    })

# --- Save Results ---
# Persist the per-shot-count summary (including every trial's accuracy) as JSON.
output_json = "/home/mshahidul/readctrl/data/new_exp/random_trial_results.json"
with open(output_json, 'w') as f:
    json.dump(final_summary, f, indent=4)

# Print a fixed-width summary table: one row per evaluated shot count.
print("\n" + "="*40)
print(f"{'Shots':<10} | {'Avg Accuracy':<15} | {'Std Dev':<10}")
print("-" * 40)
for res in final_summary:
    # Attach the percent sign to the number before padding, so it sits next
    # to the value and the column stays as wide as its header.
    accuracy_cell = f"{res['average_accuracy']:.2f}%"
    print(f"{res['shots_per_label']:<10} | {accuracy_cell:<15} | {res['std_dev']:<10.2f}")
print("="*40)