import json
import os

import dspy
from dspy.evaluate import Evaluate


# Base URL of the LM endpoint; override with the LLM_CPP_API_BASE env var.
LLM_CPP_API_BASE = os.environ.get("LLM_CPP_API_BASE", "http://172.16.34.21:8034/v1")
# Path of the compiled model file to evaluate. Overridable through the
# HEALTH_LITERACY_MODEL_PATH env var so the script is not tied to one machine;
# the default is the original hard-coded location.
MODEL_PATH = os.environ.get(
    "HEALTH_LITERACY_MODEL_PATH",
    "/home/mshahidul/readctrl/code/text_classifier/dspy_model/vllm-Meta-Llama-3.1-8B-Instruct_teacher-gpt5_v1/model.json",
)
# Path of the JSONL test set. Overridable through HEALTH_LITERACY_TEST_PATH;
# the default is the original hard-coded location.
TEST_PATH = os.environ.get(
    "HEALTH_LITERACY_TEST_PATH",
    "/home/mshahidul/readctrl/code/text_classifier/test.jsonl",
)


# Keyword arguments for the LM client; the endpoint comes from LLM_CPP_API_BASE.
_LM_OPTIONS = {
    "model": "openai/dspy",
    "api_base": LLM_CPP_API_BASE,
    "api_key": "EMPTY",
    "temperature": 0.0,
}

# Build the client once at import time and register it through dspy.configure.
llama_cpp_lm = dspy.LM(**_LM_OPTIONS)
dspy.configure(lm=llama_cpp_lm)


# DSPy signature with one input field (`generated_text`) and one output field
# (`literacy_label`).
# NOTE(review): DSPy is understood to feed the class docstring and the `desc`
# strings into the prompt, so treat them as runtime text rather than
# documentation — confirm before rewording any of them.
class HealthLiteracySignature(dspy.Signature):
    """
    Analyze the linguistic complexity, use of medical jargon, and sentence
    structure of 'generated_text' to determine the health literacy level.
    """

    # Input: the text whose literacy level is to be classified.
    generated_text = dspy.InputField(
        desc="A version of the source text rewritten for a specific audience."
    )
    # Output: expected to name one of the three labels listed in `desc`.
    literacy_label = dspy.OutputField(
        desc=(
            "Classification: low_health_literacy (simple words, no jargon), "
            "intermediate_health_literacy (moderate technicality), or "
            "proficient_health_literacy (highly technical/original level)."
        )
    )


class HealthLiteracyClassifier(dspy.Module):
    """DSPy module that runs ChainOfThought over HealthLiteracySignature."""

    def __init__(self):
        """Create the ChainOfThought predictor used by ``forward``."""
        super().__init__()
        # NOTE(review): saved model state is presumably keyed by this attribute
        # name, so keep it as `classifier` — confirm against the model file.
        self.classifier = dspy.ChainOfThought(HealthLiteracySignature)

    def forward(self, generated_text):
        """Run the predictor on ``generated_text`` and return its result."""
        prediction = self.classifier(generated_text=generated_text)
        return prediction


def load_testset(path):
    """Load a JSONL test set into a list of DSPy examples.

    Each non-blank line of the file must be a JSON object containing the
    ``generated_text`` and ``literacy_label`` keys. Blank or whitespace-only
    lines are skipped.

    Args:
        path: Path of the JSONL file to read.

    Returns:
        A list of ``dspy.Example`` objects in file order, each carrying
        ``generated_text`` and ``literacy_label`` with ``generated_text``
        marked as the input field.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks one of the two required keys.
    """
    examples = []
    # Decode explicitly as UTF-8: without `encoding`, open() uses the locale's
    # preferred encoding, so non-ASCII text could be mis-decoded (or raise) on
    # machines whose locale is not UTF-8.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            record = json.loads(line)
            examples.append(
                dspy.Example(
                    generated_text=record["generated_text"],
                    literacy_label=record["literacy_label"],
                ).with_inputs("generated_text")
            )
    return examples


def health_literacy_metric(gold, pred, trace=None):
    """Return True when the prediction names the gold literacy label.

    Both labels are stringified, stripped and lower-cased, and the gold label
    only has to appear as a substring of the predicted label, so output such
    as ``"Classification: low_health_literacy"`` still counts as a match.

    Args:
        gold: Object exposing the reference ``literacy_label``.
        pred: Prediction object; may be falsy or lack ``literacy_label``.
        trace: Unused; accepted so the function fits the metric call signature.

    Returns:
        bool: True on a match. False when the prediction is falsy or has no
        ``literacy_label``, when either label is None, or when the gold label
        is blank.
    """
    if not pred or not hasattr(pred, "literacy_label"):
        return False

    gold_value = gold.literacy_label
    pred_value = pred.literacy_label
    # A missing label must not be scored as the literal string "none".
    if gold_value is None or pred_value is None:
        return False

    gold_label = str(gold_value).strip().lower()
    pred_label = str(pred_value).strip().lower()
    # An empty string is a substring of every string, so a blank gold label
    # would otherwise count every prediction as correct.
    if not gold_label:
        return False
    return gold_label in pred_label


def load_compiled_classifier(path):
    """Load the compiled classifier stored at ``path``.

    Tries ``dspy.load(path)`` first when the installed DSPy provides it. If
    that call raises, falls back to creating a ``HealthLiteracyClassifier``
    and calling its ``load(path)``.

    Args:
        path: Location of the saved model.

    Returns:
        The object returned by ``dspy.load``, or a ``HealthLiteracyClassifier``
        on which ``load(path)`` has been called.

    Raises:
        RuntimeError: If the fallback load fails. The original exception is
            chained as the cause, and the message also reports the earlier
            ``dspy.load`` failure when there was one.
    """
    # Keep the first failure instead of discarding it, so that a total failure
    # can report why both strategies were rejected.
    first_error = None
    if hasattr(dspy, "load"):
        # NOTE(review): dspy.load may deserialize pickled program data — only
        # point this at trusted files; confirm for the installed DSPy version.
        try:
            return dspy.load(path)
        except Exception as exc:
            # Best-effort attempt: fall through to the state-loading path below.
            first_error = exc

    classifier = HealthLiteracyClassifier()
    try:
        classifier.load(path)
    except Exception as exc:
        message = f"Failed to load compiled model from {path}"
        if first_error is not None:
            message += f" (dspy.load also failed: {first_error!r})"
        raise RuntimeError(message) from exc
    return classifier


def main():
    """Evaluate the compiled classifier on the test set and print the score.

    Raises:
        FileNotFoundError: If MODEL_PATH or TEST_PATH does not exist.
    """
    # Fail fast on missing inputs, model first.
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(f"Model file not found: {MODEL_PATH}")
    if not os.path.exists(TEST_PATH):
        raise FileNotFoundError(f"Test file not found: {TEST_PATH}")

    examples = load_testset(TEST_PATH)
    program = load_compiled_classifier(MODEL_PATH)

    run_evaluation = Evaluate(
        devset=examples,
        metric=health_literacy_metric,
        num_threads=1,
        display_progress=True,
    )
    outcome = run_evaluation(program)

    # The result either exposes a `score` attribute or is itself convertible
    # to float; handle both shapes.
    if hasattr(outcome, "score"):
        accuracy_score = float(outcome.score)
    else:
        accuracy_score = float(outcome)

    print(outcome)
    print(f"accuracy_score: {accuracy_score}")


# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()