"""Evaluate a compiled DSPy health-literacy classifier on a JSONL test set.

The script configures a DSPy language model against an OpenAI-compatible
endpoint, loads a previously compiled classifier from ``MODEL_PATH``, runs it
over every record in ``TEST_PATH`` and prints the resulting accuracy score.
"""

import json
import os

import dspy
from dspy.evaluate import Evaluate

# Base URL of the OpenAI-compatible endpoint; overridable via the environment.
LLM_CPP_API_BASE = os.environ.get("LLM_CPP_API_BASE", "http://172.16.34.21:8034/v1")
# Saved state of the compiled classifier that is evaluated.
MODEL_PATH = (
    "/home/mshahidul/readctrl/code/text_classifier/dspy_model/vllm-Meta-Llama-3.1-8B-Instruct_teacher-gpt5_v1/model.json"
)
# JSONL file with one {"generated_text": ..., "literacy_label": ...} per line.
TEST_PATH = "/home/mshahidul/readctrl/code/text_classifier/test.jsonl"

# NOTE: the LM is created and registered at import time, so importing this
# module has the side effect of configuring DSPy globally.
llama_cpp_lm = dspy.LM(
    model="openai/dspy",
    api_base=LLM_CPP_API_BASE,
    api_key="EMPTY",
    temperature=0.0,
)
dspy.configure(lm=llama_cpp_lm)


# NOTE(review): DSPy appears to use a Signature's docstring as the prompt
# instructions, so the docstring below is runtime text, not documentation.
# Keep its wording unchanged and document the class with `#` comments only.
class HealthLiteracySignature(dspy.Signature):
    """
    Analyze the linguistic complexity, use of medical jargon, and sentence
    structure of 'generated_text' to determine the health literacy level.
    """

    generated_text = dspy.InputField(
        desc="A version of the source text rewritten for a specific audience."
    )
    literacy_label = dspy.OutputField(
        desc=(
            "Classification: low_health_literacy (simple words, no jargon), "
            "intermediate_health_literacy (moderate technicality), or "
            "proficient_health_literacy (highly technical/original level)."
        )
    )


class HealthLiteracyClassifier(dspy.Module):
    """Chain-of-thought classifier built on ``HealthLiteracySignature``."""

    def __init__(self):
        super().__init__()
        self.classifier = dspy.ChainOfThought(HealthLiteracySignature)

    def forward(self, generated_text):
        """Classify ``generated_text`` and return the predictor's output."""
        return self.classifier(generated_text=generated_text)


def load_testset(path):
    """Read a JSONL file into a list of ``dspy.Example`` objects.

    Blank lines are skipped. Every remaining line must be a JSON object with
    the keys ``generated_text`` and ``literacy_label``; ``generated_text`` is
    marked as the only input field.

    Raises:
        json.JSONDecodeError: a line is not valid JSON (the message names the
            file and line number).
        KeyError: a record lacks one of the required keys.
    """
    examples = []
    # Explicit UTF-8 so decoding does not depend on the platform's locale.
    with open(path, "r", encoding="utf-8") as f:
        for line_number, line in enumerate(f, start=1):
            if not line.strip():
                continue
            try:
                record = json.loads(line)
            except json.JSONDecodeError as exc:
                # Same exception type as before, with the location added.
                raise json.JSONDecodeError(
                    f"{exc.msg} ({path}, line {line_number})", exc.doc, exc.pos
                ) from exc
            example = dspy.Example(
                generated_text=record["generated_text"],
                literacy_label=record["literacy_label"],
            ).with_inputs("generated_text")
            examples.append(example)
    return examples


def health_literacy_metric(gold, pred, trace=None):
    """Return True when the gold label occurs in the predicted label.

    Both labels are stripped and lower-cased, and the comparison is a
    substring test so that a prediction which wraps the label in extra text
    still counts. ``trace`` is accepted for the DSPy metric signature and is
    not used.

    A prediction without a ``literacy_label`` (or with ``None``) scores False,
    as does an empty gold label, which would otherwise be a substring of every
    prediction.
    """
    raw_prediction = getattr(pred, "literacy_label", None)
    if raw_prediction is None:
        return False
    gold_label = str(gold.literacy_label).strip().lower()
    if not gold_label:
        # "" is a substring of every string; never count that as a match.
        return False
    pred_label = str(raw_prediction).strip().lower()
    return gold_label in pred_label


def load_compiled_classifier(path):
    """Load the compiled classifier stored at ``path``.

    ``dspy.load`` is tried first when the installed DSPy provides it. If that
    fails for any reason, a fresh ``HealthLiteracyClassifier`` is created and
    its state is loaded from ``path`` instead.

    Raises:
        RuntimeError: both strategies failed. The fallback error is chained
            as the cause and the ``dspy.load`` error, if any, is included in
            the message.
    """
    whole_program_error = None
    if hasattr(dspy, "load"):
        # NOTE(review): dspy.load presumably deserializes a pickled program;
        # only point it at trusted paths. Confirm against the DSPy docs.
        try:
            return dspy.load(path)
        except Exception as exc:
            # Best-effort attempt: remember the failure and fall back.
            whole_program_error = exc

    classifier = HealthLiteracyClassifier()
    try:
        classifier.load(path)
    except Exception as exc:
        message = f"Failed to load compiled model from {path}"
        if whole_program_error is not None:
            message += f" (dspy.load also failed: {whole_program_error!r})"
        raise RuntimeError(message) from exc
    return classifier


def main():
    """Evaluate the compiled classifier on the test set and print the score.

    Raises:
        FileNotFoundError: ``MODEL_PATH`` or ``TEST_PATH`` does not exist.
        ValueError: the test file contains no examples.
    """
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(f"Model file not found: {MODEL_PATH}")
    if not os.path.exists(TEST_PATH):
        raise FileNotFoundError(f"Test file not found: {TEST_PATH}")

    testset = load_testset(TEST_PATH)
    if not testset:
        # An accuracy over zero examples is meaningless; fail early.
        raise ValueError(f"Test set is empty: {TEST_PATH}")
    compiled_classifier = load_compiled_classifier(MODEL_PATH)

    evaluator = Evaluate(
        devset=testset,
        metric=health_literacy_metric,
        num_threads=1,
        display_progress=True,
    )
    evaluation_result = evaluator(compiled_classifier)
    # The result is either an object exposing `.score` or a bare number.
    accuracy_score = (
        float(evaluation_result.score)
        if hasattr(evaluation_result, "score")
        else float(evaluation_result)
    )
    print(evaluation_result)
    print(f"accuracy_score: {accuracy_score}")


if __name__ == "__main__":
    main()