import json
import os

import dspy
from dspy.evaluate import Evaluate
# Endpoint of the OpenAI-compatible server hosting the classifier LM;
# overridable via the LLM_CPP_API_BASE environment variable.
LLM_CPP_API_BASE = os.getenv("LLM_CPP_API_BASE", "http://172.16.34.21:8034/v1")

# Compiled DSPy program (teacher-distilled) and the held-out test split.
MODEL_PATH = "/home/mshahidul/readctrl/code/text_classifier/dspy_model/vllm-Meta-Llama-3.1-8B-Instruct_teacher-gpt5_v1/model.json"
TEST_PATH = "/home/mshahidul/readctrl/code/text_classifier/test.jsonl"

# Deterministic decoding (temperature 0) against the local endpoint.
llama_cpp_lm = dspy.LM(
    model="openai/dspy",
    api_key="EMPTY",
    api_base=LLM_CPP_API_BASE,
    temperature=0.0,
)
dspy.configure(lm=llama_cpp_lm)
class HealthLiteracySignature(dspy.Signature):
    # NOTE(review): dspy uses this docstring as the LM instruction prompt and
    # each field's `desc` as prompt annotations — editing them changes model
    # behavior, so they are left exactly as compiled.
    """
    Analyze the linguistic complexity, use of medical jargon, and sentence
    structure of 'generated_text' to determine the health literacy level.
    """
    # Input: the rewritten passage whose reading level is being classified.
    generated_text = dspy.InputField(
        desc="A version of the source text rewritten for a specific audience."
    )
    # Output: one of the three literacy labels enumerated in the desc below.
    literacy_label = dspy.OutputField(
        desc=(
            "Classification: low_health_literacy (simple words, no jargon), "
            "intermediate_health_literacy (moderate technicality), or "
            "proficient_health_literacy (highly technical/original level)."
        )
    )
class HealthLiteracyClassifier(dspy.Module):
    """Chain-of-thought wrapper around ``HealthLiteracySignature``."""

    def __init__(self):
        super().__init__()
        # An explicit reasoning step precedes the emitted label.
        self.classifier = dspy.ChainOfThought(HealthLiteracySignature)

    def forward(self, generated_text):
        """Classify one rewritten passage; returns the dspy prediction."""
        prediction = self.classifier(generated_text=generated_text)
        return prediction
def load_testset(path):
    """Load a JSONL test split into a list of dspy Examples.

    Each non-blank line must be a JSON object with ``generated_text`` and
    ``literacy_label`` keys; ``generated_text`` is marked as the input field.

    Args:
        path: Filesystem path to the ``.jsonl`` test file.

    Returns:
        List of ``dspy.Example`` objects with inputs configured.

    Raises:
        KeyError: if a record lacks one of the required keys.
        json.JSONDecodeError: if a line is not valid JSON.
    """
    examples = []
    # Fix: declare UTF-8 explicitly — the default encoding is
    # platform-dependent, and JSONL files are conventionally UTF-8.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank/whitespace-only lines
            record = json.loads(line)
            example = dspy.Example(
                generated_text=record["generated_text"],
                literacy_label=record["literacy_label"],
            ).with_inputs("generated_text")
            examples.append(example)
    return examples
def health_literacy_metric(gold, pred, trace=None):
    """Lenient accuracy metric for the literacy classifier.

    Counts a prediction as correct when the gold label appears
    (case-insensitively, after trimming) as a substring of the predicted
    label text, so extra wording around the label still scores.

    Args:
        gold: Example carrying the reference ``literacy_label``.
        pred: Prediction expected to carry ``literacy_label``.
        trace: Unused; accepted for dspy metric-signature compatibility.

    Returns:
        bool: True on a (substring) match, False otherwise or when the
        prediction is missing/malformed.
    """
    if not (pred and hasattr(pred, "literacy_label")):
        return False
    expected = str(gold.literacy_label).strip().lower()
    predicted = str(pred.literacy_label).strip().lower()
    return expected in predicted
def load_compiled_classifier(path):
    """Best-effort loader for a compiled DSPy program.

    Tries the top-level ``dspy.load`` entry point first; if it is absent or
    raises, falls back to instantiating ``HealthLiteracyClassifier`` and
    loading state into it with ``.load(path)``.

    Args:
        path: Path to the serialized compiled model.

    Returns:
        The loaded program/module ready for evaluation.

    Raises:
        RuntimeError: when the fallback loader also fails (chained to the
        underlying exception).
    """
    loader = getattr(dspy, "load", None)
    if loader is not None:
        try:
            return loader(path)
        except Exception:
            # Best-effort: fall through to the per-module loader below.
            pass
    classifier = HealthLiteracyClassifier()
    try:
        classifier.load(path)
    except Exception as exc:
        raise RuntimeError(f"Failed to load compiled model from {path}") from exc
    return classifier
def main():
    """Evaluate the compiled classifier on the held-out JSONL test set.

    Loads the test set and compiled program, runs a single-threaded dspy
    evaluation with the substring-match metric, and prints the raw result
    plus the accuracy score.

    Raises:
        FileNotFoundError: if the model or test file is missing.
    """
    for required_path, label in ((MODEL_PATH, "Model"), (TEST_PATH, "Test")):
        if not os.path.exists(required_path):
            raise FileNotFoundError(f"{label} file not found: {required_path}")

    testset = load_testset(TEST_PATH)
    compiled_classifier = load_compiled_classifier(MODEL_PATH)

    evaluator = Evaluate(
        devset=testset,
        metric=health_literacy_metric,
        num_threads=1,
        display_progress=True,
    )
    evaluation_result = evaluator(compiled_classifier)

    # Newer dspy versions return an object exposing .score; older ones
    # return the score directly.
    if hasattr(evaluation_result, "score"):
        accuracy_score = float(evaluation_result.score)
    else:
        accuracy_score = float(evaluation_result)

    print(evaluation_result)
    print(f"accuracy_score: {accuracy_score}")


if __name__ == "__main__":
    main()