| | import dspy |
| | import json |
| | import os |
| | import random |
| | from typing import Literal |
| | from dspy.teleprompt import BootstrapFewShotWithRandomSearch |
| | from dspy.evaluate import Evaluate |
| |
|
| | |
# --- Runtime configuration --------------------------------------------------
# Load API credentials from a local JSON file.
# NOTE(review): hard-coded absolute path — assumes this exact host layout.
api_file = "/home/mshahidul/api_new.json"
with open(api_file, "r") as f:
    api_keys = json.load(f)
openai_api_key = api_keys["openai"]

# Student model: a local vLLM server exposing an OpenAI-compatible endpoint.
# temperature=0.0 makes classification outputs deterministic.
vllm_model = dspy.LM(
    model="openai/dspy",
    api_base="http://172.16.34.29:8030/v1",
    api_key="EMPTY",
    temperature=0.0
)

# Teacher model used later by the optimizer to bootstrap demonstrations.
openai_model_teacher = dspy.LM(model="gpt-5", api_key=openai_api_key)

# All dspy modules default to the local student model unless overridden.
dspy.configure(lm=vllm_model)
| |
|
class HealthLiteracySignature(dspy.Signature):
    """
    Analyze the linguistic complexity, use of medical jargon, and sentence
    structure of 'generated_text' relative to 'full_text' to determine
    the health literacy level.
    """
    # NOTE: the docstring above and the desc= strings below are runtime data —
    # dspy injects them into the LM prompt, so editing them changes behavior.
    full_text = dspy.InputField(desc="Original clinical or medical source text containing jargon and technical details.")
    generated_text = dspy.InputField(
        desc="A version of the source text rewritten for a specific audience."
    )
    # Expected values mirror the three labels used by prepare_data().
    literacy_label = dspy.OutputField(
        desc="Classification: low_health_literacy (simple words, no jargon), intermediate_health_literacy (moderate technicality), or proficient_health_literacy (highly technical/original level)."
    )
| |
|
class HealthLiteracyClassifier(dspy.Module):
    """Chain-of-thought classifier that predicts a health-literacy label.

    Wraps HealthLiteracySignature in dspy.ChainOfThought so the LM reasons
    step by step before emitting ``literacy_label``.
    """

    def __init__(self):
        super().__init__()
        # Attribute name "classifier" is part of the saved-program state
        # (model.json) — keep it stable across versions.
        self.classifier = dspy.ChainOfThought(HealthLiteracySignature)

    def forward(self, full_text, generated_text):
        """Classify one (source, rewrite) pair; returns a dspy Prediction."""
        prediction = self.classifier(
            full_text=full_text, generated_text=generated_text
        )
        return prediction
| |
|
def prepare_data(raw_data, seed=42, train_ratio=0.6):
    """Build balanced, label-stratified train/test splits of dspy Examples.

    Every label contributes the same number of examples (the size of the
    rarest label); each label's pool is then split train/test according to
    ``train_ratio``. Items whose label is not one of the three known
    classes are silently dropped.

    Args:
        raw_data: iterable of dicts with "fulltext", "diff_label_texts",
            and "label" keys.
        seed: seed for a private RNG so splits are reproducible.
        train_ratio: fraction of each label's pool assigned to training.

    Returns:
        (trainset, testset): two shuffled lists of dspy.Example.

    Raises:
        ValueError: if any label has zero usable examples.
    """
    class_names = [
        "low_health_literacy",
        "intermediate_health_literacy",
        "proficient_health_literacy",
    ]
    rng = random.Random(seed)

    # Group usable records by label, converting each to a dspy Example.
    grouped = {name: [] for name in class_names}
    for record in raw_data:
        tag = record.get("label")
        if tag in grouped:
            grouped[tag].append(
                dspy.Example(
                    full_text=record["fulltext"],
                    generated_text=record["diff_label_texts"],
                    literacy_label=tag,
                ).with_inputs("full_text", "generated_text")
            )

    # Balance: every label contributes exactly as many as the rarest one.
    per_label_total = min(len(grouped[name]) for name in class_names)
    if per_label_total == 0:
        raise ValueError("One or more labels has no examples; cannot balance.")

    # Per-label train count, clamped to [1, total-1] so that (when total > 1)
    # both splits receive at least one example per label.
    per_label_train = int(round(per_label_total * train_ratio))
    per_label_train = max(1, min(per_label_train, per_label_total - 1))

    trainset, testset = [], []
    for name in class_names:
        rng.shuffle(grouped[name])
        pool = grouped[name][:per_label_total]
        trainset.extend(pool[:per_label_train])
        testset.extend(pool[per_label_train:])

    rng.shuffle(trainset)
    rng.shuffle(testset)
    return trainset, testset
| |
|
| |
|
# Load the labeled dataset and build balanced train/test splits.
# Fix: the original re-imported json (already imported at the top of the
# file) and used json.load(open(path)), which never closes the file handle.
path = "/home/mshahidul/readctrl/code/text_classifier/verified_combined_0-80.json"
with open(path, "r") as data_file:
    raw_data = json.load(data_file)
trainset, testset = prepare_data(raw_data)
| |
|
| | def _example_to_dict(example): |
| | return { |
| | "full_text": example.full_text, |
| | "generated_text": example.generated_text, |
| | "literacy_label": example.literacy_label, |
| | } |
| |
|
def save_jsonl(path, examples):
    """Write one JSON object per example to *path* in JSONL format.

    Args:
        path: destination file path (overwritten if it exists).
        examples: iterable of objects accepted by _example_to_dict.

    Fix: open with an explicit UTF-8 encoding — ensure_ascii=False emits
    raw non-ASCII characters, which can raise UnicodeEncodeError (or be
    mangled) under a non-UTF-8 platform default encoding.
    """
    with open(path, "w", encoding="utf-8") as f:
        for ex in examples:
            f.write(json.dumps(_example_to_dict(ex), ensure_ascii=False) + "\n")
| |
|
# Persist both splits next to the classifier code for later inspection.
train_path = "/home/mshahidul/readctrl/code/text_classifier/train.jsonl"
test_path = "/home/mshahidul/readctrl/code/text_classifier/test.jsonl"
for out_path, split in ((train_path, trainset), (test_path, testset)):
    save_jsonl(out_path, split)
| |
|
def health_literacy_metric(gold, pred, trace=None):
    """Lenient label-match metric for DSPy optimization and evaluation.

    Returns True when the gold label appears (case-insensitively, after
    stripping whitespace) as a substring of the predicted label text, so
    predictions that wrap the label in extra words still count. Returns
    False for falsy predictions or ones lacking ``literacy_label``.
    """
    # Guard: prediction must be truthy and expose a literacy_label field.
    if not (pred and hasattr(pred, 'literacy_label')):
        return False

    expected = str(gold.literacy_label).strip().lower()
    produced = str(pred.literacy_label).strip().lower()

    # Substring containment rather than equality: tolerates verbose output.
    return expected in produced
| |
|
# --- Prompt optimization & evaluation ---------------------------------------
# Bootstrap few-shot demos with the GPT-5 teacher, searching over random
# candidate programs; the local vLLM student executes the final program.
optimizer = BootstrapFewShotWithRandomSearch(
    metric=health_literacy_metric,
    max_bootstrapped_demos=3,
    num_candidate_programs=8,
    teacher_settings=dict(lm=openai_model_teacher)
)

compiled_classifier = optimizer.compile(HealthLiteracyClassifier(), trainset=trainset)

# Score the compiled program on the held-out split (single-threaded).
evaluator = Evaluate(
    devset=testset,
    metric=health_literacy_metric,
    num_threads=1,
    display_progress=True,
)
evaluation_result = evaluator(compiled_classifier)

# Newer dspy versions return an object with a .score attribute; older ones
# return a bare number — support both.
if hasattr(evaluation_result, "score"):
    accuracy_score = float(evaluation_result.score)
else:
    accuracy_score = float(evaluation_result)
| |
|
| | def _extract_usage(record): |
| | if isinstance(record, dict): |
| | usage = record.get("usage") |
| | if usage: |
| | return usage |
| | response = record.get("response") |
| | if isinstance(response, dict) and response.get("usage"): |
| | return response["usage"] |
| | return None |
| |
|
def calc_cost_usd(lm, price_in_per_1m, price_out_per_1m, price_cached_in_per_1m=None):
    """Sum token usage over an LM's call history and price it in USD.

    Args:
        lm: LM object whose ``history`` attribute holds per-call records
            understood by _extract_usage. A missing/empty history yields
            zero tokens and zero cost.
        price_in_per_1m: USD per 1M prompt/input tokens.
        price_out_per_1m: USD per 1M completion/output tokens.
        price_cached_in_per_1m: optional USD per 1M cached input tokens;
            ignored when None.
            NOTE(review): cached tokens are typically a subset of prompt
            tokens, so supplying this adds cost on top of the full input
            price — confirm the intended pricing model before using it.

    Returns:
        dict with prompt/completion/cached token totals and "cost_usd".
    """
    prompt_total = 0
    completion_total = 0
    cached_total = 0
    history = getattr(lm, "history", []) or []
    for record in history:
        usage = _extract_usage(record)
        if not usage:
            continue
        # Tolerate both OpenAI-style and generic key names; coerce None to 0.
        prompt_total += int(usage.get("prompt_tokens", usage.get("input_tokens", 0)) or 0)
        completion_total += int(usage.get("completion_tokens", usage.get("output_tokens", 0)) or 0)
        cached_total += int(usage.get("cached_tokens", usage.get("prompt_tokens_cached", 0)) or 0)

    cost = (prompt_total / 1_000_000) * price_in_per_1m
    cost += (completion_total / 1_000_000) * price_out_per_1m
    if price_cached_in_per_1m is not None:
        cost += (cached_total / 1_000_000) * price_cached_in_per_1m

    return {
        "prompt_tokens": prompt_total,
        "completion_tokens": completion_total,
        "cached_tokens": cached_total,
        "cost_usd": cost,
    }
| |
|
| | |
# --- Cost accounting & artifact persistence ---------------------------------
# GPT-5 list prices, USD per 1M tokens.
GPT5_PRICE_INPUT_PER_1M = 1.25
GPT5_PRICE_OUTPUT_PER_1M = 10.0

# Only the teacher's (GPT-5) spend is tracked; the student runs locally.
teacher_cost = calc_cost_usd(
    openai_model_teacher,
    GPT5_PRICE_INPUT_PER_1M,
    GPT5_PRICE_OUTPUT_PER_1M,
)
cost_report = {
    "gpt-5": teacher_cost,
}

# Save the compiled program plus accuracy/cost reports under a versioned dir.
folder_name = "vllm-qwen3-8b_teacher-gpt5_v1"
model_dir = f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}"
os.makedirs(model_dir, exist_ok=True)
compiled_classifier.save(f"{model_dir}/model.json")

print(evaluation_result)

accuracy_payload = {
    "accuracy_score": accuracy_score,
    # Older dspy versions expose no per-example results list; default to 0.
    "num_results": len(getattr(evaluation_result, "results", []) or []),
}
with open(f"{model_dir}/accuracy.json", "w") as f:
    json.dump(accuracy_payload, f, indent=2)

print(json.dumps(cost_report, indent=2))
with open(f"{model_dir}/cost.json", "w") as f:
    json.dump(cost_report, f, indent=2)