File size: 1,499 Bytes
def readability_training_prompt_with_human(full_text, gold_summary, generated_text, human_score):
    """
    Build one conversation-style training example for readability evaluation.

    Parameters
    ----------
    full_text : str
        Original medical text (used as context only).
    gold_summary : str
        Human-written reference summary (used as context only).
    generated_text : str
        Model-generated text whose readability is being rated.
    human_score : int
        Human-assigned readability score, 1 (very easy) to 5 (very hard).

    Returns
    -------
    dict
        ``{'conversations': [user_turn, assistant_turn]}`` where the
        assistant turn is the strict-JSON answer the model is trained
        to emit.

    Raises
    ------
    ValueError
        If ``human_score`` is not an integer in [1, 5].
    """
    import json

    # Validate the label early so bad rows fail loudly instead of
    # silently producing corrupt training data.
    if not isinstance(human_score, int) or not 1 <= human_score <= 5:
        raise ValueError(
            f"human_score must be an integer in [1, 5], got {human_score!r}"
        )

    # NOTE(review): the original embedded the human score inside the
    # prompt's output template, leaking the label into the model input,
    # and the JSON example carried a trailing comma (invalid under the
    # prompt's own "STRICT JSON" rule). Both are fixed: the template now
    # shows a placeholder and is valid JSON.
    system_prompt = f"""
You are a medical readability evaluator.
Your task is to assess the readability of the GENERATED TEXT for a general audience.
You are given:
- FULL TEXT: {full_text}
- GOLD SUMMARY: {gold_summary}
- GENERATED TEXT: {generated_text}
Use the FULL TEXT and GOLD SUMMARY only as context. Evaluate ONLY the GENERATED TEXT.
Rate readability on a scale from 1 to 5:
1 = Very easy (child-friendly, minimal medical language)
2 = Easy
3 = Medium
4 = Hard
5 = Very hard (requires medical knowledge)
Do NOT evaluate factual correctness.
Do NOT compare writing quality.
Focus ONLY on readability.
### Output Format (STRICT JSON)
Return a valid JSON object with the following fields:
{{
"readability_score": <integer from 1 to 5>
}}
Do NOT include any text outside the JSON.
"""

    # The assistant turn must itself obey the strict-JSON contract the
    # prompt demands, so the model learns well-formed targets rather
    # than free-text answers.
    assistant_answer = json.dumps({"readability_score": human_score})

    # A list (not a tuple) is the conventional container for turns and
    # round-trips cleanly through JSON serialization.
    return {
        'conversations': [
            {'from': "user", 'content': system_prompt},
            {'from': "assistant", 'content': assistant_answer},
        ]
    }
|