Upload folder using huggingface_hub
Browse files- README_model.md +70 -0
- app.py +55 -1
- requirements.txt +2 -0
README_model.md
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
pipeline_tag: summarization
|
| 6 |
+
tags:
|
| 7 |
+
- medical
|
| 8 |
+
- simplification
|
| 9 |
+
- health-literacy
|
| 10 |
+
- flan-t5
|
| 11 |
+
- plain-language
|
| 12 |
+
datasets:
|
| 13 |
+
- GEM/cochrane-simplification
|
| 14 |
+
- tttamayo/Med-EASi
|
| 15 |
+
base_model: google/flan-t5-base
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
# MedClear V2: Medical Text Simplification
|
| 19 |
+
|
| 20 |
+
**MedClear** translates doctor-speak into human-speak. It is a fine-tuned FLAN-T5-base model (248M parameters) that simplifies clinical notes, medical terms, and discharge summaries into plain language that patients can understand.
|
| 21 |
+
|
| 22 |
+
## Usage
|
| 23 |
+
|
| 24 |
+
```python
|
| 25 |
+
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
| 26 |
+
|
| 27 |
+
tokenizer = AutoTokenizer.from_pretrained("DTanzillo/medclear-v2-base")
|
| 28 |
+
model = AutoModelForSeq2SeqLM.from_pretrained("DTanzillo/medclear-v2-base")
|
| 29 |
+
|
| 30 |
+
text = "simplify: Patient underwent laparoscopic cholecystectomy for acute cholecystitis. EBL minimal. POD1: afebrile, tolerating PO diet."
|
| 31 |
+
inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
|
| 32 |
+
outputs = model.generate(**inputs, max_new_tokens=256, num_beams=4)
|
| 33 |
+
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
## Training Data
|
| 37 |
+
|
| 38 |
+
23,157 examples across multiple granularity levels:
|
| 39 |
+
|
| 40 |
+
| Level | Examples | % |
|
| 41 |
+
|-------|----------|---|
|
| 42 |
+
| Terms | 4,989 | 21.5% |
|
| 43 |
+
| Phrases | 6,660 | 28.8% |
|
| 44 |
+
| Sentences | 8,000 | 34.5% |
|
| 45 |
+
| Flashcards | 2,689 | 11.6% |
|
| 46 |
+
| Paragraphs | 574 | 2.5% |
|
| 47 |
+
| RAG-augmented | 245 | 1.1% |
|
| 48 |
+
|
| 49 |
+
Key insight: about 50% of the training examples are at the term or phrase level (21.5% terms + 28.8% phrases). The model learns vocabulary mappings first, then composes them into simplified text.
|
| 50 |
+
|
| 51 |
+
## Results
|
| 52 |
+
|
| 53 |
+
| Metric | Raw FLAN-T5 | MedClear |
|
| 54 |
+
|--------|-------------|----------|
|
| 55 |
+
| ROUGE-1 F1 | 0.13 | **0.36** |
|
| 56 |
+
| ROUGE-2 F1 | 0.05 | **0.13** |
|
| 57 |
+
| ROUGE-L F1 | 0.10 | **0.22** |
|
| 58 |
+
| Eval Loss | -- | **1.712** |
|
| 59 |
+
|
| 60 |
+
## Limitations
|
| 61 |
+
|
| 62 |
+
- Can hallucinate on complex multi-fact clinical notes
|
| 63 |
+
- Best used with a RAG pipeline (MedlinePlus) for verification
|
| 64 |
+
- Not a substitute for professional medical advice
|
| 65 |
+
|
| 66 |
+
## Demo
|
| 67 |
+
|
| 68 |
+
Try the live demo: [MedClear on HuggingFace Spaces](https://huggingface.co/spaces/DTanzillo/medclear)
|
| 69 |
+
|
| 70 |
+
**Duke University Hackathon 2026**
|
app.py
CHANGED
|
@@ -269,5 +269,59 @@ demo = gr.Interface(
|
|
| 269 |
theme=gr.themes.Soft(),
|
| 270 |
)
|
| 271 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
if __name__ == "__main__":
|
| 273 |
-
|
|
|
|
| 269 |
theme=gr.themes.Soft(),
|
| 270 |
)
|
| 271 |
|
| 272 |
+
# Mount a Flask API so the React frontend can call /api/simplify
|
| 273 |
+
import json
|
| 274 |
+
from flask import Flask, request as flask_request, jsonify
|
| 275 |
+
from flask_cors import CORS
|
| 276 |
+
|
| 277 |
+
# Flask application object; the /api/* routes below are registered on it.
flask_app = Flask(__name__)
|
| 278 |
+
# NOTE(review): CORS is enabled with no options, so flask-cors defaults apply
# (presumably any origin is allowed) — confirm that is intended for this API.
CORS(flask_app)
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
@flask_app.route("/api/simplify", methods=["POST"])
def api_simplify():
    """Simplify clinical text and return it with source-term annotations.

    Expects a JSON object body with a string ``text`` field. Responds with a
    JSON object containing:

    * ``input`` -- the text exactly as received.
    * ``plain_language`` -- the first element of the pair returned by
      ``simplify``.
    * ``source_annotations`` -- one entry per term from ``find_terms`` that
      occurs in the input as a whole word (case-insensitive), ordered by
      position. Only the first occurrence of each term is annotated.
    * ``output_annotations`` -- always an empty list.

    Returns a 400 response with a JSON ``error`` message when the body is not
    a JSON object, lacks ``text``, or ``text`` is not a string.
    """
    # silent=True makes malformed JSON or a wrong Content-Type yield None
    # instead of Flask aborting with its own non-JSON error response, so the
    # client always receives the JSON error below.
    data = flask_request.get_json(silent=True)
    # A JSON list or string would pass a bare `"text" in data` test and then
    # fail on indexing, so require an object explicitly.
    if not isinstance(data, dict) or "text" not in data:
        return jsonify({"error": "Missing 'text' field"}), 400

    clinical_text = data["text"]
    if not isinstance(clinical_text, str):
        # The regex search below only works on strings.
        return jsonify({"error": "'text' must be a string"}), 400

    plain_language, _ = simplify(clinical_text)

    # Build structured annotations for React frontend
    annotations = []
    for term_text, simple_def in find_terms(clinical_text):
        pattern = re.compile(r'\b' + re.escape(term_text) + r'\b', re.IGNORECASE)
        match = pattern.search(clinical_text)
        if not match:
            continue

        # A falsy lookup result falls back to a MedlinePlus search URL and an
        # empty summary.
        ml = search_medlineplus(term_text)
        ml_url = ml["url"] if ml else f"https://medlineplus.gov/search/?query={urllib.parse.quote(term_text)}"
        ml_summary = ml["summary"] if ml else ""
        annotations.append({
            "term": match.group(),
            "simple": simple_def,
            "start": match.start(),
            "end": match.end(),
            "url": ml_url,
            "medlineplus_summary": ml_summary,
        })

    annotations.sort(key=lambda x: x["start"])
    return jsonify({
        "input": clinical_text,
        "plain_language": plain_language,
        "source_annotations": annotations,
        "output_annotations": [],
    })
|
| 316 |
+
|
| 317 |
+
|
| 318 |
+
@flask_app.route("/api/health", methods=["GET"])
def api_health():
    """Return a fixed JSON health payload.

    Both fields are constants: ``model_loaded`` is hard-coded to ``True`` and
    is not derived from any model state in this function.
    """
    health_payload = {"status": "ok", "model_loaded": True}
    return jsonify(health_payload)
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
# Attach the Gradio UI (`demo`) to flask_app at "/" and rebind `demo` to the result.
# NOTE(review): gr.mount_gradio_app is documented as taking a FastAPI app as its
# first argument — confirm that passing a Flask app actually works here.
|
| 324 |
+
demo = gr.mount_gradio_app(flask_app, demo, path="/")
|
| 325 |
+
|
| 326 |
if __name__ == "__main__":
|
| 327 |
+
# When executed as a script, serve flask_app on all interfaces at port 7860.
flask_app.run(host="0.0.0.0", port=7860)
|
requirements.txt
CHANGED
|
@@ -1,3 +1,5 @@
|
|
| 1 |
torch
|
| 2 |
transformers
|
| 3 |
gradio
|
|
|
|
|
|
|
|
|
| 1 |
torch
|
| 2 |
transformers
|
| 3 |
gradio
|
| 4 |
+
flask
|
| 5 |
+
flask-cors
|