Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gradio/certificate.pem +31 -0
- assignment_sc_2/assignment_documentation.md +250 -0
- assignment_sc_2/code.py +320 -0
- assignment_sc_2/rubric_points_explanation.md +128 -0
- assignment_sc_2/rubric_points_explanation.pdf +137 -0
- code/attribution_eval.py +142 -0
- code/attribution_evalV2.py +222 -0
- code/combine_docid_labels.py +232 -0
- code/convert_awq.py +35 -0
- code/finetune-inference/convert_fp16.py +60 -0
- code/interface/annotators_v5.py +266 -0
- code/interface/annotators_v5_tran_quality.py +198 -0
- code/interface/instr +107 -0
- code/interface/instructions +43 -0
- code/interface/interface_correction_data.py +210 -0
- code/interface/t.py +8 -0
- code/interface/translate_gemma.py +78 -0
- code/interface/translation_quality.py +253 -0
- code/interface/translation_quality_v2.py +251 -0
- code/interface/vllm_app.py +46 -0
- code/interface/vllm_app_v2.py +115 -0
- code/key_subclaims_extract.py +109 -0
- code/literacy_thresholds.py +178 -0
- code/literacy_thresholds_v2.py +174 -0
- code/old/FH_es.py +86 -0
- code/old/FH_esV2.py +39 -0
- code/old/FH_fr.py +86 -0
- code/old/FH_pt.py +87 -0
- code/old/generate_thinking_data.ipynb +442 -0
- code/old/readability_controlv2.py +69 -0
- code/old/resonability_check_completeness_openai_V2.py +140 -0
- code/old/resonability_check_completeness_openai_V3.py +140 -0
- code/old/synthetic_data_generationV3.py +348 -0
- code/old/sz_es.py +68 -0
- code/rc.py +44 -0
- code/readability_final_res_process.ipynb +349 -0
- code/test.ipynb +64 -0
- code/text_classifier/dspy.ipynb +224 -0
- code/text_classifier/qwen3_(4b)_instruct.py +146 -0
- code/text_classifier/test_saved_dspy_vllm_gen_text_only.py +193 -0
- code/text_classifier/text_classifier_dspy.py +216 -0
- code/text_classifier/text_classifier_dspy_load_and_infer_full.py +353 -0
- code/text_classifier/text_classifier_dspy_only_gen_text.py +212 -0
- code/text_classifier/text_classifier_dspy_vllm.py +207 -0
- code/text_classifier/text_classifier_dspy_vllm_gen_text_only.py +203 -0
- code/text_classifier/text_classifier_dspy_vllm_test_cpp.py +115 -0
- code/translation_quality_check/calc_comet_bertscore_from_jsonl.py +274 -0
- code/translation_quality_check/eval_gpt52_translation.py +438 -0
- code/validation/data_gen_subclaims_support_valid_ch_gpt5.py +56 -0
- code/validation/subclaims_extr_valid_check_gpt5.py +56 -0
.gradio/certificate.pem
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
-----BEGIN CERTIFICATE-----
|
| 2 |
+
MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
|
| 3 |
+
TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
|
| 4 |
+
cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
|
| 5 |
+
WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
|
| 6 |
+
ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
|
| 7 |
+
MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
|
| 8 |
+
h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
|
| 9 |
+
0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
|
| 10 |
+
A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
|
| 11 |
+
T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
|
| 12 |
+
B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
|
| 13 |
+
B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
|
| 14 |
+
KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
|
| 15 |
+
OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
|
| 16 |
+
jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
|
| 17 |
+
qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
|
| 18 |
+
rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
|
| 19 |
+
HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
|
| 20 |
+
hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
|
| 21 |
+
ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
|
| 22 |
+
3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
|
| 23 |
+
NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
|
| 24 |
+
ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
|
| 25 |
+
TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
|
| 26 |
+
jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
|
| 27 |
+
oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
|
| 28 |
+
4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
|
| 29 |
+
mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
|
| 30 |
+
emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
|
| 31 |
+
-----END CERTIFICATE-----
|
assignment_sc_2/assignment_documentation.md
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Text-Attributed Network Analysis Documentation
|
| 2 |
+
|
| 3 |
+
This document explains how the implementation in `assignment_sc_2/code.py` addresses the assignment requirements and grading rubric.
|
| 4 |
+
|
| 5 |
+
## 1. Objective
|
| 6 |
+
|
| 7 |
+
The assignment analyzes a network of research papers where:
|
| 8 |
+
|
| 9 |
+
- each node is a paper with metadata (`id`, `year`, `authors`, `title`, `abstract`),
|
| 10 |
+
- each edge represents semantic similarity between two papers,
|
| 11 |
+
- edge `weight` indicates tie strength (higher weight = stronger topical similarity).
|
| 12 |
+
|
| 13 |
+
The code loads `aclbib.graphml`, extracts the Largest Connected Component (LCC), and performs:
|
| 14 |
+
|
| 15 |
+
- weak/strong tie removal analysis,
|
| 16 |
+
- centrality analysis,
|
| 17 |
+
- centrality ranking correlation analysis,
|
| 18 |
+
- optional temporal topic-shift analysis.
|
| 19 |
+
|
| 20 |
+
---
|
| 21 |
+
|
| 22 |
+
## 2. Rubric Coverage Summary
|
| 23 |
+
|
| 24 |
+
### (Part 2, 30%) Weak/Strong Ties and LCC Dynamics
|
| 25 |
+
|
| 26 |
+
Covered in `weaktie_analysis(LCC)`:
|
| 27 |
+
|
| 28 |
+
- ties are ordered by weight to represent weak-to-strong and strong-to-weak removal,
|
| 29 |
+
- two experiments are run:
|
| 30 |
+
- removing weakest ties first,
|
| 31 |
+
- removing strongest ties first,
|
| 32 |
+
- after each single edge removal, LCC size is recomputed,
|
| 33 |
+
- x-axis is fraction of ties removed,
|
| 34 |
+
- y-axis is LCC size (number of nodes).
|
| 35 |
+
|
| 36 |
+
Note: Removal order is rank-based (by sorted weights). In addition, the implementation reports explicit weak/strong tie counts using the median edge weight as the threshold (weak: weight <= median, strong: weight > median); a different rule (e.g., bottom/top quartile) can be substituted if instructor policy requires it.
|
| 37 |
+
|
| 38 |
+
### (Part 2, 35%) Centrality + Central Papers + Correlation + Interpretation
|
| 39 |
+
|
| 40 |
+
Covered in `centrality_analysis(LCC)`:
|
| 41 |
+
|
| 42 |
+
- computes degree, closeness, and betweenness centrality,
|
| 43 |
+
- identifies top 10 papers for each metric,
|
| 44 |
+
- outputs entries in `ID<TAB>Title` format,
|
| 45 |
+
- converts centrality scores to ranking vectors,
|
| 46 |
+
- computes Pearson correlation between metric rankings,
|
| 47 |
+
- prints a correlation table,
|
| 48 |
+
- identifies the lowest-correlation pair,
|
| 49 |
+
- provides interpretation grounded in metric definitions.
|
| 50 |
+
|
| 51 |
+
### (Part 2, 10%) Report Quality
|
| 52 |
+
|
| 53 |
+
This markdown report provides:
|
| 54 |
+
|
| 55 |
+
- clear method descriptions,
|
| 56 |
+
- consistent structure by rubric item,
|
| 57 |
+
- direct mapping from requirements to implementation,
|
| 58 |
+
- interpretation guidance and limitations.
|
| 59 |
+
|
| 60 |
+
### (Part 2, Optional Extra Credit, 50%) Research Evolution Analysis
|
| 61 |
+
|
| 62 |
+
Covered in `research_evolution_analysis(G)`:
|
| 63 |
+
|
| 64 |
+
- splits papers into before-2023 and after-2023 groups,
|
| 65 |
+
- tokenizes title + abstract,
|
| 66 |
+
- builds a shared global dictionary (vocabulary),
|
| 67 |
+
- trains LDA models for both groups using same vocabulary,
|
| 68 |
+
- obtains comparable topic-term matrices:
|
| 69 |
+
- `D` for pre-2023,
|
| 70 |
+
- `S` for post-2023,
|
| 71 |
+
- computes topic shift using cosine similarity,
|
| 72 |
+
- ranks potentially disappearing and emerging themes,
|
| 73 |
+
- prints top words for contextual interpretation.
|
| 74 |
+
|
| 75 |
+
---
|
| 76 |
+
|
| 77 |
+
## 3. Detailed Methodology
|
| 78 |
+
|
| 79 |
+
## 3.1 Data Loading and LCC Extraction
|
| 80 |
+
|
| 81 |
+
1. Load graph from `aclbib.graphml`.
|
| 82 |
+
2. Extract the largest connected component:
|
| 83 |
+
- this ensures path-based metrics (closeness, betweenness) are meaningful and comparable.
|
| 84 |
+
|
| 85 |
+
---
|
| 86 |
+
|
| 87 |
+
## 3.2 Weak vs Strong Tie Analysis
|
| 88 |
+
|
| 89 |
+
### Definitions
|
| 90 |
+
|
| 91 |
+
- Weak ties: lower edge weights (lower semantic similarity).
|
| 92 |
+
- Strong ties: higher edge weights (higher semantic similarity).
|
| 93 |
+
|
| 94 |
+
### Procedure
|
| 95 |
+
|
| 96 |
+
1. Sort edges by weight ascending (`weak -> strong`).
|
| 97 |
+
2. Create reversed order (`strong -> weak`).
|
| 98 |
+
3. For each removal order:
|
| 99 |
+
- remove one edge at a time,
|
| 100 |
+
- recompute LCC size after each removal,
|
| 101 |
+
- record:
|
| 102 |
+
- fraction removed = removed_edges / total_edges,
|
| 103 |
+
- LCC size = number of nodes in current largest connected component.
|
| 104 |
+
4. Plot both removal curves.
|
| 105 |
+
|
| 106 |
+
### What this shows
|
| 107 |
+
|
| 108 |
+
- If removing weak ties first rapidly fragments the network, weak ties are acting as bridges.
|
| 109 |
+
- If removing strong ties first causes larger impact, strong ties are most critical to global cohesion.
|
| 110 |
+
|
| 111 |
+
---
|
| 112 |
+
|
| 113 |
+
## 3.3 Centrality Analysis
|
| 114 |
+
|
| 115 |
+
### Metrics
|
| 116 |
+
|
| 117 |
+
- Degree centrality: local connectivity prominence.
|
| 118 |
+
- Closeness centrality: global proximity to all nodes.
|
| 119 |
+
- Betweenness centrality: control over shortest-path flow.
|
| 120 |
+
|
| 121 |
+
### Output
|
| 122 |
+
|
| 123 |
+
- Top 10 papers for each metric, as `ID<TAB>Title`.
|
| 124 |
+
- These lists identify influential papers under different notions of centrality.
|
| 125 |
+
|
| 126 |
+
---
|
| 127 |
+
|
| 128 |
+
## 3.4 Correlation Between Centrality Rankings
|
| 129 |
+
|
| 130 |
+
The assignment requests correlation between rankings, not raw centrality values.
|
| 131 |
+
|
| 132 |
+
### Procedure
|
| 133 |
+
|
| 134 |
+
1. Convert each metric score map into rank vector (rank 1 = highest centrality).
|
| 135 |
+
2. Compute Pearson correlation for each pair:
|
| 136 |
+
- Degree vs Closeness,
|
| 137 |
+
- Degree vs Betweenness,
|
| 138 |
+
- Closeness vs Betweenness.
|
| 139 |
+
3. Build and print correlation table.
|
| 140 |
+
4. Find lowest-correlation pair and print interpretation.
|
| 141 |
+
|
| 142 |
+
### Interpretation principle
|
| 143 |
+
|
| 144 |
+
Low correlation occurs when two metrics encode different structural roles, e.g.:
|
| 145 |
+
|
| 146 |
+
- local popularity (degree) vs bridge control (betweenness),
|
| 147 |
+
- global distance efficiency (closeness) vs brokerage roles (betweenness).
|
| 148 |
+
|
| 149 |
+
---
|
| 150 |
+
|
| 151 |
+
## 3.5 Optional Extra Credit: Research Evolution
|
| 152 |
+
|
| 153 |
+
### Goal
|
| 154 |
+
|
| 155 |
+
Trace thematic shifts in research trends before and after 2023.
|
| 156 |
+
|
| 157 |
+
### Procedure
|
| 158 |
+
|
| 159 |
+
1. Split nodes by publication year:
|
| 160 |
+
- before 2023,
|
| 161 |
+
- 2023 and later.
|
| 162 |
+
2. Build documents from title + abstract.
|
| 163 |
+
3. Tokenize and clean text.
|
| 164 |
+
4. Create one shared vocabulary dictionary for both groups.
|
| 165 |
+
5. Train two LDA models (same vocabulary, separate corpora).
|
| 166 |
+
6. Extract topic-term matrices:
|
| 167 |
+
- `D` (pre-2023),
|
| 168 |
+
- `S` (post-2023).
|
| 169 |
+
7. Compute shift score for each topic:
|
| 170 |
+
- shift = `1 - max cosine similarity` to any topic in opposite period.
|
| 171 |
+
8. Rank:
|
| 172 |
+
- pre-2023 topics with highest shift (potentially disappearing),
|
| 173 |
+
- post-2023 topics with highest shift (potentially emerging).
|
| 174 |
+
9. Print top words for each ranked topic.
|
| 175 |
+
|
| 176 |
+
### Why this is valid
|
| 177 |
+
|
| 178 |
+
- Shared vocabulary ensures `D` and `S` are directly comparable.
|
| 179 |
+
- Cosine similarity captures semantic overlap between topic distributions.
|
| 180 |
+
- Ranking by shift provides interpretable emergence/disappearance candidates.
|
| 181 |
+
|
| 182 |
+
---
|
| 183 |
+
|
| 184 |
+
## 4. Observed Results from Current Run
|
| 185 |
+
|
| 186 |
+
The following results were generated by running:
|
| 187 |
+
|
| 188 |
+
`python /home/mshahidul/readctrl/assignment_sc_2/code.py`
|
| 189 |
+
|
| 190 |
+
### 4.1 Network and LCC Summary
|
| 191 |
+
|
| 192 |
+
- LCC contains `1662` nodes and `26134` edges.
|
| 193 |
+
- This indicates analysis is performed on a large connected core, suitable for centrality and connectivity experiments.
|
| 194 |
+
|
| 195 |
+
### 4.2 Centrality Correlation Results
|
| 196 |
+
|
| 197 |
+
Pearson correlation between centrality rankings:
|
| 198 |
+
|
| 199 |
+
| Metric | Degree | Closeness | Betweenness |
|
| 200 |
+
|---|---:|---:|---:|
|
| 201 |
+
| Degree | 1.0000 | 0.9361 | 0.8114 |
|
| 202 |
+
| Closeness | 0.9361 | 1.0000 | 0.7684 |
|
| 203 |
+
| Betweenness | 0.8114 | 0.7684 | 1.0000 |
|
| 204 |
+
|
| 205 |
+
- Lowest-correlation pair: **Closeness vs Betweenness** (`r = 0.7684`).
|
| 206 |
+
- Interpretation: closeness captures global proximity, while betweenness captures shortest-path brokerage; these are related but not identical structural roles.
|
| 207 |
+
|
| 208 |
+
### 4.3 Central Papers (Top-10) Highlights
|
| 209 |
+
|
| 210 |
+
Across Degree, Closeness, and Betweenness top-10 lists, several papers repeatedly appear, including:
|
| 211 |
+
|
| 212 |
+
- `ahuja-etal-2023-mega` (`{MEGA}: Multilingual Evaluation of Generative {AI}`),
|
| 213 |
+
- `ding-etal-2020-discriminatively`,
|
| 214 |
+
- `shin-etal-2020-autoprompt`,
|
| 215 |
+
- `weller-etal-2020-learning`,
|
| 216 |
+
- `qin-etal-2023-chatgpt`.
|
| 217 |
+
|
| 218 |
+
This overlap suggests robust influence of these papers across local connectivity, global accessibility, and bridge-like structural importance.
|
| 219 |
+
|
| 220 |
+
### 4.4 Optional Topic Evolution Results
|
| 221 |
+
|
| 222 |
+
Topic matrices:
|
| 223 |
+
|
| 224 |
+
- `D` (before 2023): shape `(5, 5000)`
|
| 225 |
+
- `S` (after 2023): shape `(5, 5000)`
|
| 226 |
+
|
| 227 |
+
Top potentially disappearing theme example:
|
| 228 |
+
|
| 229 |
+
- Before Topic 4, shift `0.1912`, keywords:
|
| 230 |
+
`question, knowledge, event, performance, questions, task, graph, can`
|
| 231 |
+
|
| 232 |
+
Top potentially emerging theme example:
|
| 233 |
+
|
| 234 |
+
- After Topic 2, shift `0.1989`, keywords:
|
| 235 |
+
`llms, large, data, tasks, knowledge, reasoning, generation, performance`
|
| 236 |
+
|
| 237 |
+
Interpretation: post-2023 topics show stronger emphasis on **LLMs**, reasoning, and generation-centered trends.
|
| 238 |
+
|
| 239 |
+
---
|
| 240 |
+
|
| 241 |
+
## 5. Limitations and Practical Notes
|
| 242 |
+
|
| 243 |
+
- Weak/strong tie counts are reported explicitly using the median edge weight as the cutoff; alternative thresholds (e.g., quartiles) can be substituted if required.
|
| 244 |
+
- Topic modeling quality depends on preprocessing and corpus size.
|
| 245 |
+
- Interpretation quality in final report should connect output topics/central papers to real NLP/AI trends for stronger grading.
|
| 246 |
+
|
| 247 |
+
---
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
|
assignment_sc_2/code.py
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Author: Md. Shahidul Salim
|
| 2 |
+
# Date: February 12, 2026
|
| 3 |
+
|
| 4 |
+
import networkx as nx
|
| 5 |
+
import pandas as pd
|
| 6 |
+
from scipy.stats import pearsonr
|
| 7 |
+
import numpy as np
|
| 8 |
+
import matplotlib.pyplot as plt
|
| 9 |
+
|
| 10 |
+
# Extra credit imports
|
| 11 |
+
from gensim.models.ldamodel import LdaModel
|
| 12 |
+
from gensim.corpora.dictionary import Dictionary
|
| 13 |
+
import nltk
|
| 14 |
+
from nltk.tokenize import word_tokenize
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# Ensure NLTK tokenizer resources are available before any tokenization runs.
# ("punkt_tab" is only required by some NLTK versions.)
for _resource_path, _package_name in (
    ("tokenizers/punkt", "punkt"),
    ("tokenizers/punkt_tab", "punkt_tab"),
):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package_name, quiet=True)
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _safe_int_year(value):
|
| 31 |
+
try:
|
| 32 |
+
return int(value)
|
| 33 |
+
except (TypeError, ValueError):
|
| 34 |
+
return 0
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _rank_vector(scores, node_order):
|
| 38 |
+
"""
|
| 39 |
+
Convert centrality scores to rank vectors (1 = highest centrality),
|
| 40 |
+
which matches the assignment requirement to correlate rankings.
|
| 41 |
+
"""
|
| 42 |
+
series = pd.Series({node: scores[node] for node in node_order})
|
| 43 |
+
ranks = series.rank(method="average", ascending=False)
|
| 44 |
+
return [float(ranks[node]) for node in node_order]
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _tokenize(text):
    """Lowercase *text* and keep purely alphabetic tokens longer than 2 chars."""
    kept = []
    for token in word_tokenize(text.lower()):
        if len(token) > 2 and token.isalpha():
            kept.append(token)
    return kept
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# Part 1: Weak Tie Analysis
|
| 53 |
+
def weaktie_analysis(LCC):
    """Compare LCC fragmentation under weakest-first vs strongest-first tie removal.

    Edges are ordered by their ``weight`` attribute; after every single edge
    removal the size of the largest connected component is recomputed, and both
    removal curves are plotted against the fraction of ties removed. Explicit
    weak/strong tie counts are reported using the median weight as the cutoff.
    """
    print("\n--- Starting Weak/Strong Tie Analysis ---")

    def tie_weight(edge):
        # Edges without a weight attribute are treated as weight 0.0.
        return float(edge[2].get("weight", 0.0))

    edges_asc = sorted(LCC.edges(data=True), key=tie_weight)
    edges_desc = edges_asc[::-1]
    edge_weights = [tie_weight(edge) for edge in edges_asc]
    total_edges = len(edges_asc)

    if total_edges == 0:
        print("No ties found in the LCC; skipping weak/strong tie removal analysis.")
        return

    # Median edge weight serves as the weak/strong cutoff:
    # weak ties: weight <= median, strong ties: weight > median.
    median_weight = float(np.median(edge_weights))
    weak_ties = [edge for edge in edges_asc if tie_weight(edge) <= median_weight]
    strong_ties = [edge for edge in edges_asc if tie_weight(edge) > median_weight]

    print(f"Total ties in LCC: {total_edges}")
    print(f"Weak tie threshold (median weight): {median_weight:.4f}")
    print(f"Number of weak ties (weight <= {median_weight:.4f}): {len(weak_ties)}")
    print(f"Number of strong ties (weight > {median_weight:.4f}): {len(strong_ties)}")
    print("Methodology: remove one tie per step and recompute LCC size after each removal.")

    def get_lcc_sizes_by_single_removal(edge_list):
        # Work on a copy so both experiments start from the intact LCC.
        working = LCC.copy()
        n_edges = len(edge_list)
        fractions_removed = [0.0]
        lcc_sizes = [len(max(nx.connected_components(working), key=len))]

        for step, (u, v, _) in enumerate(edge_list, start=1):
            if working.has_edge(u, v):
                working.remove_edge(u, v)

            if working.number_of_nodes() > 0:
                lcc_sizes.append(len(max(nx.connected_components(working), key=len)))
            else:
                lcc_sizes.append(0)
            fractions_removed.append(step / n_edges)

        return fractions_removed, lcc_sizes

    x_weak, y_weak = get_lcc_sizes_by_single_removal(edges_asc)
    x_strong, y_strong = get_lcc_sizes_by_single_removal(edges_desc)

    plt.figure(figsize=(10, 6))
    plt.plot(x_weak, y_weak, label="Removing Weakest First")
    plt.plot(x_strong, y_strong, label="Removing Strongest First")
    plt.xlabel("Fraction of Ties Removed")
    plt.ylabel("LCC Size (Number of Nodes)")
    plt.title("Impact of Weak vs Strong Tie Removal on LCC")
    plt.legend()
    plt.grid(True, linestyle="--", alpha=0.7)
    plt.tight_layout()
    plt.show()
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# Part 2: Centrality Analysis
|
| 113 |
+
def centrality_analysis(LCC):
    """Report degree/closeness/betweenness centrality for the LCC.

    Prints the Pearson correlation table between the three ranking vectors,
    the lowest/highest-correlation pair with an interpretation, the top-10
    papers per metric (ID<TAB>Title<TAB>Score), and papers that recur across
    multiple top-10 lists.
    """
    print("\n--- Starting Centrality Analysis ---")

    degree = nx.degree_centrality(LCC)
    closeness = nx.closeness_centrality(LCC)
    betweenness = nx.betweenness_centrality(LCC)

    # Correlate rankings (not raw scores), per the assignment requirement.
    nodes = list(LCC.nodes())
    d_rank = _rank_vector(degree, nodes)
    c_rank = _rank_vector(closeness, nodes)
    b_rank = _rank_vector(betweenness, nodes)

    corr_dc = pearsonr(d_rank, c_rank)[0]
    corr_db = pearsonr(d_rank, b_rank)[0]
    corr_cb = pearsonr(c_rank, b_rank)[0]

    print("\nTable 1: Pearson Correlation between Centrality Measure Rankings")
    table = pd.DataFrame(
        {
            "Metric": ["Degree", "Closeness", "Betweenness"],
            "Degree": [1.0, corr_dc, corr_db],
            "Closeness": [corr_dc, 1.0, corr_cb],
            "Betweenness": [corr_db, corr_cb, 1.0],
        }
    )
    print(table.to_string(index=False, float_format=lambda x: f"{x:.4f}"))

    pair_corr = {
        ("Degree", "Closeness"): corr_dc,
        ("Degree", "Betweenness"): corr_db,
        ("Closeness", "Betweenness"): corr_cb,
    }
    lowest_pair, lowest_value = min(pair_corr.items(), key=lambda item: item[1])
    highest_pair, highest_value = max(pair_corr.items(), key=lambda item: item[1])
    print(
        f"\nLowest-correlation pair: {lowest_pair[0]} vs {lowest_pair[1]} "
        f"(r = {lowest_value:.4f})"
    )
    print(
        f"Highest-correlation pair: {highest_pair[0]} vs {highest_pair[1]} "
        f"(r = {highest_value:.4f})"
    )

    # Canned interpretations keyed by the (unordered) metric pair.
    explanations = {
        frozenset(("Degree", "Closeness")): (
            "Degree is local (immediate neighbors), while closeness captures "
            "global shortest-path proximity to all nodes."
        ),
        frozenset(("Degree", "Betweenness")): (
            "High degree does not always imply bridge-like behavior; betweenness "
            "emphasizes control over shortest paths across communities."
        ),
        frozenset(("Closeness", "Betweenness")): (
            "Closeness rewards overall proximity, while betweenness rewards "
            "being on critical routes between other nodes."
        ),
    }
    print(f"Interpretation: {explanations[frozenset(lowest_pair)]}")
    print(
        "Correlation quality note: values closer to 1 indicate stronger agreement "
        "between ranking-based notions of node importance."
    )

    metrics = {"Degree": degree, "Closeness": closeness, "Betweenness": betweenness}
    top_nodes_by_metric = {}
    for metric_name, score_map in metrics.items():
        print(f"\nTop 10 Papers for {metric_name} (ID<TAB>Title<TAB>Score):")
        ranked = sorted(score_map.items(), key=lambda item: item[1], reverse=True)
        top_10 = ranked[:10]
        top_nodes_by_metric[metric_name] = [node_id for node_id, _ in top_10]
        for node_id, _ in top_10:
            title = LCC.nodes[node_id].get("title", "Unknown Title")
            print(f"{node_id}\t{title}\t{score_map[node_id]:.6f}")

    # Papers present in several top-10 lists are robustly central.
    top_presence = {}
    for metric_name, node_ids in top_nodes_by_metric.items():
        for node_id in node_ids:
            top_presence.setdefault(node_id, []).append(metric_name)

    repeated = sorted(
        (
            (node_id, sorted(metric_names))
            for node_id, metric_names in top_presence.items()
            if len(metric_names) >= 2
        ),
        key=lambda item: (-len(item[1]), item[0]),
    )

    if repeated:
        print("\nPapers repeated across multiple centrality top-10 lists:")
        for node_id, metric_names in repeated:
            title = LCC.nodes[node_id].get("title", "Unknown Title")
            print(f"{node_id}\t{title}\tappears in: {', '.join(metric_names)}")
    else:
        print("\nNo paper appears in more than one top-10 centrality list.")
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
# Part 3: Research Evolution (Optional Extra Credit)
|
| 211 |
+
def research_evolution_analysis(G, num_topics=5):
    """Contrast LDA topics of pre-2023 vs 2023+ papers over a shared vocabulary.

    Each topic's shift score is ``1 - max cosine similarity`` against every
    topic of the opposite period; high-shift topics are reported as potentially
    disappearing (before-2023) or emerging (2023+) themes, with top words.
    """
    print("\n--- Optional: Research Evolution Analysis ---")

    def tokenized_docs(node_ids):
        # One document per paper: tokenized "title abstract"; empty docs dropped.
        docs = []
        for node in node_ids:
            attrs = G.nodes[node]
            text = f"{attrs.get('title', '')} {attrs.get('abstract', '')}".strip()
            tokens = _tokenize(text) if text else []
            if tokens:
                docs.append(tokens)
        return docs

    before_nodes = [n for n, d in G.nodes(data=True) if _safe_int_year(d.get("year")) < 2023]
    after_nodes = [n for n, d in G.nodes(data=True) if _safe_int_year(d.get("year")) >= 2023]

    before_docs = tokenized_docs(before_nodes)
    after_docs = tokenized_docs(after_nodes)

    if not before_docs or not after_docs:
        print("Insufficient tokenized documents before/after 2023 for topic comparison.")
        return

    # One shared dictionary gives a single global vocabulary, so the two
    # topic-term matrices below are directly comparable column-by-column.
    dictionary = Dictionary(before_docs + after_docs)
    dictionary.filter_extremes(no_below=2, no_above=0.5, keep_n=5000)
    if len(dictionary) == 0:
        print("Vocabulary became empty after filtering; skipping extra credit analysis.")
        return

    before_corpus = [bow for bow in (dictionary.doc2bow(doc) for doc in before_docs) if bow]
    after_corpus = [bow for bow in (dictionary.doc2bow(doc) for doc in after_docs) if bow]

    if not before_corpus or not after_corpus:
        print("Insufficient BOW documents after vocabulary filtering.")
        return

    def fit_lda(corpus):
        # Fixed random_state keeps topic output reproducible across runs.
        return LdaModel(
            corpus=corpus,
            id2word=dictionary,
            num_topics=num_topics,
            passes=10,
            random_state=42,
        )

    lda_before = fit_lda(before_corpus)
    lda_after = fit_lda(after_corpus)

    # D and S are topic-term probability matrices over the shared vocabulary.
    D = lda_before.get_topics()  # shape: (k1, n)
    S = lda_after.get_topics()  # shape: (k2, n)
    print(f"D matrix shape (before): {D.shape}")
    print(f"S matrix shape (after): {S.shape}")

    def cosine_similarity(a, b):
        denom = np.linalg.norm(a) * np.linalg.norm(b)
        if denom == 0:
            return 0.0
        return float(np.dot(a, b) / denom)

    def shift_scores(own, other):
        # shift = 1 - best cosine match against any topic of the other period,
        # sorted so the most-shifted topics come first.
        scores = []
        for idx in range(own.shape[0]):
            sims = [cosine_similarity(own[idx], other[j]) for j in range(other.shape[0])]
            scores.append((idx, 1.0 - max(sims) if sims else 1.0))
        scores.sort(key=lambda item: item[1], reverse=True)
        return scores

    before_shift = shift_scores(D, S)
    after_shift = shift_scores(S, D)

    def top_words(topic_vec, topn=8):
        strongest = np.argsort(topic_vec)[::-1][:topn]
        return ", ".join(dictionary[i] for i in strongest)

    print("\nPotentially disappearing themes (before topics with largest shift):")
    for topic_id, shift_score in before_shift:
        print(f"Before Topic {topic_id} | shift={shift_score:.4f} | {top_words(D[topic_id])}")

    print("\nPotentially emerging themes (after topics with largest shift):")
    for topic_id, shift_score in after_shift:
        print(f"After Topic {topic_id} | shift={shift_score:.4f} | {top_words(S[topic_id])}")
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
def main():
|
| 301 |
+
try:
|
| 302 |
+
G = nx.read_graphml("aclbib.graphml")
|
| 303 |
+
except Exception as e:
|
| 304 |
+
print(f"Error loading graph file: {e}")
|
| 305 |
+
return
|
| 306 |
+
|
| 307 |
+
LCC_nodes = max(nx.connected_components(G), key=len)
|
| 308 |
+
LCC = G.subgraph(LCC_nodes).copy()
|
| 309 |
+
print(
|
| 310 |
+
f"Network loaded. LCC contains {len(LCC.nodes())} nodes and "
|
| 311 |
+
f"{len(LCC.edges())} edges."
|
| 312 |
+
)
|
| 313 |
+
|
| 314 |
+
weaktie_analysis(LCC)
|
| 315 |
+
centrality_analysis(LCC)
|
| 316 |
+
research_evolution_analysis(G)
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
if __name__ == "__main__":
|
| 320 |
+
main()
|
assignment_sc_2/rubric_points_explanation.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Part 2 Rubric Explanation
|
| 2 |
+
## 1) Weak/strong ties and LCC change during removal
|
| 3 |
+
|
| 4 |
+
Tie strength is defined by edge `weight` in the LCC.
|
| 5 |
+
|
| 6 |
+
- Weak ties: `weight <= median`
|
| 7 |
+
- Strong ties: `weight > median`
|
| 8 |
+
|
| 9 |
+
From the run output:
|
| 10 |
+
I run two removal orders on the LCC:
|
| 11 |
+
1. weakest to strongest
|
| 12 |
+
2. strongest to weakest
|
| 13 |
+
|
| 14 |
+
After each edge removal, the LCC is recomputed and recorded (fraction removed vs. LCC size). This directly satisfies the rubric requirement to compare structural robustness under weak-first and strong-first deletions.
|
| 15 |
+
Edges are removed one by one. After every removal, the LCC is recalculated and its size is stored as node count. The x-axis is fraction of ties removed, and the y-axis is LCC size.
|
| 16 |
+
## 2) Centrality, top papers, and correlation analysis
|
| 17 |
+
From the run output, the starting LCC is:
|
| 18 |
+
Centrality is computed on the LCC using:
|
| 19 |
+
- `1662` nodes
|
| 20 |
+
- `26134` edges
|
| 21 |
+
|
| 22 |
+
The code also prints exact weak/strong tie statistics:
|
| 23 |
+
|
| 24 |
+
- total number of ties in the LCC: `26134`
|
| 25 |
+
- weak-tie threshold (median weight): `0.6276`
|
| 26 |
+
- number of weak ties (`weight <= 0.6276`): `13067`
|
| 27 |
+
- number of strong ties (`weight > 0.6276`): `13067`
|
| 28 |
+
|
| 29 |
+
So both tie classification and total weak/strong counts are explicitly reported before the stepwise removal process.
|
| 30 |
+
|
| 31 |
+
## Centrality, central papers, interpretation, correlation
|
| 32 |
+
|
| 33 |
+
Three centrality measures are computed on the LCC:
|
| 34 |
+
- Degree
|
| 35 |
+
- Closeness
|
| 36 |
+
- Betweenness
|
| 37 |
+
|
| 38 |
+
For each metric, top-10 papers are printed in `ID<TAB>Title` format. Correlation between ranking vectors is:
|
| 39 |
+
For each one, top-10 papers are listed in `ID<TAB>Title` format.
|
| 40 |
+
|
| 41 |
+
For correlation, I first convert centrality scores to ranking vectors and then compute Pearson correlation between rankings.
|
| 42 |
+
|
| 43 |
+
Results from the run:
|
| 44 |
+
| Metric | Degree | Closeness | Betweenness |
|
| 45 |
+
|---|---:|---:|---:|
|
| 46 |
+
| Degree | 1.0000 | 0.9361 | 0.8114 |
|
| 47 |
+
| Closeness | 0.9361 | 1.0000 | 0.7684 |
|
| 48 |
+
| Betweenness | 0.8114 | 0.7684 | 1.0000 |
|
| 49 |
+
|
| 50 |
+
Lowest-correlation pair: **Closeness vs Betweenness (`0.7684`)**.
|
| 51 |
+
- Degree vs Closeness: `0.9361`
|
| 52 |
+
- Degree vs Betweenness: `0.8114`
|
| 53 |
+
- Closeness vs Betweenness: `0.7684` (lowest)
|
| 54 |
+
Interpretation: closeness captures global proximity, while betweenness captures bridge roles on shortest paths. A node can be globally near many others without being a major bridge, so these rankings diverge more than the other pairs.
|
| 55 |
+
The output explicitly reports the lowest-correlation pair.
|
| 56 |
+
Papers repeatedly appearing across top lists (e.g., `ahuja-etal-2023-mega`, `ding-etal-2020-discriminatively`, `qin-etal-2023-chatgpt`) indicate robust influence across multiple centrality notions.
|
| 57 |
+
Lowest pair interpretation:
|
| 58 |
+
## 3) Optional extra credit: theme shift before vs after 2023
|
| 59 |
+
- closeness measures overall proximity in the graph
|
| 60 |
+
- betweenness measures bridge role on shortest paths
|
| 61 |
+
- these are related but different structural roles, so their rankings are less aligned
|
| 62 |
+
I split papers into two periods (before 2023, and 2023+), build text from title+abstract, use one shared vocabulary, train LDA for both periods, and compare topic vectors by cosine similarity.
|
| 63 |
+
Repeatedly central papers across top lists include:
|
| 64 |
+
Output evidence:
|
| 65 |
+
- `ahuja-etal-2023-mega`
|
| 66 |
+
- `ding-etal-2020-discriminatively`
|
| 67 |
+
- `shin-etal-2020-autoprompt`
|
| 68 |
+
- `weller-etal-2020-learning`
|
| 69 |
+
- `qin-etal-2023-chatgpt`
|
| 70 |
+
|
| 71 |
+
The code also explicitly prints papers that appear in multiple metric top-10 lists (with metric names), which strengthens the evidence for identifying robustly central papers.
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
## Optional Extra Credit (50%): Theme shift before and after 2023
|
| 75 |
+
|
| 76 |
+
I compare two time groups: before 2023 and 2023+.
|
| 77 |
+
|
| 78 |
+
Steps used:
|
| 79 |
+
|
| 80 |
+
1. split papers by year
|
| 81 |
+
2. create text from title + abstract
|
| 82 |
+
3. tokenize and clean
|
| 83 |
+
4. build one shared vocabulary
|
| 84 |
+
5. train LDA for each period
|
| 85 |
+
6. extract topic-term matrices `D` (before) and `S` (after)
|
| 86 |
+
7. compare topics with cosine similarity and rank by shift score
|
| 87 |
+
|
| 88 |
+
Run evidence:
|
| 89 |
+
|
| 90 |
+
- `D` shape: `(5, 5000)`
|
| 91 |
+
- `S` shape: `(5, 5000)`
|
| 92 |
+
|
| 93 |
+
Examples from output:
|
| 94 |
+
|
| 95 |
+
- emerging: `After Topic 2 | shift=0.1989 | llms, large, data, tasks, knowledge, reasoning, generation, performance`
|
| 96 |
+
- disappearing: `Before Topic 4 | shift=0.1912 | question, knowledge, event, performance, questions, task, graph, can`
|
| 97 |
+
|
| 98 |
+
This indicates a stronger LLM/reasoning focus in the later period.
|
| 99 |
+
|
| 100 |
+
## Results (from current execution)
|
| 101 |
+
|
| 102 |
+
- Network loaded successfully; LCC size is `1662` nodes and `26134` edges.
|
| 103 |
+
- Weak/strong tie section reports:
|
| 104 |
+
- total ties: `26134`
|
| 105 |
+
- median-weight threshold: `0.6276`
|
| 106 |
+
- weak ties: `13067`
|
| 107 |
+
- strong ties: `13067`
|
| 108 |
+
- Centrality ranking correlations:
|
| 109 |
+
- Degree-Closeness: `0.9361`
|
| 110 |
+
- Degree-Betweenness: `0.8114`
|
| 111 |
+
- Closeness-Betweenness: `0.7684`
|
| 112 |
+
- Lowest-correlation pair: Closeness vs Betweenness.
|
| 113 |
+
- Top-10 central papers were produced for all three metrics in `ID<TAB>Title` format.
|
| 114 |
+
- Repeated papers across multiple centrality top-10 lists are explicitly reported.
|
| 115 |
+
- Topic-evolution matrices were produced:
|
| 116 |
+
- `D` (before 2023): `(5, 5000)`
|
| 117 |
+
- `S` (2023+): `(5, 5000)`
|
| 118 |
+
- Highest-shift emerging topic: After Topic 2 (`shift=0.1989`) with keywords around `llms`, `reasoning`, and `generation`.
|
| 119 |
+
- Highest-shift disappearing topic: Before Topic 4 (`shift=0.1912`) with keywords around `question`, `knowledge`, and `graph`.
|
| 120 |
+
- Topic matrices: `D` (before) = `(5, 5000)`, `S` (2023+) = `(5, 5000)`
|
| 121 |
+
## Findings
|
| 122 |
+
Conclusion: post-2023 topics shift toward LLM- and reasoning-centered themes, while earlier topics are more question/knowledge/graph-oriented.
|
| 123 |
+
- The centrality rankings are strongly related overall, but not identical.
|
| 124 |
+
- Degree and closeness are most aligned (`0.9361`), indicating that papers with strong local connectivity are often globally well-positioned.
|
| 125 |
+
- Closeness and betweenness are least aligned (`0.7684`), showing that global proximity and bridge-role influence capture different node functions.
|
| 126 |
+
- Repeated appearance of papers such as `ahuja-etal-2023-mega`, `ding-etal-2020-discriminatively`, and `qin-etal-2023-chatgpt` across multiple lists suggests robust influence across different centrality definitions.
|
| 127 |
+
- Topic-shift outputs indicate post-2023 movement toward LLM-oriented and reasoning-heavy themes.
|
| 128 |
+
- Overall, the network remains highly connected at baseline, and the analysis pipeline covers connectivity, influence, and temporal theme evolution in a consistent way.
|
assignment_sc_2/rubric_points_explanation.pdf
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
%PDF-1.4
|
| 2 |
+
%���� ReportLab Generated PDF document (opensource)
|
| 3 |
+
1 0 obj
|
| 4 |
+
<<
|
| 5 |
+
/F1 2 0 R /F2 3 0 R /F3 5 0 R
|
| 6 |
+
>>
|
| 7 |
+
endobj
|
| 8 |
+
2 0 obj
|
| 9 |
+
<<
|
| 10 |
+
/BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font
|
| 11 |
+
>>
|
| 12 |
+
endobj
|
| 13 |
+
3 0 obj
|
| 14 |
+
<<
|
| 15 |
+
/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding /Name /F2 /Subtype /Type1 /Type /Font
|
| 16 |
+
>>
|
| 17 |
+
endobj
|
| 18 |
+
4 0 obj
|
| 19 |
+
<<
|
| 20 |
+
/Contents 12 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 11 0 R /Resources <<
|
| 21 |
+
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
| 22 |
+
>> /Rotate 0 /Trans <<
|
| 23 |
+
|
| 24 |
+
>>
|
| 25 |
+
/Type /Page
|
| 26 |
+
>>
|
| 27 |
+
endobj
|
| 28 |
+
5 0 obj
|
| 29 |
+
<<
|
| 30 |
+
/BaseFont /Courier /Encoding /WinAnsiEncoding /Name /F3 /Subtype /Type1 /Type /Font
|
| 31 |
+
>>
|
| 32 |
+
endobj
|
| 33 |
+
6 0 obj
|
| 34 |
+
<<
|
| 35 |
+
/Contents 13 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 11 0 R /Resources <<
|
| 36 |
+
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
| 37 |
+
>> /Rotate 0 /Trans <<
|
| 38 |
+
|
| 39 |
+
>>
|
| 40 |
+
/Type /Page
|
| 41 |
+
>>
|
| 42 |
+
endobj
|
| 43 |
+
7 0 obj
|
| 44 |
+
<<
|
| 45 |
+
/Contents 14 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 11 0 R /Resources <<
|
| 46 |
+
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
| 47 |
+
>> /Rotate 0 /Trans <<
|
| 48 |
+
|
| 49 |
+
>>
|
| 50 |
+
/Type /Page
|
| 51 |
+
>>
|
| 52 |
+
endobj
|
| 53 |
+
8 0 obj
|
| 54 |
+
<<
|
| 55 |
+
/Contents 15 0 R /MediaBox [ 0 0 595.2756 841.8898 ] /Parent 11 0 R /Resources <<
|
| 56 |
+
/Font 1 0 R /ProcSet [ /PDF /Text /ImageB /ImageC /ImageI ]
|
| 57 |
+
>> /Rotate 0 /Trans <<
|
| 58 |
+
|
| 59 |
+
>>
|
| 60 |
+
/Type /Page
|
| 61 |
+
>>
|
| 62 |
+
endobj
|
| 63 |
+
9 0 obj
|
| 64 |
+
<<
|
| 65 |
+
/PageMode /UseNone /Pages 11 0 R /Type /Catalog
|
| 66 |
+
>>
|
| 67 |
+
endobj
|
| 68 |
+
10 0 obj
|
| 69 |
+
<<
|
| 70 |
+
/Author (\(anonymous\)) /CreationDate (D:20260212152818-08'00') /Creator (\(unspecified\)) /Keywords () /ModDate (D:20260212152818-08'00') /Producer (ReportLab PDF Library - \(opensource\))
|
| 71 |
+
/Subject (\(unspecified\)) /Title (\(anonymous\)) /Trapped /False
|
| 72 |
+
>>
|
| 73 |
+
endobj
|
| 74 |
+
11 0 obj
|
| 75 |
+
<<
|
| 76 |
+
/Count 4 /Kids [ 4 0 R 6 0 R 7 0 R 8 0 R ] /Type /Pages
|
| 77 |
+
>>
|
| 78 |
+
endobj
|
| 79 |
+
12 0 obj
|
| 80 |
+
<<
|
| 81 |
+
/Filter [ /ASCII85Decode /FlateDecode ] /Length 1311
|
| 82 |
+
>>
|
| 83 |
+
stream
|
| 84 |
+
GauHL>ArL\'RoMS300IBA`?rd[6)fS[j-kh%spUrQbJhp^tU-[3mLRafC,t3BM.OAUacGn!ljDNhmn.n!?WE%]]LNUDfb^KR/dP$^&epA,FEt7]D:!,=(K'$$)U90A-C=?SWYPGn9=Z8Dn_P'/)5[q2T;;T\7#AJVDenb]q+m&PmRTfX'J8f_qeg.d#';*WXgf`(ZmqrK7Dn@PgLmA(d5Q_KP,NE)435?eN]!VHu,#]eJ]-7%FAhr?Z0kViSPO"Q!t@Cbr6bJ.DF3H_-*E.f\'e)#0ssTOVmph5^_s5%gTa#'rVQD?If91=euOT'C@_$"BMYT#'^6-5-PAp;O"t_CBe"_f,J9`pSN*68SS>=5:q+3F\]t"d.k8#P,?R\Gd9'q:Y;m>[-+FKbq$^JA(G).E(U=*%973N[)%gfK8Ho:%H.IeL1=(u&Dl_,#79btP["2:)%1OJ.*rt(XbATCMLZT<&>!rClsRq9]L(t%_f6]NOtA\rXugs6BI%@OU0-:)fnd!/B4GW8]FJo%hj]&XiTIAHJ([24!17'm?of(]+EJ&)5Ik9^ZQHZZ30#Qu_K]UcIKA,2J)6'3<.L''-6!M%f>aNDP(%u,5sKfspkUeNr`BAHEj>nm:$`f1^9JoCAo@bcfDl?iS-S8.6:9J%Er_G.e4Ps-Z8?Sm6EOU$\pWm@8JidW$M,ioG<#Q5<bYGEqkL),/9^m9'RoGfbr6tnIEG=]9[tmbZCD\U>BE49AJ)sK@q62''1U&\el6,#H;]-#2A^5,VD-&m*j=fQY/]WsdTjs1&?AQ>m9Q0JXgQj<@;W3_*5kH[X-[LA3e,]^8irp9>-t;I<Q@n1:s/&`dh1\<eR<([1/-S+">3DoB]Hb&9TgYTEHY.l@=/)$nQ7&&6GggB@M^H$@n.A>$99BbaQ1ghP9]*QMafr`H%j5X*jLH-qu"O0do#X)n_>Q,I>c:b/:7-d%;hUO5:%!ReuC;<fXgoS_Oh#`QiPlT#a,Rg9ZEgV&\chi4U>-!pGrbjf0o?TD?0&,C#XY#\cC;p/Cp_$*jdMNm6p?H`#Tuo1]o#qPLP/6huX#aD^Sq;%<4ePVpX-gC8lDdBlYK,pTW+U`;<cS6E#n.;L(QtL9Gi>/hOZ.Di4AD!Db5$(Pb)oC*Btj=g41<#@0-<=hWo+42.'OC+42S1_.rbfOPg*S\+XDXjSf%Pb\Ia]*[QSs%q/_:BM9s<[bRpanjb]>Y)sDa."aq=MdXWkPMe+'1Xh/H]+KUTo1h2g^D2.Pc0(jJir'MP8S"Fn`q!8K*C^DRR8%W;=ZeQ,:>u8iTbn?>l3+~>endstream
|
| 85 |
+
endobj
|
| 86 |
+
13 0 obj
|
| 87 |
+
<<
|
| 88 |
+
/Filter [ /ASCII85Decode /FlateDecode ] /Length 1420
|
| 89 |
+
>>
|
| 90 |
+
stream
|
| 91 |
+
GauHLgMZ%0&:Ml+bY%tL)gb$0'bgh?h$jp'Mf';ZoSY8[D/OJ(8];"_O7/19't$U_7R`gi&JJ*5F,"`r,Qih9RJED=!*;hgGg1cLYhT6M(HsCb_O'B;h&/;u%,F@>\-[d9$l1B#'70-6[FZDiP1]%+X>`SS8sl__'V3/4#:r+'WODsKZ1ER,]2r&)okMLXdok;2B:k&b@YI_Z_o1r#7/R]0nY[#16FB=b/?*l#Ds$-fDDEu`8HqGPj6KN)Tfc5(fl\kHdqf83KbpEIAkk.NZp>iY'aOu0$?P,?gjD"RB"rGKj_n5Vi7Gm(>SRO6>*Q!iN,oJm&PA`c&@)"!kA*gq/HcigicTAE2mD7-.f*e#O+Ns*.?PBN4bBJd.#Bp>[/,tf>I_^IeXCoOKA'a&Dj=%t&ADR&A6fY254Y5G]DXoG,&KFR"="uWVFKHeP\m*>Cd_QD%NsKjdK5OmL59m9$.0q'SLF3jhbV*kJ`jg2"J6;TNft;b%k*f'+3_;L;=VLU00`ciO;KIk;#),si#2FrT5c@mC!bH;)S[`GJg<B7qAqIt3:oWAB9@E^1P;&LhZ>Z8UD'CDHFV!W?kA$s%Ih_+2=NmkoH*E4pDb`W#9sC4(f]krC-G!(Dl`,XW4SBl\T2uJ8.4kP"Ou*(V`*2n9C$-ZPP6F`[.-%p:O2])T8DLVXJkkAe`tqAC<ZZ=LA`M=R1Mb"qp1F/:>IDbRXboC_)'LDO-J*f/Z:2aL=Ga\%pCE=%PB<dVXKS^G,N4VC-WXZ,^:#\7\DNp/=NK_7W<X*p@DUqk9X';B79-3R*&HFU7/Ge=qo>>qLOu:/<L:hS$s,HZQ&Xp(S-(q1mRQVcmsFbCVfToaebH%/;9nT;8@>uTlq1Y_[B5TG]Hj5ek%]mhW,ZUqDBR4c8Q[V,`"`9d:TDT4Z<k4(EdM,hinR?=kar'[*)!'D_'$`I,^3C9$\S.<1_8[leht);(=a7akn1W1/U[rq\sUnOVHMPdH5,)kT1\Z90_kd"[a5Io1?N/J#%.cCW5(O>]3s.2B=147OR+.:$%U<D"a*`H-TtJW+&b+]C3"L[%J5*J6jppT%rjN5VHt'D!>K^-jsM@$&Ek*M=%Pr:^j9;FMt=q1G+Ef'+eYPM;8V!baW7W<,HJqMiD#"Y[jPsZ>^fS/%6j/Zt4AT*MT4<5L=8,+>G.&PuA848/j%[=sQDe(5b'#0$E[UI^h]/MX"rbkA#5=W(K^IX7H_Oms*FLLDdK?XL@u&V3?XYcZ?0CF-d#UW;,LP?d+=Ph+I@AQ<S#l(6"JoB&@'&j0+I4,'iR:7dEDCoAtNW_'ado=%#/iITi:L!]dYVdLgl%@#`)1c+R#d+a")P3%Vj,,=)p2cf<sN@<uGO[WG;HHYW5deCq'C)`hsY^hSZ0Yo`gZiTb(Vk4T5/rWWu8:(@~>endstream
|
| 92 |
+
endobj
|
| 93 |
+
14 0 obj
|
| 94 |
+
<<
|
| 95 |
+
/Filter [ /ASCII85Decode /FlateDecode ] /Length 1480
|
| 96 |
+
>>
|
| 97 |
+
stream
|
| 98 |
+
GauI7?#T!f&;KZN/*<#_dPaie+RFm5O0d8,S0RDMdbaW99a:*M`&Y8)b%cG(NY<g_:V/>I77]!5j7+o]IV3I$*kb\S2ou)<VJu2I"6RLC"t[Cq@6/s8e$Et*Cbmj;0;I0\)%<:V-&\OcIRdalp"*r-7Ph988a5D`1SNDoBW(1P)_<jG)Un4:9`K_/&(\Z@oQn^)?t\Wh!PrZhHcpncFhV-eO1e`7cG7MJ_n]]R]$,hYP*`+(4K`+P>WEA%j+.&.+P%8U;902u_.Nj+-$(cWA-bC$J(-sX`3du^nSrY:)r85Z_s!%a@Nk_?i'duEL:hK@)'4PH[c._-i]L6c90j=!HGL>nn0A\H=Cka>*Ol<oE=H*^#+`Z-\0ta*G7?@-!-tjMg0d7E%TGr57FXs&8h7W2Bb5GY>@Z$T/S-jP(g2aW_P#$pk=1:22;H#8mh(Bl+3s35]Vc!^7Y;c-*MAJCP@5t`G<M(68ZCB.CKLnLJ4+-\F&q@R;H9*,9rrTEZkBAT@V.kQj9l350DPS$0o4kU!Q2Dlg=N>)HkIkD#s+<]6.E?+0KptD%rBl:aF,"_Aac2?QkIH(Q\.I14O8)jb^k6F-GM([I5`K2C^DA&8]i^s3p,4qB:bG-#tA3iAS'$,394C*C0OQuggcIfTYgGeO,[>M1Ij;=`I&*G<YStWjb.[8T11G^nS$oZ9HUSlf6ncER:@=:DKm63f1UFu/EA:ue39cO8'?L]lba\M<87a^oXE$4;6:ufc8k9-#M4.MhEsEGhCUd*SkFe"AP$`QmCf"#`baEH^2b=%hY^EJY+;id/ptOQoO6&trKi(]k;MX=EiA!ZU(<)RCDF:WGL35D6"fk^/qaF+U`uL]<s&\BDe1Z$gmUo#?bKtUFWa7e#D-)$jcrQSVAHdZk6ouh57!dGn(.r>ea^)e#Bc:L:!-Hd5uK.#5luGRUp=u[&[K2j&[K1kI^j^mgGhm?^S"_$[=h6EP^6\B-.GNN=d<RaT3u\d9+;)Jq@mKV76VD@[:,q%_iIn2Jk^+L&4:s>nNPU7EEF"VM0sU!A1jG&W2IMN7\:BtPfdZpdbmVCi/>1dfLWn99;]a(g`1j]'**_lJT=Ef0.p9RjpO3W//7B'bgJ5meG_.B_"1?PkBFmL;*Jm7`<%\23q/Y)A,OUjZeJ[O7-k&6_)a*d/X6X]1OJDm-SAW_kbt=3X.(4J`F(&i%+\Q9mVo<?05`SFWO:4/m]8C(QikqHKS+C_NXaSL'g9KE9/^8'5n_iW1kTr%N0dB>(N((L>i8@"+O'/_h'R/2NUk'F(5%bX222o^Gg`f$ma.+RH[dmI8E,%:LP:r;()QToBlms62nhU>G*GUr<C4Bf^"24H<nBQh*:5=[h.6P7="gZLF7X<rIFGk)Fn</XN35r"`kbO"#oj\nEmu83nJ+]Df:Hkc`"OFG*HLH2"eG1m*jeOV)P:V.;p0*_e*sol6pfa4Dd#=%mQ:P`rrT.HoW8~>endstream
|
| 99 |
+
endobj
|
| 100 |
+
15 0 obj
|
| 101 |
+
<<
|
| 102 |
+
/Filter [ /ASCII85Decode /FlateDecode ] /Length 1219
|
| 103 |
+
>>
|
| 104 |
+
stream
|
| 105 |
+
GauI69lK&M&;KZL'm"J#b8'-B-pS=!DAPI3=D[;u0=3R!\=lHIV!Rs8G:;.M\naBbMZs36J/WJ'5/VZf0F6ou28>=fi55nNqS[nQYf_L@-8Sae_/HBdQTKAH>XXcE*5r>PMtr%FMjWY_?()NeP>(BH=I4o>8f4\`Ed81rYH[l>LOSLLGR2XVGe('4bcfT.&)6G7pLpBROT`2EZ?APb.(M,X5mob2#aNeOSRD<hY75k>.s(Co11u$'hlCNMSMVDZ_<<,F7iTZ/Bo9c@]"WZiJb^/F@2Q09#H3R;6!\T5]"^dpQhBC.FY.'GUFrtl/?GK))e&*l]!C3^?D)Fl/O\_$9/O1U3B2M'+5rR1Tn2>3'Bf%efuU+KrAg[<%rj\:^5&7u"[(goVbLoW)dTKYUb$@%0a<9cC.4%&#ImBe_F?O81u(#\qfOZEm!SU=A:$oI'A6>e?<,'@O7%e3?Wt+K1O*&E*UcWq#s)\YNom[r4,I[34k,Q4?5:BBp71&-!.-#BZ([I$2&NY(.9Yd\Ti/a<?f0s-Nj!A1^5W)9>Nt<qj5!NS,cI6l;Rek,Y+E=E.PHBr7]TeB-q`n]"hg`BO`Cl"g4@Z^6qUBH_W?;gc>UEZhU!hFbI>N-c12HfK@l^C6u#A4?s=&9[LNR=XpDkK6c)DW?*IgWN5;-QK\XJS<l#eU]L$,_SnU3:e-$*kXV<*WC^G=;/O#.@UJm7I]ck=@\soN).W'n./kh$#B(+[LF*=n9s*CD3N&TsDLoR4$ad^Ub?7m:2-d9$G]8gmSb!0;5(8X7/c+6B+lNB[Mm-k@IaVn/G8"E_aVJN6Q14&jHIuI<ZXB(2@C^4@>/Z(I9Sq_kPpQKuWGoBu/r$RZJqeg8qSpT-O4!$ps-O"HU?IW>-J<%gg^FFgJF+1K2msNrh0VEb;0a(V+n_U"qT=[3mK^(.T:j)>dWgO\ujXk946m\hP_E,Gc4@fF,d@6S>I]C[:r`#DO1Nf*`b_-U]M`&,S=MrCmTAu5mH$_X=g/U!r3>`(9bK4oGS7FlhH5.#%Vg$plnFZ?CTT,TV?Y):ih7G!^ODjmC*>C^D:O)3]oENM&-b70nooAJ@I6LssU^.YpL,RAR[cBbX.-Pe@7X6[eO2A_f>EhH_qqghK\DhOIKnW,ubpSrFX*-H!W,Kc;dpYA?J7YQk$S>79r=:1gE1`S"G.odp^)t.;)DA9Y(8gW/(BA[Z!.BF"hu~>endstream
|
| 106 |
+
endobj
|
| 107 |
+
xref
|
| 108 |
+
0 16
|
| 109 |
+
0000000000 65535 f
|
| 110 |
+
0000000061 00000 n
|
| 111 |
+
0000000112 00000 n
|
| 112 |
+
0000000219 00000 n
|
| 113 |
+
0000000331 00000 n
|
| 114 |
+
0000000536 00000 n
|
| 115 |
+
0000000641 00000 n
|
| 116 |
+
0000000846 00000 n
|
| 117 |
+
0000001051 00000 n
|
| 118 |
+
0000001256 00000 n
|
| 119 |
+
0000001325 00000 n
|
| 120 |
+
0000001606 00000 n
|
| 121 |
+
0000001684 00000 n
|
| 122 |
+
0000003087 00000 n
|
| 123 |
+
0000004599 00000 n
|
| 124 |
+
0000006171 00000 n
|
| 125 |
+
trailer
|
| 126 |
+
<<
|
| 127 |
+
/ID
|
| 128 |
+
[<7b8e9ff53cd6975d4b04d04316e200e7><7b8e9ff53cd6975d4b04d04316e200e7>]
|
| 129 |
+
% ReportLab generated PDF document -- digest (opensource)
|
| 130 |
+
|
| 131 |
+
/Info 10 0 R
|
| 132 |
+
/Root 9 0 R
|
| 133 |
+
/Size 16
|
| 134 |
+
>>
|
| 135 |
+
startxref
|
| 136 |
+
7482
|
| 137 |
+
%%EOF
|
code/attribution_eval.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def return_prompts_attribution(reference_full_text, generated_summary, subclaims_json, difficulty_level):
|
| 2 |
+
return f'''
|
| 3 |
+
### **SYSTEM / ROLE INSTRUCTION**
|
| 4 |
+
|
| 5 |
+
You are a **medical factuality and attribution evaluator**.
|
| 6 |
+
You will assess whether **unsupported subclaims** in a generated summary (those with `"result": 0"`) are *reasonable additions* based on the readability level (*easy / intermediate / hard*).
|
| 7 |
+
|
| 8 |
+
The goal is to determine whether these **extra pieces of information** are acceptable simplifications or *hallucinations* that reduce factual faithfulness.
|
| 9 |
+
|
| 10 |
+
---
|
| 11 |
+
|
| 12 |
+
### **READABILITY & ATTRIBUTION GUIDELINES**
|
| 13 |
+
|
| 14 |
+
| Level | Audience | Content Goal | Allowable Additions |
|
| 15 |
+
| :--------------- | :------------------------------- | :--------------------------------------------------------------------- | :--------------------------------------------------------------------------------- |
|
| 16 |
+
| **Easy** | General public | Simplify and clarify events | Allow general background info or lay explanations, but not new facts or diagnoses. |
|
| 17 |
+
| **Intermediate** | Educated layperson / med student | Add brief clarifications or causal context if consistent with the text | Allow inferred, non-contradictory context; avoid adding unconfirmed data. |
|
| 18 |
+
| **Hard** | Medical professional | Maintain factual precision | No additions; everything must be supported by source text. |
|
| 19 |
+
|
| 20 |
+
---
|
| 21 |
+
|
| 22 |
+
### **INPUT FIELDS**
|
| 23 |
+
|
| 24 |
+
**Reference full text:**
|
| 25 |
+
{reference_full_text}
|
| 26 |
+
|
| 27 |
+
**Generated summary ({difficulty_level}):**
|
| 28 |
+
{generated_summary}
|
| 29 |
+
|
| 30 |
+
**Subclaims and results:**
|
| 31 |
+
{subclaims_json}
|
| 32 |
+
|
| 33 |
+
---
|
| 34 |
+
|
| 35 |
+
### **TASK INSTRUCTIONS**
|
| 36 |
+
|
| 37 |
+
1. Focus only on subclaims with `"result": 0"` (not supported by the input text).
|
| 38 |
+
2. For each unsupported subclaim:
|
| 39 |
+
|
| 40 |
+
* Judge whether adding it is **reasonable** for the given readability level.
|
| 41 |
+
* Choose one of: `"reasonable addition"`, `"unnecessary but harmless"`, `"misleading / hallucinated"`.
|
| 42 |
+
* Provide a **1–2 sentence justification** explaining your reasoning.
|
| 43 |
+
3. After all evaluations, assign a **numerical attribution score (0–5)**:
|
| 44 |
+
|
| 45 |
+
* **5** = All additions are reasonable or harmless simplifications.
|
| 46 |
+
* **4** = Mostly reasonable; minor harmless additions.
|
| 47 |
+
* **3** = Some misleading or unjustified additions.
|
| 48 |
+
* **2** = Many factual inaccuracies.
|
| 49 |
+
* **1** = Serious hallucinations; distorts source meaning.
|
| 50 |
+
* **0** = Highly unfaithful; mostly invented content.
|
| 51 |
+
4. End with an **overall explanation (3–5 sentences)** summarizing your reasoning and suggestions.
|
| 52 |
+
|
| 53 |
+
---
|
| 54 |
+
|
| 55 |
+
### **OUTPUT FORMAT (strict JSON)**
|
| 56 |
+
|
| 57 |
+
```json
|
| 58 |
+
{{
|
| 59 |
+
"evaluation_table": [
|
| 60 |
+
{{
|
| 61 |
+
"id": <subclaim_id>,
|
| 62 |
+
"subclaim": "<text>",
|
| 63 |
+
"evaluation": "<reasonable addition | unnecessary but harmless | misleading / hallucinated>",
|
| 64 |
+
"explanation": "<short justification>"
|
| 65 |
+
}}
|
| 66 |
+
],
|
| 67 |
+
"attribution_score": <0-5>,
|
| 68 |
+
"overall_explanation": "<concise summary of your judgment>"
|
| 69 |
+
}}
|
| 70 |
+
```
|
| 71 |
+
'''
|
| 72 |
+
from openai import OpenAI
|
| 73 |
+
import json
|
| 74 |
+
file_path = "/home/mshahidul/api_new.json"
|
| 75 |
+
with open(file_path, "r") as file:
|
| 76 |
+
api_keys = json.load(file)
|
| 77 |
+
|
| 78 |
+
openai_api_key = api_keys.get("openai")
|
| 79 |
+
|
| 80 |
+
client = OpenAI(api_key=openai_api_key)
|
| 81 |
+
def openai_return(prompt):
|
| 82 |
+
response = client.chat.completions.create(
|
| 83 |
+
model="gpt-5-mini",
|
| 84 |
+
messages=[
|
| 85 |
+
{"role": "system", "content": "You are a helpful assistant."},
|
| 86 |
+
{"role": "user", "content": prompt}
|
| 87 |
+
]
|
| 88 |
+
)
|
| 89 |
+
cleaned_response = response.choices[0].message.content.strip().replace("```json", "").replace("```", "")
|
| 90 |
+
return json.loads(cleaned_response)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
import json
|
| 94 |
+
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
|
| 95 |
+
|
| 96 |
+
with open(file_path, 'r') as f:
|
| 97 |
+
synthetic_data = json.load(f)
|
| 98 |
+
|
| 99 |
+
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
|
| 100 |
+
|
| 101 |
+
with open(file_path_qwen3_32B, 'r') as f:
|
| 102 |
+
qwen3_32B_results = json.load(f)
|
| 103 |
+
|
| 104 |
+
# dict_keys(['id', 'full_text', 'ref_summary', 'readability_versions'])
|
| 105 |
+
# print(f"Full text: {synthetic_data[0]['full_text']}")
|
| 106 |
+
import os
|
| 107 |
+
|
| 108 |
+
res=[]
|
| 109 |
+
temp=""
|
| 110 |
+
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/resonability_check_100_gpt5_attribution.json"
|
| 111 |
+
if os.path.exists(save_path):
|
| 112 |
+
with open(save_path, 'r') as f:
|
| 113 |
+
res = json.load(f)
|
| 114 |
+
print(f"Resuming from {len(res)} entries")
|
| 115 |
+
existing_check=set((entry['id'], entry['difficulty_level']) for entry in res)
|
| 116 |
+
import tqdm
|
| 117 |
+
for ind in tqdm.tqdm(range(len(res),100)):
|
| 118 |
+
for version in ["easy", "intermediate", "hard"]:
|
| 119 |
+
if (synthetic_data[ind]['id'], version) in existing_check:
|
| 120 |
+
print(f"Skipping {synthetic_data[ind]['id']}, {version}")
|
| 121 |
+
continue
|
| 122 |
+
ref_full_text_summary = (f"{synthetic_data[ind]['full_text']}")
|
| 123 |
+
generated_summary = (f"{synthetic_data[ind]['readability_versions'][version]['text']}")
|
| 124 |
+
subclaims_results = (f"{qwen3_32B_results[ind]['attribution']['results']}")
|
| 125 |
+
prompt = return_prompts_attribution(ref_full_text_summary, generated_summary, subclaims_results, version)
|
| 126 |
+
try:
|
| 127 |
+
ans=openai_return(prompt)
|
| 128 |
+
res.append({
|
| 129 |
+
"id": synthetic_data[ind]['id'],
|
| 130 |
+
"difficulty_level": version,
|
| 131 |
+
"response": ans
|
| 132 |
+
})
|
| 133 |
+
|
| 134 |
+
if len(res)%2==0:
|
| 135 |
+
print(f"Completed {len(res)} out of 300")
|
| 136 |
+
with open(save_path, 'w') as outfile:
|
| 137 |
+
json.dump(res, outfile, indent=2)
|
| 138 |
+
except Exception as e:
|
| 139 |
+
print(f"Error at index {ind}, version {version}: {e}")
|
| 140 |
+
|
| 141 |
+
with open(save_path, 'w') as outfile:
|
| 142 |
+
json.dump(res, outfile, indent=2)
|
code/attribution_evalV2.py
ADDED
|
@@ -0,0 +1,222 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
| 3 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "4"
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
import torch
|
| 7 |
+
from unsloth import FastLanguageModel
|
| 8 |
+
import tqdm
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
_model_cache = {"model": None, "tokenizer": None}
|
| 12 |
+
|
| 13 |
+
def load_finetuned_model(model_path: str):
|
| 14 |
+
"""Load and cache the fine-tuned model + tokenizer."""
|
| 15 |
+
if _model_cache["model"] is not None:
|
| 16 |
+
return _model_cache["model"], _model_cache["tokenizer"]
|
| 17 |
+
|
| 18 |
+
model, tokenizer = FastLanguageModel.from_pretrained(
|
| 19 |
+
model_name=model_path,
|
| 20 |
+
max_seq_length=8192,
|
| 21 |
+
load_in_4bit=False,
|
| 22 |
+
load_in_8bit=False,
|
| 23 |
+
full_finetuning=False,
|
| 24 |
+
)
|
| 25 |
+
_model_cache["model"], _model_cache["tokenizer"] = model, tokenizer
|
| 26 |
+
return model, tokenizer
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def build_inference_prompt(
    reference_full_text,
    generated_summary,
    subclaim_id,
    subclaim_text,
    subclaim_result,
    difficulty_level
):
    """
    Build a standardized inference prompt for single-subclaim evaluation.
    Use after fine-tuning to assess new examples consistently.

    Parameters
    ----------
    reference_full_text : str
        Source document the summary was generated from.
    generated_summary : str
        Model-generated summary under audit.
    subclaim_id : int
        Identifier of the subclaim within the summary.
    subclaim_text : str
        The subclaim sentence itself (embedded verbatim in the prompt; it is
        not JSON-escaped — assumes no unescaped double quotes inside. TODO confirm).
    subclaim_result : int or str
        1 = supported by the reference, 0 = unsupported.
    difficulty_level : str
        One of "easy" / "intermediate" / "hard".

    Returns
    -------
    str
        The fully rendered prompt, stripped of surrounding whitespace.

    Fixes vs. original: the task instructions contained malformed inline code
    (`"result": 1"` / `"result": 0"` with a stray trailing quote) and the
    ```json example fence was never closed; both corrected so the model sees
    well-formed instructions.
    """

    inference_prompt = f"""
### **SYSTEM / ROLE INSTRUCTION**

You are a **medical factuality and attribution evaluator**.
You will analyze one subclaim from a generated medical summary.

Each subclaim includes a `"result"` flag:
- `1` → Supported by the reference text (no reasonableness check required)
- `0` → Unsupported by the reference text (evaluate scope and validity)

Your task is to decide, for unsupported subclaims, whether the new information
is a *reasonable addition* given the specified readability level:
**easy**, **intermediate**, or **hard**.

---

### **READABILITY GUIDELINES**

| Level | Audience | Style | Allowable Additions |
| :-- | :-- | :-- | :-- |
| **Easy (FH 70–100)** | General public | Simple, concrete | Broad clarifications only; no factual innovations |
| **Intermediate (FH 50–69)** | Educated nonspecialist | Moderate precision | Limited clarifications consistent with the text |
| **Hard (FH 0–49)** | Professionals | Formal, technical | Must be strictly supported by evidence |

---

### **INPUT**

Readability Level: {difficulty_level}

Reference Full Text:
{reference_full_text}

Generated Summary:
{generated_summary}

Subclaim Info:
{{
"subclaim_id": {subclaim_id},
"subclaim": "{subclaim_text}",
"result": {subclaim_result}
}}

---

### **TASK INSTRUCTIONS**

- If `"result": 1`, respond with `"not_applicable"` and justify briefly
(e.g., *"supported, no evaluation required"*).
- If `"result": 0`, classify reasonableness:
- `"reasonable"` → legitimate simplification consistent with the readability level
- `"partially_reasonable"` → benign rephrasing
- `"unreasonable"` → misleading, speculative, or contradicted by the source

Provide a **short 1–2 sentence justification**.

---

### **EXPECTED OUTPUT (JSON ONLY)**

```json
{{
"evaluation": {{
"subclaim_id": {subclaim_id},
"subclaim": "{subclaim_text}",
"result": {subclaim_result},
"reasonableness": "<reasonable | partially_reasonable | unreasonable | not_applicable>",
"justification": "<brief justification>"
}}
}}
```
""".strip()

    return inference_prompt
|
| 115 |
+
def infer_attribution_reasonableness(prompt: str, model_path: str):
    """Run inference with the fine-tuned model on an attribution prompt.

    Parameters
    ----------
    prompt : str
        Prompt produced by ``build_inference_prompt``.
    model_path : str
        Checkpoint path, forwarded to ``load_finetuned_model``.

    Returns
    -------
    dict when the completion parses as JSON, otherwise the raw completion str.

    Fixes vs. original:
    - Only newly generated tokens are decoded. The original decoded the whole
      sequence (prompt + completion), so whenever no ``</think>`` marker was
      present the echoed prompt made ``json.loads`` fail unconditionally.
    - Markdown code fences are stripped on every path, not only after a
      ``</think>`` split.
    - ``temperature``/``top_p``/``top_k`` removed: they are ignored (and emit
      warnings) under ``do_sample=False`` greedy decoding.
    """
    model, tokenizer = load_finetuned_model(model_path)

    messages = [{"role": "user", "content": prompt + "\n"}]

    chat_text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=False,
    )

    inputs = tokenizer(chat_text, return_tensors="pt").to("cuda")

    with torch.no_grad():
        output_ids = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=False,  # greedy decoding; sampling knobs are irrelevant
        )

    # Decode only the completion, skipping the echoed prompt tokens.
    prompt_len = inputs["input_ids"].shape[1]
    output_text = tokenizer.decode(
        output_ids[0][prompt_len:], skip_special_tokens=True
    ).strip()
    if "</think>" in output_text:
        output_text = output_text.split("</think>")[-1].strip()
    output_text = output_text.replace("```json", "").replace("```", "").strip()

    try:
        parsed = json.loads(output_text)
    except json.JSONDecodeError:
        # Fall back to the raw text so callers can inspect malformed output.
        parsed = output_text
    return parsed
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
# --- Batch driver: reasonableness-check every subclaim of every summary -----
file_synth = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
file_qwen_results = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/attribution_resonability_results_100_qwen3-32B_v2.json"
# Hoisted from the inner loop so the checkpoint path lives in one place.
MODEL_PATH = "/home/mshahidul/readctrl_model/qwen3-32B_subclaims-attribution_resonability_check_8kCtx_v1"

with open(file_synth, 'r') as f:
    synthetic_data = json.load(f)
with open(file_qwen_results, 'r') as f:
    qwen3_32B_results = json.load(f)

# (doc id, difficulty level) -> subclaim verifier results for that summary.
dict1 = {}
for item in qwen3_32B_results:
    dict1[(item['id'], item['version'])] = item['attribution']['results']

# Resume support: reload previously saved results and skip finished pairs.
res = []
if os.path.exists(save_path):
    with open(save_path, 'r') as f:
        res = json.load(f)
    print(f"🔁 Resuming from {len(res)} entries")

existing = set((e["id"], e["difficulty_level"]) for e in res)

for ind in tqdm.tqdm(range(0, 100)):
    entry = synthetic_data[ind]

    for level in ["easy", "intermediate", "hard"]:
        # Fix: skip-check now runs BEFORE the dict lookup, so an already
        # finished pair cannot raise KeyError when its verifier results are
        # missing from dict1.
        if (entry["id"], level) in existing:
            print(f"⏭️ Skipping {entry['id']} ({level})")
            continue
        subclaims_results = dict1[(entry["id"], level)]

        ref_full_text = entry["full_text"]
        generated_summary = entry["readability_versions"][level]["text"]
        temp = []
        for subclaim in subclaims_results:
            subclaim_id = subclaim['subclaim']['id']
            subclaim_text = subclaim['subclaim']['subclaim']
            subclaim_result = subclaim['result']
            # Supported subclaims need neither a prompt nor a model call.
            # NOTE(review): this compares against the *string* "1"; confirm
            # the verifier emits string results rather than ints.
            if subclaim_result == "1":
                temp.append({
                    "subclaim_id": subclaim_id,
                    "subclaim_text": subclaim_text,
                    "response": "not_applicable"
                })
                continue
            prompt = build_inference_prompt(
                ref_full_text,
                generated_summary,
                subclaim_id,
                subclaim_text,
                subclaim_result,
                level
            )
            response = infer_attribution_reasonableness(prompt, MODEL_PATH)
            temp.append({
                "subclaim_id": subclaim_id,
                "subclaim_text": subclaim_text,
                "response": response
            })
        res.append({
            "id": entry["id"],
            "difficulty_level": level,
            "results": temp
        })
        # Periodic checkpoint: a crash loses at most ~10 entries of work.
        if len(res) % 10 == 0:
            with open(save_path, 'w') as f:
                json.dump(res, f, indent=2, ensure_ascii=False)
            print(f"💾 Saved after {len(res)} entries")

with open(save_path, 'w') as f:
    json.dump(res, f, indent=2, ensure_ascii=False)
|
| 221 |
+
|
| 222 |
+
|
code/combine_docid_labels.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
from collections import defaultdict
|
| 6 |
+
from dataclasses import dataclass
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
EXPECTED_LABELS = (
|
| 12 |
+
"low_health_literacy",
|
| 13 |
+
"intermediate_health_literacy",
|
| 14 |
+
"proficient_health_literacy",
|
| 15 |
+
)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
@dataclass
class MergeStats:
    """Counters describing how the per-label rows merged by doc_id."""
    # Number of input rows and of distinct doc_ids seen.
    total_rows: int = 0
    total_doc_ids: int = 0
    # Rows skipped because their "label" field was missing/falsy.
    missing_label_rows: int = 0
    # Rows whose label was not one of EXPECTED_LABELS (still merged).
    unexpected_labels: int = 0
    # doc_ids lacking at least one of the three expected labels.
    doc_ids_missing_some_labels: int = 0
    # doc_ids whose rows disagreed on the shared fulltext / summary text.
    doc_ids_fulltext_mismatch: int = 0
    doc_ids_summary_mismatch: int = 0
    # doc_ids whose rows disagreed on the shared subclaim lists.
    doc_ids_fulltext_subclaims_mismatch: int = 0
    doc_ids_summary_subclaims_mismatch: int = 0
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _pick_first_non_empty(values: List[Optional[str]]) -> Optional[str]:
|
| 32 |
+
for value in values:
|
| 33 |
+
if isinstance(value, str) and value.strip():
|
| 34 |
+
return value
|
| 35 |
+
for value in values:
|
| 36 |
+
if value is not None:
|
| 37 |
+
return value
|
| 38 |
+
return None
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _normalize_text(value: Any) -> Optional[str]:
|
| 42 |
+
if value is None:
|
| 43 |
+
return None
|
| 44 |
+
if not isinstance(value, str):
|
| 45 |
+
return str(value)
|
| 46 |
+
return value
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def _normalize_string_list(value: Any) -> Optional[Tuple[str, ...]]:
|
| 50 |
+
if value is None:
|
| 51 |
+
return None
|
| 52 |
+
if not isinstance(value, list):
|
| 53 |
+
return (str(value),)
|
| 54 |
+
normalized: List[str] = []
|
| 55 |
+
for item in value:
|
| 56 |
+
if item is None:
|
| 57 |
+
continue
|
| 58 |
+
if isinstance(item, str):
|
| 59 |
+
normalized.append(item.strip())
|
| 60 |
+
else:
|
| 61 |
+
normalized.append(str(item).strip())
|
| 62 |
+
return tuple(normalized)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def combine_by_doc_id(rows: List[Dict[str, Any]], keep_all_fields_per_label: bool = True) -> Tuple[List[Dict[str, Any]], MergeStats]:
    """Merge per-label rows into one record per doc_id.

    Each input row carries a doc_id, a label, shared document fields
    (fulltext/summary and their subclaim lists), plus per-label payload.
    Output records hoist the shared fields to the top level and nest the
    per-label payloads under "labels". Returns (combined records sorted by
    doc_id, MergeStats describing anomalies encountered).

    When keep_all_fields_per_label is True, each label keeps every row field
    except the shared ones; otherwise only diff_label_texts /
    diff_label_subclaims are kept.
    """
    stats = MergeStats(total_rows=len(rows))

    # Bucket rows by doc_id; non-dict rows and rows without a doc_id are
    # silently dropped (not counted in stats).
    grouped: Dict[int, List[Dict[str, Any]]] = defaultdict(list)
    for row in rows:
        if not isinstance(row, dict):
            continue
        doc_id = row.get("doc_id")
        if doc_id is None:
            continue
        # NOTE(review): assumes doc_id is numeric/int-convertible — a
        # non-numeric string would raise ValueError here. TODO confirm.
        grouped[int(doc_id)].append(row)

    stats.total_doc_ids = len(grouped)

    combined: List[Dict[str, Any]] = []

    for doc_id in sorted(grouped.keys()):
        bucket = grouped[doc_id]

        labels_map: Dict[str, Dict[str, Any]] = {}
        # Per-row shared fields, collected for mismatch detection and for
        # choosing a canonical value below.
        fulltexts: List[Optional[str]] = []
        summaries: List[Optional[str]] = []
        fulltext_subclaims_sets: List[Optional[Tuple[str, ...]]] = []
        summary_subclaims_sets: List[Optional[Tuple[str, ...]]] = []

        for row in bucket:
            label = row.get("label")
            if not label:
                # Missing label: the row contributes nothing, not even its
                # shared fields.
                stats.missing_label_rows += 1
                continue
            if label not in EXPECTED_LABELS:
                # Counted but still merged below.
                stats.unexpected_labels += 1

            fulltexts.append(_normalize_text(row.get("fulltext")))
            summaries.append(_normalize_text(row.get("summary")))
            fulltext_subclaims_sets.append(_normalize_string_list(row.get("fulltext_subclaims")))
            summary_subclaims_sets.append(_normalize_string_list(row.get("summary_subclaims")))

            label_payload: Dict[str, Any]
            if keep_all_fields_per_label:
                # Shared within a doc_id; keep them only once at top-level
                label_payload = {
                    k: v
                    for k, v in row.items()
                    if k
                    not in (
                        "doc_id",
                        "label",
                        "fulltext",
                        "summary",
                        "fulltext_subclaims",
                        "summary_subclaims",
                    )
                }
            else:
                # Minimal mode: only the diff_label_* fields per label.
                label_payload = {
                    "diff_label_texts": row.get("diff_label_texts"),
                    "diff_label_subclaims": row.get("diff_label_subclaims"),
                }

            # If the same label occurs twice for a doc_id, the last row wins.
            labels_map[str(label)] = label_payload

        # Canonical shared text: first non-blank occurrence across the rows.
        chosen_fulltext = _pick_first_non_empty(fulltexts)
        chosen_summary = _pick_first_non_empty(summaries)

        # Canonical subclaim lists: first non-empty tuple, converted to list.
        chosen_fulltext_subclaims: Optional[List[str]] = None
        for items in fulltext_subclaims_sets:
            if items:
                chosen_fulltext_subclaims = list(items)
                break
        chosen_summary_subclaims: Optional[List[str]] = None
        for items in summary_subclaims_sets:
            if items:
                chosen_summary_subclaims = list(items)
                break

        # Mismatch detection: count doc_ids whose rows disagree on the
        # supposedly-shared fields (whitespace-insensitive for texts).
        distinct_fulltexts = {t.strip() for t in fulltexts if isinstance(t, str) and t.strip()}
        distinct_summaries = {t.strip() for t in summaries if isinstance(t, str) and t.strip()}
        if len(distinct_fulltexts) > 1:
            stats.doc_ids_fulltext_mismatch += 1
        if len(distinct_summaries) > 1:
            stats.doc_ids_summary_mismatch += 1

        distinct_fulltext_subclaims = {t for t in fulltext_subclaims_sets if t}
        distinct_summary_subclaims = {t for t in summary_subclaims_sets if t}
        if len(distinct_fulltext_subclaims) > 1:
            stats.doc_ids_fulltext_subclaims_mismatch += 1
        if len(distinct_summary_subclaims) > 1:
            stats.doc_ids_summary_subclaims_mismatch += 1

        missing_some = any(lbl not in labels_map for lbl in EXPECTED_LABELS)
        if missing_some:
            stats.doc_ids_missing_some_labels += 1

        combined.append(
            {
                "doc_id": doc_id,
                "fulltext": chosen_fulltext,
                "fulltext_subclaims": chosen_fulltext_subclaims,
                "summary": chosen_summary,
                "summary_subclaims": chosen_summary_subclaims,
                "labels": labels_map,
            }
        )

    return combined, stats
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def main() -> None:
    """CLI entry point: read a per-label JSON array, merge rows by doc_id,
    write the combined JSON, and print merge statistics."""
    parser = argparse.ArgumentParser(
        description=(
            "Combine per-label rows into a single object per doc_id. "
            "Input is a JSON array with repeated doc_id for different labels."
        )
    )
    parser.add_argument(
        "--input",
        required=True,
        help="Path to input JSON file (list of rows)",
    )
    parser.add_argument(
        "--output",
        default=None,
        help="Path to output JSON file. Default: same folder with *_by_docid.json suffix",
    )
    parser.add_argument(
        "--minimal",
        action="store_true",
        help="Only keep diff_label_texts/diff_label_subclaims/fulltext_subclaims/summary_subclaims per label.",
    )

    args = parser.parse_args()
    input_path = Path(args.input)
    # Default output sits next to the input: data.json -> data_by_docid.json.
    output_path = Path(args.output) if args.output else input_path.with_name(input_path.stem + "_by_docid.json")

    rows = json.loads(input_path.read_text(encoding="utf-8"))
    if not isinstance(rows, list):
        raise SystemExit("Input JSON must be a list")

    # --minimal trims each label payload down to the diff_label_* fields.
    combined, stats = combine_by_doc_id(rows, keep_all_fields_per_label=not args.minimal)

    output_path.write_text(
        json.dumps(combined, ensure_ascii=False, indent=2) + "\n",
        encoding="utf-8",
    )

    print("Wrote:", str(output_path))
    # Dump the merge anomaly counters so data issues are visible at a glance.
    print(
        "Stats:",
        json.dumps(
            {
                "total_rows": stats.total_rows,
                "total_doc_ids": stats.total_doc_ids,
                "missing_label_rows": stats.missing_label_rows,
                "unexpected_labels": stats.unexpected_labels,
                "doc_ids_missing_some_labels": stats.doc_ids_missing_some_labels,
                "doc_ids_fulltext_mismatch": stats.doc_ids_fulltext_mismatch,
                "doc_ids_summary_mismatch": stats.doc_ids_summary_mismatch,
                "doc_ids_fulltext_subclaims_mismatch": stats.doc_ids_fulltext_subclaims_mismatch,
                "doc_ids_summary_subclaims_mismatch": stats.doc_ids_summary_subclaims_mismatch,
            },
            indent=2,
        ),
    )


if __name__ == "__main__":
    main()
|
code/convert_awq.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Quantize a merged BF16 checkpoint to 4-bit AWQ for cheaper inference.
# NOTE: the CUDA env vars are set *before* importing awq/transformers on
# purpose — device visibility is read when the CUDA runtime initializes.
import os
# Set GPU environment variables
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

# Paths
# Input: merged BF16 fine-tuned model. Output: its 4-bit AWQ copy.
model_path = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-support-check-8b_ctx_v2-bf16"
quant_path = "/home/mshahidul/readctrl_model/full_model/qwen3-32B-subclaims-support-check-8b_ctx_AWQ"

# Quantization configuration
# 4-bit weights, group size 128, GEMM kernels — the standard AWQ recipe.
quant_config = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM"
}

# Load model and tokenizer
print("Loading model...")
model = AutoAWQForCausalLM.from_pretrained(model_path, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Quantize
print("Starting quantization (this may take a while)...")
# AutoAWQ uses a default calibration dataset (pile-val)
model.quantize(tokenizer, quant_config=quant_config)

# Save quantized model
print(f"Saving quantized model to {quant_path}...")
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)

print("Quantization Complete!")
|
code/finetune-inference/convert_fp16.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Merge an Unsloth fine-tune (adapter) into a standalone BF16 checkpoint.
import os
import argparse
# Example invocation:
# python /home/mshahidul/readctrl/code/finetune-inference/convert_fp16.py \
# --model_path /home/mshahidul/readctrl_model/qwen3-32B_subclaims-attribution_resonability_check_8kCtx_v1
# --save_path /home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims-attribution_resonability_check_8kCtx_v1_BF16_merged
# --cuda_device 2
parser = argparse.ArgumentParser()
parser.add_argument("--model_path", type=str, required=True,
                    help="Path to the fine-tuned model/adapter to convert.")
parser.add_argument("--save_path", type=str, required=True,
                    help="Path to save the converted BF16 model.")
parser.add_argument("--msl", type=int, default=8192,
                    help="Maximum sequence length for the model.")
parser.add_argument("--cuda_device", type=str, default="2",
                    help="CUDA device index to use.")
args = parser.parse_args()

# Set your GPU visibility as you did in your script
# NOTE: must happen before `import torch` / unsloth below — CUDA reads these
# env vars at initialization time, so the import order is load-bearing.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = args.cuda_device
import torch
from unsloth import FastLanguageModel

# -----------------------------
# CONFIGURATION
# -----------------------------
# Path to your current fine-tuned model/adapter
# MODEL_PATH = "/home/mshahidul/readctrl_model/qwen3-32B_subclaims-extraction-8b_ctx"

# Path where you want to save the BF16 version
# SAVE_PATH = "/home/mshahidul/readctrl_model/full_model/qwen3-32B_subclaims_BF16_merged"

def convert_and_save():
    """Load adapter + base in BF16 (no 4-bit) and save a merged 16-bit copy."""
    print(f"Loading model from: {args.model_path}")

    # 1. Load the model
    # We explicitly set dtype=torch.bfloat16 to ensure the base is loaded correctly
    # load_in_4bit must be False to allow for a clean 16-bit merge
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=args.model_path,
        max_seq_length=args.msl,
        dtype=torch.bfloat16,
        load_in_4bit=False,
    )

    print(f"Saving merged BF16 model to: {args.save_path}")

    # 2. Save using save_pretrained_merged
    # 'merged_16bit' will save as float16 or bfloat16 depending on the loaded dtype.
    # Since we loaded with torch.bfloat16, this will save in bfloat16.
    model.save_pretrained_merged(
        args.save_path,
        tokenizer,
        save_method="merged_16bit",
    )

    print("Conversion complete. You can now use this path for vLLM or standard inference.")

if __name__ == "__main__":
    convert_and_save()
|
code/interface/annotators_v5.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
# --- PATH CONFIGURATION ---
|
| 7 |
+
# DATA_PATH = "/home/mshahidul/readctrl/data/synthetic_dataset_diff_labels/syn_data_with_gs_summary_en_0_20.json"
|
| 8 |
+
DATA_PATH = "/home/mshahidul/readctrl/data/synthetic_dataset_diff_labels/syn_data_diff_labels_bn_0_80.json"
|
| 9 |
+
SAVE_ROOT = "/home/mshahidul/readctrl/data/annotators_validate_data_Bangla_(0_80)"
|
| 10 |
+
os.makedirs(SAVE_ROOT, exist_ok=True)
|
| 11 |
+
|
| 12 |
+
# --- UI HTML COMPONENTS (Kept same as original) ---
|
| 13 |
+
GUIDE_HTML = """
|
| 14 |
+
<div style="background-color: #f9f9f9; padding: 15px; border-left: 6px solid #4CAF50; border-radius: 4px; margin-bottom: 20px;">
|
| 15 |
+
<h3>Rating Guide: Medical Text Difficulty</h3>
|
| 16 |
+
<table style="width:100%; border-collapse: collapse; text-align: left;">
|
| 17 |
+
<tr style="background-color: #e8f5e9;">
|
| 18 |
+
<th style="padding: 8px; border: 1px solid #ddd;">Score</th>
|
| 19 |
+
<th style="padding: 8px; border: 1px solid #ddd;">Description</th>
|
| 20 |
+
</tr>
|
| 21 |
+
<tr><td><b>1</b></td><td><b>Very Easy:</b> Simple words, no medical jargon.</td></tr>
|
| 22 |
+
<tr><td><b>2</b></td><td><b>Easy:</b> Conversational medical terms.</td></tr>
|
| 23 |
+
<tr><td><b>3</b></td><td><b>Moderate:</b> Standard patient education material.</td></tr>
|
| 24 |
+
<tr><td><b>4</b></td><td><b>Hard:</b> Significant technical jargon.</td></tr>
|
| 25 |
+
<tr><td><b>5</b></td><td><b>Very Hard:</b> Specialist-level / Academic.</td></tr>
|
| 26 |
+
</table>
|
| 27 |
+
</div>
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
EXAMPLES_HTML = """
|
| 31 |
+
<div style="background-color: #ffffff; padding: 15px; border: 1px solid #ddd; border-radius: 4px;">
|
| 32 |
+
<h3 style="color: #2e7d32;">Reference Examples</h3>
|
| 33 |
+
<div style="display: flex; gap: 15px;">
|
| 34 |
+
<div style="flex: 1; background-color: #f1f8e9; padding: 10px; border-radius: 4px;">
|
| 35 |
+
<h4>Level 1-2</h4>
|
| 36 |
+
<p>"She had a kidney problem... a big blood clot blocked veins in her brain."</p>
|
| 37 |
+
</div>
|
| 38 |
+
<div style="flex: 1; background-color: #ffebee; padding: 10px; border-radius: 4px;">
|
| 39 |
+
<h4>Level 4-5</h4>
|
| 40 |
+
<p>"Idiopathic NS inaugurated by cerebral venous thrombosis extended to the right jugular vein."</p>
|
| 41 |
+
</div>
|
| 42 |
+
</div>
|
| 43 |
+
</div>
|
| 44 |
+
"""
|
| 45 |
+
import ast


def parse_diff_label_texts(raw_value):
    """
    Parse a diff_label_texts value that may be:
    - dict (already parsed)            -> returned unchanged
    - JSON string                      -> json.loads
    - Python-dict-like string (single quotes) -> ast.literal_eval
    Anything else, or any parse that yields a non-dict, returns {}.

    Fix: ``import ast`` previously sat *after* this function definition; it is
    hoisted above it so the function never depends on later module statements
    having executed.
    """
    if isinstance(raw_value, dict):
        return raw_value

    if not isinstance(raw_value, str):
        return {}

    text = raw_value.strip()
    if not text:
        return {}

    # Prefer strict JSON first; fall back to Python literal parsing.
    try:
        parsed = json.loads(text)
        return parsed if isinstance(parsed, dict) else {}
    except json.JSONDecodeError:
        pass

    try:
        parsed = ast.literal_eval(text)
        return parsed if isinstance(parsed, dict) else {}
    except (ValueError, SyntaxError):
        return {}
|
| 75 |
+
# --- DATA LOADING ---
|
| 76 |
+
def normalize_dataset(raw_dataset):
    """
    Normalize different dataset layouts into a flat queue where each item has:
    index, id, label, generated_summary.

    Fix: the original duplicated the flattening loop across a dict branch and
    a string-parsing branch; ``parse_diff_label_texts`` already returns dicts
    unchanged and parses JSON / Python-literal strings, so one parse + one
    loop covers both layouts identically.
    """
    normalized = []

    for item in raw_dataset:
        # Returns {} for missing/unparseable values, so nothing is emitted.
        diff_label_texts = parse_diff_label_texts(item.get("diff_label_texts"))
        for label, text in diff_label_texts.items():
            normalized.append({
                "index": item.get("index"),
                "id": item.get("id"),
                "label": label,
                "generated_summary": text
            })

    return normalized
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
# --- DATA LOADING (runs at module import time) ---
# Fix: fail fast with real exceptions instead of `assert`, which is silently
# stripped when Python runs with -O.
if os.path.exists(DATA_PATH):
    with open(DATA_PATH, "r", encoding="utf-8") as f:
        RAW_DATASET = json.load(f)
    FULL_DATASET = normalize_dataset(RAW_DATASET)
    print(len(FULL_DATASET))
    if not FULL_DATASET:
        raise ValueError(f"No valid items found in dataset: {DATA_PATH}")
else:
    raise FileNotFoundError(f"Data file not found at {DATA_PATH}")
|
| 120 |
+
|
| 121 |
+
# --- PERSISTENCE HELPERS ---
|
| 122 |
+
def get_user_dir(username):
    """Map a raw username to its annotation directory under SAVE_ROOT,
    keeping only alphanumerics, spaces, underscores and hyphens."""
    allowed_punct = (' ', '_', '-')
    safe = "".join(ch for ch in username if ch.isalnum() or ch in allowed_punct).strip()
    # An all-junk or empty name collapses to the shared "anonymous" bucket.
    return os.path.join(SAVE_ROOT, safe or "anonymous")
|
| 125 |
+
|
| 126 |
+
def save_state(user_dir, state_dict):
    """Persist the session state as pretty-printed JSON to user_dir/state.json."""
    state_path = os.path.join(user_dir, "state.json")
    with open(state_path, "w") as fh:
        json.dump(state_dict, fh, indent=4)
|
| 129 |
+
|
| 130 |
+
def load_state(user_dir):
    """Return the saved session state dict, or None when no state.json exists."""
    state_path = os.path.join(user_dir, "state.json")
    if not os.path.exists(state_path):
        return None
    with open(state_path, "r") as fh:
        return json.load(fh)
|
| 136 |
+
|
| 137 |
+
# --- LOGIC FUNCTIONS ---
|
| 138 |
+
def get_current_ui_values(state):
    """Return (summary text, progress label, rating) for the current queue
    item, restoring a previously submitted rating when one exists."""
    idx = state['current_index']
    item = state['queue'][idx]

    # Rating defaults to 3 unless this queue position was already annotated.
    rating = next(
        (r['rating'] for r in state['results'] if r['queue_position'] == idx),
        3,
    )

    progress = f"Item {idx + 1} of {len(state['queue'])}"
    return item['generated_summary'], progress, rating
|
| 152 |
+
|
| 153 |
+
def start_session(username):
    """Begin (or resume) an annotation session for *username*.

    Returns a 6-tuple matching the Gradio outputs:
    (login-panel update, work-panel update, summary text, progress label,
    rating, session state).

    Fix: the empty-username guard returned only 5 ``gr.update()`` values for
    6 outputs, which makes Gradio error on submit; it now returns 6.
    """
    if not username:
        gr.Warning("Please enter a username!")
        return [gr.update()] * 6

    user_dir = get_user_dir(username)
    os.makedirs(user_dir, exist_ok=True)
    existing_state = load_state(user_dir)

    if existing_state:
        gr.Info(f"Welcome back! Resuming from item {existing_state['current_index'] + 1}.")
        state = existing_state
    else:
        # Fresh session: copy the full dataset so per-user queues stay isolated.
        state = {
            "username": username,
            "current_index": 0,
            "queue": list(FULL_DATASET),
            "results": [],
            "completed": False
        }
        save_state(user_dir, state)

    text, progress, rating = get_current_ui_values(state)
    return (gr.update(visible=False), gr.update(visible=True), text, progress, rating, state)
|
| 177 |
+
|
| 178 |
+
def submit_rating(doc_slider, state):
|
| 179 |
+
if state is None: return "", "Error", 3, 3, None
|
| 180 |
+
|
| 181 |
+
user_dir = get_user_dir(state['username'])
|
| 182 |
+
idx = state['current_index']
|
| 183 |
+
current_item = state['queue'][idx]
|
| 184 |
+
|
| 185 |
+
# Update existing rating if editing, otherwise append
|
| 186 |
+
new_result = {
|
| 187 |
+
"queue_position": idx,
|
| 188 |
+
"index": current_item.get('index', idx),
|
| 189 |
+
"doc_id": current_item.get('id', current_item.get('index', 'no_id')),
|
| 190 |
+
"label": current_item.get('label', 'no_label'),
|
| 191 |
+
"rating": doc_slider,
|
| 192 |
+
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
# Logic to overwrite existing rating for this index
|
| 196 |
+
state['results'] = [r for r in state['results'] if r['queue_position'] != idx]
|
| 197 |
+
state['results'].append(new_result)
|
| 198 |
+
state['results'].sort(key=lambda x: x['queue_position']) # Keep sorted
|
| 199 |
+
|
| 200 |
+
if idx + 1 < len(state['queue']):
|
| 201 |
+
state['current_index'] += 1
|
| 202 |
+
save_state(user_dir, state)
|
| 203 |
+
# Save results file
|
| 204 |
+
with open(os.path.join(user_dir, "annotation_results.json"), "w") as f:
|
| 205 |
+
json.dump(state['results'], f, indent=4)
|
| 206 |
+
|
| 207 |
+
text, progress, rating = get_current_ui_values(state)
|
| 208 |
+
return text, progress, rating, state
|
| 209 |
+
else:
|
| 210 |
+
state['completed'] = True
|
| 211 |
+
save_state(user_dir, state)
|
| 212 |
+
return "✅ ALL TASKS COMPLETED", "Status: Finished", 1, state
|
| 213 |
+
|
| 214 |
+
def go_back(state):
|
| 215 |
+
if state is None or state['current_index'] <= 0:
|
| 216 |
+
gr.Warning("Already at the first item.")
|
| 217 |
+
return [gr.update()] * 3 + [state]
|
| 218 |
+
|
| 219 |
+
state['current_index'] -= 1
|
| 220 |
+
text, progress, rating = get_current_ui_values(state)
|
| 221 |
+
return text, progress, rating, state
|
| 222 |
+
|
| 223 |
+
# --- UI INTERFACE ---
|
| 224 |
+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 225 |
+
session_state = gr.State()
|
| 226 |
+
|
| 227 |
+
gr.Markdown("# Medical Text Readability Annotation")
|
| 228 |
+
|
| 229 |
+
with gr.Accordion("Instructions & Calibration", open=False):
|
| 230 |
+
gr.HTML(GUIDE_HTML)
|
| 231 |
+
gr.HTML(EXAMPLES_HTML)
|
| 232 |
+
|
| 233 |
+
with gr.Column(visible=True) as intro_box:
|
| 234 |
+
username_input = gr.Textbox(label="Enter Your Name/ID", placeholder="e.g., user_101")
|
| 235 |
+
btn_start = gr.Button("Start / Resume Annotation", variant="primary")
|
| 236 |
+
|
| 237 |
+
with gr.Column(visible=False) as task_box:
|
| 238 |
+
progress_label = gr.Label(label="Overall Progress")
|
| 239 |
+
doc_display = gr.Textbox(interactive=False, lines=12, label="Medical Text")
|
| 240 |
+
doc_slider = gr.Slider(1, 5, step=1, label="Difficulty (1=Easy, 5=Hard)", value=3)
|
| 241 |
+
|
| 242 |
+
with gr.Row():
|
| 243 |
+
btn_prev = gr.Button("⬅️ Previous", variant="secondary")
|
| 244 |
+
btn_submit = gr.Button("Submit & Next ➡️", variant="primary")
|
| 245 |
+
|
| 246 |
+
# --- EVENT HANDLERS ---
|
| 247 |
+
btn_start.click(
|
| 248 |
+
fn=start_session,
|
| 249 |
+
inputs=[username_input],
|
| 250 |
+
outputs=[intro_box, task_box, doc_display, progress_label, doc_slider, session_state]
|
| 251 |
+
)
|
| 252 |
+
|
| 253 |
+
btn_submit.click(
|
| 254 |
+
fn=submit_rating,
|
| 255 |
+
inputs=[doc_slider, session_state],
|
| 256 |
+
outputs=[doc_display, progress_label, doc_slider, session_state]
|
| 257 |
+
)
|
| 258 |
+
|
| 259 |
+
btn_prev.click(
|
| 260 |
+
fn=go_back,
|
| 261 |
+
inputs=[session_state],
|
| 262 |
+
outputs=[doc_display, progress_label, doc_slider, session_state]
|
| 263 |
+
)
|
| 264 |
+
|
| 265 |
+
if __name__ == "__main__":
|
| 266 |
+
demo.launch(share=True)
|
code/interface/annotators_v5_tran_quality.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
# --- PATH CONFIGURATION ---
|
| 7 |
+
# DATA_PATH = "/home/mshahidul/readctrl/data/synthetic_dataset_diff_labels/syn_data_with_gs_summary_en_0_20.json"
|
| 8 |
+
DATA_PATH = "/home/mshahidul/readctrl/data/data_annotator_data/syn_data_diff_labels_en_0_80.json"
|
| 9 |
+
SAVE_ROOT = "/home/mshahidul/readctrl/data/annotators_validate_data_(20_80)"
|
| 10 |
+
os.makedirs(SAVE_ROOT, exist_ok=True)
|
| 11 |
+
|
| 12 |
+
# --- UI HTML COMPONENTS (Kept same as original) ---
|
| 13 |
+
GUIDE_HTML = """
|
| 14 |
+
<div style="background-color: #f9f9f9; padding: 15px; border-left: 6px solid #4CAF50; border-radius: 4px; margin-bottom: 20px;">
|
| 15 |
+
<h3>Rating Guide: Medical Text Difficulty</h3>
|
| 16 |
+
<table style="width:100%; border-collapse: collapse; text-align: left;">
|
| 17 |
+
<tr style="background-color: #e8f5e9;">
|
| 18 |
+
<th style="padding: 8px; border: 1px solid #ddd;">Score</th>
|
| 19 |
+
<th style="padding: 8px; border: 1px solid #ddd;">Description</th>
|
| 20 |
+
</tr>
|
| 21 |
+
<tr><td><b>1</b></td><td><b>Very Easy:</b> Simple words, no medical jargon.</td></tr>
|
| 22 |
+
<tr><td><b>2</b></td><td><b>Easy:</b> Conversational medical terms.</td></tr>
|
| 23 |
+
<tr><td><b>3</b></td><td><b>Moderate:</b> Standard patient education material.</td></tr>
|
| 24 |
+
<tr><td><b>4</b></td><td><b>Hard:</b> Significant technical jargon.</td></tr>
|
| 25 |
+
<tr><td><b>5</b></td><td><b>Very Hard:</b> Specialist-level / Academic.</td></tr>
|
| 26 |
+
</table>
|
| 27 |
+
</div>
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
EXAMPLES_HTML = """
|
| 31 |
+
<div style="background-color: #ffffff; padding: 15px; border: 1px solid #ddd; border-radius: 4px;">
|
| 32 |
+
<h3 style="color: #2e7d32;">Reference Examples</h3>
|
| 33 |
+
<div style="display: flex; gap: 15px;">
|
| 34 |
+
<div style="flex: 1; background-color: #f1f8e9; padding: 10px; border-radius: 4px;">
|
| 35 |
+
<h4>Level 1-2</h4>
|
| 36 |
+
<p>"She had a kidney problem... a big blood clot blocked veins in her brain."</p>
|
| 37 |
+
</div>
|
| 38 |
+
<div style="flex: 1; background-color: #ffebee; padding: 10px; border-radius: 4px;">
|
| 39 |
+
<h4>Level 4-5</h4>
|
| 40 |
+
<p>"Idiopathic NS inaugurated by cerebral venous thrombosis extended to the right jugular vein."</p>
|
| 41 |
+
</div>
|
| 42 |
+
</div>
|
| 43 |
+
</div>
|
| 44 |
+
"""
|
| 45 |
+
|
| 46 |
+
# --- DATA LOADING ---
|
| 47 |
+
if os.path.exists(DATA_PATH):
|
| 48 |
+
with open(DATA_PATH, "r") as f:
|
| 49 |
+
FULL_DATASET = json.load(f)
|
| 50 |
+
FULL_DATASET=FULL_DATASET[60:]
|
| 51 |
+
else:
|
| 52 |
+
assert False, f"Data file not found at {DATA_PATH}"
|
| 53 |
+
|
| 54 |
+
# --- PERSISTENCE HELPERS ---
|
| 55 |
+
def get_user_dir(username):
|
| 56 |
+
clean_username = "".join([c for c in username if c.isalnum() or c in (' ', '_', '-')]).strip() or "anonymous"
|
| 57 |
+
return os.path.join(SAVE_ROOT, clean_username)
|
| 58 |
+
|
| 59 |
+
def save_state(user_dir, state_dict):
|
| 60 |
+
with open(os.path.join(user_dir, "state.json"), "w") as f:
|
| 61 |
+
json.dump(state_dict, f, indent=4)
|
| 62 |
+
|
| 63 |
+
def load_state(user_dir):
|
| 64 |
+
state_path = os.path.join(user_dir, "state.json")
|
| 65 |
+
if os.path.exists(state_path):
|
| 66 |
+
with open(state_path, "r") as f:
|
| 67 |
+
return json.load(f)
|
| 68 |
+
return None
|
| 69 |
+
|
| 70 |
+
# --- LOGIC FUNCTIONS ---
|
| 71 |
+
def get_current_ui_values(state):
|
| 72 |
+
"""Helper to get UI values for the current index, including previous ratings if they exist."""
|
| 73 |
+
idx = state['current_index']
|
| 74 |
+
current_item = state['queue'][idx]
|
| 75 |
+
|
| 76 |
+
# Check if we already have a rating for this specific index
|
| 77 |
+
existing_rating = 3 # Default
|
| 78 |
+
for res in state['results']:
|
| 79 |
+
if res['queue_position'] == idx:
|
| 80 |
+
existing_rating = res['rating']
|
| 81 |
+
break
|
| 82 |
+
|
| 83 |
+
progress = f"Item {idx + 1} of {len(state['queue'])}"
|
| 84 |
+
return current_item['generated_summary'], progress, existing_rating
|
| 85 |
+
|
| 86 |
+
def start_session(username):
|
| 87 |
+
if not username:
|
| 88 |
+
gr.Warning("Please enter a username!")
|
| 89 |
+
return [gr.update()] * 5
|
| 90 |
+
|
| 91 |
+
user_dir = get_user_dir(username)
|
| 92 |
+
os.makedirs(user_dir, exist_ok=True)
|
| 93 |
+
existing_state = load_state(user_dir)
|
| 94 |
+
|
| 95 |
+
if existing_state:
|
| 96 |
+
gr.Info(f"Welcome back! Resuming from item {existing_state['current_index'] + 1}.")
|
| 97 |
+
state = existing_state
|
| 98 |
+
else:
|
| 99 |
+
state = {
|
| 100 |
+
"username": username,
|
| 101 |
+
"current_index": 0,
|
| 102 |
+
"queue": list(FULL_DATASET),
|
| 103 |
+
"results": [],
|
| 104 |
+
"completed": False
|
| 105 |
+
}
|
| 106 |
+
save_state(user_dir, state)
|
| 107 |
+
|
| 108 |
+
text, progress, rating = get_current_ui_values(state)
|
| 109 |
+
return (gr.update(visible=False), gr.update(visible=True), text, progress, rating, state)
|
| 110 |
+
|
| 111 |
+
def submit_rating(doc_slider, state):
|
| 112 |
+
if state is None: return "", "Error", 3, 3, None
|
| 113 |
+
|
| 114 |
+
user_dir = get_user_dir(state['username'])
|
| 115 |
+
idx = state['current_index']
|
| 116 |
+
current_item = state['queue'][idx]
|
| 117 |
+
|
| 118 |
+
# Update existing rating if editing, otherwise append
|
| 119 |
+
new_result = {
|
| 120 |
+
"queue_position": idx,
|
| 121 |
+
"doc_id": current_item.get('index', 'no_id'),
|
| 122 |
+
"label": current_item.get('label', 'no_label'),
|
| 123 |
+
"rating": doc_slider,
|
| 124 |
+
"timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
# Logic to overwrite existing rating for this index
|
| 128 |
+
state['results'] = [r for r in state['results'] if r['queue_position'] != idx]
|
| 129 |
+
state['results'].append(new_result)
|
| 130 |
+
state['results'].sort(key=lambda x: x['queue_position']) # Keep sorted
|
| 131 |
+
|
| 132 |
+
if idx + 1 < len(state['queue']):
|
| 133 |
+
state['current_index'] += 1
|
| 134 |
+
save_state(user_dir, state)
|
| 135 |
+
# Save results file
|
| 136 |
+
with open(os.path.join(user_dir, "annotation_results.json"), "w") as f:
|
| 137 |
+
json.dump(state['results'], f, indent=4)
|
| 138 |
+
|
| 139 |
+
text, progress, rating = get_current_ui_values(state)
|
| 140 |
+
return text, progress, rating, state
|
| 141 |
+
else:
|
| 142 |
+
state['completed'] = True
|
| 143 |
+
save_state(user_dir, state)
|
| 144 |
+
return "✅ ALL TASKS COMPLETED", "Status: Finished", 1, state
|
| 145 |
+
|
| 146 |
+
def go_back(state):
|
| 147 |
+
if state is None or state['current_index'] <= 0:
|
| 148 |
+
gr.Warning("Already at the first item.")
|
| 149 |
+
return [gr.update()] * 3 + [state]
|
| 150 |
+
|
| 151 |
+
state['current_index'] -= 1
|
| 152 |
+
text, progress, rating = get_current_ui_values(state)
|
| 153 |
+
return text, progress, rating, state
|
| 154 |
+
|
| 155 |
+
# --- UI INTERFACE ---
|
| 156 |
+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 157 |
+
session_state = gr.State()
|
| 158 |
+
|
| 159 |
+
gr.Markdown("# Medical Text Readability Annotation")
|
| 160 |
+
|
| 161 |
+
with gr.Accordion("Instructions & Calibration", open=False):
|
| 162 |
+
gr.HTML(GUIDE_HTML)
|
| 163 |
+
gr.HTML(EXAMPLES_HTML)
|
| 164 |
+
|
| 165 |
+
with gr.Column(visible=True) as intro_box:
|
| 166 |
+
username_input = gr.Textbox(label="Enter Your Name/ID", placeholder="e.g., user_101")
|
| 167 |
+
btn_start = gr.Button("Start / Resume Annotation", variant="primary")
|
| 168 |
+
|
| 169 |
+
with gr.Column(visible=False) as task_box:
|
| 170 |
+
progress_label = gr.Label(label="Overall Progress")
|
| 171 |
+
doc_display = gr.Textbox(interactive=False, lines=12, label="Medical Text")
|
| 172 |
+
doc_slider = gr.Slider(1, 5, step=1, label="Difficulty (1=Easy, 5=Hard)", value=3)
|
| 173 |
+
|
| 174 |
+
with gr.Row():
|
| 175 |
+
btn_prev = gr.Button("⬅️ Previous", variant="secondary")
|
| 176 |
+
btn_submit = gr.Button("Submit & Next ➡️", variant="primary")
|
| 177 |
+
|
| 178 |
+
# --- EVENT HANDLERS ---
|
| 179 |
+
btn_start.click(
|
| 180 |
+
fn=start_session,
|
| 181 |
+
inputs=[username_input],
|
| 182 |
+
outputs=[intro_box, task_box, doc_display, progress_label, doc_slider, session_state]
|
| 183 |
+
)
|
| 184 |
+
|
| 185 |
+
btn_submit.click(
|
| 186 |
+
fn=submit_rating,
|
| 187 |
+
inputs=[doc_slider, session_state],
|
| 188 |
+
outputs=[doc_display, progress_label, doc_slider, session_state]
|
| 189 |
+
)
|
| 190 |
+
|
| 191 |
+
btn_prev.click(
|
| 192 |
+
fn=go_back,
|
| 193 |
+
inputs=[session_state],
|
| 194 |
+
outputs=[doc_display, progress_label, doc_slider, session_state]
|
| 195 |
+
)
|
| 196 |
+
|
| 197 |
+
if __name__ == "__main__":
|
| 198 |
+
demo.launch(share=True)
|
code/interface/instr
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
|
| 3 |
+
# gr.Markdown("# 🏥 Health Literacy Subclaim Annotation\n## Texts labeled as low health literacy include less information than those labeled as intermediate health literacy, and intermediate health literacy texts include less information than proficient health literacy texts.\nSome key information has already been pre-selected to ensure that each label contains a minimum required amount of information. If you believe additional information should be included for a given label, please select the corresponding checkboxes.")
|
| 4 |
+
# with gr.Accordion("📖 Read Instructions First", open=True):
|
| 5 |
+
# gr.Markdown("""
|
| 6 |
+
|
| 7 |
+
# ### Step 1: Read the Text Type
|
| 8 |
+
|
| 9 |
+
# You will see **one text at a time**. At the top, the interface will tell you whether this is:
|
| 10 |
+
|
| 11 |
+
# * **Full Text**, or
|
| 12 |
+
# * **Gold Summary**
|
| 13 |
+
|
| 14 |
+
# Please read the text carefully before selecting any subclaims.
|
| 15 |
+
|
| 16 |
+
# ---
|
| 17 |
+
|
| 18 |
+
# ### Step 2: Review the Subclaims
|
| 19 |
+
|
| 20 |
+
# Below the text, you will see a list of **subclaims**.
|
| 21 |
+
# Each subclaim represents one piece of information from the text.
|
| 22 |
+
|
| 23 |
+
# **Example subclaims:**
|
| 24 |
+
|
| 25 |
+
# * ☐ The patient has high blood pressure.
|
| 26 |
+
# * ☐ The patient is 62 years old.
|
| 27 |
+
# * ☐ The patient experiences chest pain when breathing.
|
| 28 |
+
# * ☐ A chest X-ray shows pneumonia in the right lung.
|
| 29 |
+
# * ☐ The COVID test result is negative.
|
| 30 |
+
|
| 31 |
+
# ---
|
| 32 |
+
|
| 33 |
+
# ### Step 3: Annotate for Each Health Literacy Label
|
| 34 |
+
|
| 35 |
+
# You must select subclaims **separately for each label**.
|
| 36 |
+
|
| 37 |
+
# #### Low Health Literacy
|
| 38 |
+
|
| 39 |
+
# Select **only the most essential information** needed for basic understanding.
|
| 40 |
+
|
| 41 |
+
# **Good selection example:**
|
| 42 |
+
|
| 43 |
+
# * ☑ The patient has high blood pressure.
|
| 44 |
+
# * ☑ A chest X-ray shows pneumonia in the right lung.
|
| 45 |
+
|
| 46 |
+
# **Do NOT include:**
|
| 47 |
+
|
| 48 |
+
# * Exact age
|
| 49 |
+
# * Test details unless critical
|
| 50 |
+
# * Extra clinical findings
|
| 51 |
+
|
| 52 |
+
# ➡ Coverage should be **lowest**.
|
| 53 |
+
|
| 54 |
+
# ---
|
| 55 |
+
|
| 56 |
+
# #### Intermediate Health Literacy
|
| 57 |
+
|
| 58 |
+
# Select the **core information plus some helpful details**.
|
| 59 |
+
|
| 60 |
+
# **Good selection example:**
|
| 61 |
+
|
| 62 |
+
# * ☑ The patient has high blood pressure.
|
| 63 |
+
# * ☑ The patient experiences chest pain when breathing.
|
| 64 |
+
# * ☑ A chest X-ray shows pneumonia in the right lung.
|
| 65 |
+
# * ☑ The COVID test result is negative.
|
| 66 |
+
|
| 67 |
+
# ➡ Coverage should be **more than low**, but **less than proficient**.
|
| 68 |
+
|
| 69 |
+
# ---
|
| 70 |
+
|
| 71 |
+
# #### Proficient Health Literacy
|
| 72 |
+
|
| 73 |
+
# Select **all clinically relevant information**.
|
| 74 |
+
|
| 75 |
+
# **Good selection example:**
|
| 76 |
+
|
| 77 |
+
# * ☑ The patient has high blood pressure.
|
| 78 |
+
# * ☑ The patient is 62 years old.
|
| 79 |
+
# * ☑ The patient experiences chest pain when breathing.
|
| 80 |
+
# * ☑ A chest X-ray shows pneumonia in the right lung.
|
| 81 |
+
# * ☑ The COVID test result is negative.
|
| 82 |
+
|
| 83 |
+
# ➡ Coverage should be **highest**.
|
| 84 |
+
|
| 85 |
+
# ---
|
| 86 |
+
|
| 87 |
+
# ### Step 4: Check Information Percentages
|
| 88 |
+
|
| 89 |
+
# The interface shows the **percentage of selected information** for each label.
|
| 90 |
+
|
| 91 |
+
# A correct annotation should follow this order:
|
| 92 |
+
|
| 93 |
+
# > **Low % < Intermediate % < Proficient %**
|
| 94 |
+
|
| 95 |
+
# ⚠️ If low health literacy has more information than intermediate or proficient, you will see a warning. Please revise your selections.
|
| 96 |
+
|
| 97 |
+
# ---
|
| 98 |
+
|
| 99 |
+
# ### Key Reminder
|
| 100 |
+
|
| 101 |
+
# * Some subclaims may already be pre-selected to ensure **minimum required information**.
|
| 102 |
+
# * Only add new subclaims if you believe they are appropriate for that label.
|
| 103 |
+
# * When finished, submit and proceed to the **next instance**.
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
# """)
|
code/interface/instructions
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 📖 Annotation Guide: Health Literacy
|
| 2 |
+
Welcome! Your task is to determine which pieces of information (subclaims) belong in different versions of a health text based on **Health Literacy levels**.
|
| 3 |
+
## * **Pre-selections:** Some boxes are checked by default—these are the "minimum required" facts.
|
| 4 |
+
## Sometimes, generated summaries with different labels contain all the information present in the gold summary.
|
| 5 |
+
## In the case of full text, the amount of information included depends on the readability level. Texts with a low readability label contain less information than those with a proficient readability label.
|
| 6 |
+
## Consistency: Any information listed under 'Low' should automatically also appear under 'Intermediate' and 'Proficient.
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
### 🟢 Step 1: Identify the Source
|
| 10 |
+
Check the top of the interface. You are working with either:
|
| 11 |
+
* **Full Text:** The original clinical document.
|
| 12 |
+
* **Gold Summary:** A condensed version of the facts.
|
| 13 |
+
|
| 14 |
+
### 🔍 Step 2: Review the Subclaims
|
| 15 |
+
Subclaims are individual facts extracted from the text.
|
| 16 |
+
> *Example: "The patient is 62 years old" or "The X-ray shows pneumonia."*
|
| 17 |
+
|
| 18 |
+
---
|
| 19 |
+
|
| 20 |
+
### ⚖️ Step 3: Annotate by Literacy Level
|
| 21 |
+
You must select checkboxes for **three different audiences**. The goal is to create a "ladder" of information:
|
| 22 |
+
|
| 23 |
+
| Level | Goal | Inclusion Strategy |
|
| 24 |
+
| :--- | :--- | :--- |
|
| 25 |
+
| **🟢 Low** | **Basic Survival** | Only the absolute essentials. What must they know to stay safe? |
|
| 26 |
+
| **🔵 Intermediate** | **Clear Context** | Core info + helpful context. Explain the "what" and "why." |
|
| 27 |
+
| **🟣 Proficient** | **Full Detail** | Everything. Include clinical findings, ages, and specific test data. |
|
| 28 |
+
|
| 29 |
+
---
|
| 30 |
+
|
| 31 |
+
### 📊 Step 4: The Golden Rule (Check Your Percentages)
|
| 32 |
+
To ensure high-quality data, your selections **must** follow this hierarchy:
|
| 33 |
+
# **Low % < Intermediate % < Proficient %**
|
| 34 |
+
|
| 35 |
+
⚠️ **Wait for the Green Light:** If the **Low** level contains more information than **Intermediate**, the system will show a warning. Adjust your checkboxes until the percentages flow from lowest to highest.
|
| 36 |
+
|
| 37 |
+
---
|
| 38 |
+
|
| 39 |
+
### 💡 Quick Tips
|
| 40 |
+
|
| 41 |
+
* **Clinical Relevance:** For **Proficient**, include specific numbers (e.g., "140/90 mmHg") that might be too technical for **Low**.
|
| 42 |
+
|
| 43 |
+
**Ready to start?** Scroll down to begin your first annotation.
|
code/interface/interface_correction_data.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from openai import OpenAI
|
| 5 |
+
|
| 6 |
+
# --- CONFIGURATION ---
|
| 7 |
+
DATA_PATH = '/home/mshahidul/readctrl/data/annotators_validate_data_(20_80)/code/correction_evaluation_full_text_with_gs.json'
|
| 8 |
+
SAVE_DIR = '/home/mshahidul/readctrl/data/annotators_validate_data_(20_80)/correction_data/'
|
| 9 |
+
PROMPT_TEMPLATE_PATH = "/home/mshahidul/readctrl/prompts/syn_data_gen_diff_label_mod.txt"
|
| 10 |
+
API_FILE_PATH = "/home/mshahidul/api_new.json"
|
| 11 |
+
|
| 12 |
+
# --- INITIALIZATION ---
|
| 13 |
+
# Load API Key
|
| 14 |
+
with open(API_FILE_PATH, "r") as f:
|
| 15 |
+
api_keys = json.load(f)
|
| 16 |
+
client = OpenAI(api_key=api_keys["openai"])
|
| 17 |
+
|
| 18 |
+
# Load Prompt Template
|
| 19 |
+
with open(PROMPT_TEMPLATE_PATH, "r") as f:
|
| 20 |
+
PROMPT_TEMPLATE = f.read()
|
| 21 |
+
|
| 22 |
+
def load_data():
|
| 23 |
+
if os.path.exists(DATA_PATH):
|
| 24 |
+
with open(DATA_PATH, 'r') as f:
|
| 25 |
+
return json.load(f)
|
| 26 |
+
return []
|
| 27 |
+
|
| 28 |
+
DATA = load_data()
|
| 29 |
+
|
| 30 |
+
# --- AI LOGIC ---
|
| 31 |
+
def call_ai_processor(index, full_text, gold_summary):
|
| 32 |
+
"""Calls GPT-5 (OpenAI API) and extracts the text for the current label."""
|
| 33 |
+
try:
|
| 34 |
+
item = DATA[index]
|
| 35 |
+
target_label = item.get('ai_label') # e.g., "low_health_literacy"
|
| 36 |
+
|
| 37 |
+
# Note: 'source_language' should ideally be in your JSON.
|
| 38 |
+
# Defaulting to English if not found.
|
| 39 |
+
source_lang = item.get('language', 'English')
|
| 40 |
+
|
| 41 |
+
# Format the prompt
|
| 42 |
+
prompt = (PROMPT_TEMPLATE
|
| 43 |
+
.replace("<<<FULL_TEXT>>>", full_text)
|
| 44 |
+
.replace("<<<SOURCE_LANGUAGE>>>", source_lang)
|
| 45 |
+
.replace("<<<GOLD_SUMMARY>>>", gold_summary)
|
| 46 |
+
.replace("<<<TARGET_LABEL>>>", target_label))
|
| 47 |
+
# import ipdb; ipdb.set_trace()
|
| 48 |
+
|
| 49 |
+
response = client.chat.completions.create(
|
| 50 |
+
model="gpt-5-mini", # Change to "gpt-5" or specific model name when available
|
| 51 |
+
messages=[{"role": "user", "content": prompt}],
|
| 52 |
+
response_format={ "type": "json_object" }
|
| 53 |
+
)
|
| 54 |
+
|
| 55 |
+
content = json.loads(response.choices[0].message.content)
|
| 56 |
+
|
| 57 |
+
# Extract only the text for the specific label we are currently editing
|
| 58 |
+
# target_label usually matches the keys: low_health_literacy, etc.
|
| 59 |
+
refined_text = content.get(target_label, "Error: Label not found in AI response.")
|
| 60 |
+
return refined_text
|
| 61 |
+
|
| 62 |
+
except Exception as e:
|
| 63 |
+
return f"AI Error: {str(e)}"
|
| 64 |
+
|
| 65 |
+
# --- DATA HELPERS ---
|
| 66 |
+
def get_user_save_path(username):
|
| 67 |
+
clean_name = "".join([c for c in username if c.isalpha() or c.isdigit()]).rstrip()
|
| 68 |
+
return os.path.join(SAVE_DIR, f"final_corrected_{clean_name}.json")
|
| 69 |
+
|
| 70 |
+
def load_user_results(username):
|
| 71 |
+
path = get_user_save_path(username)
|
| 72 |
+
if os.path.exists(path):
|
| 73 |
+
with open(path, 'r') as f:
|
| 74 |
+
return json.load(f)
|
| 75 |
+
return []
|
| 76 |
+
|
| 77 |
+
def get_record(index):
|
| 78 |
+
if 0 <= index < len(DATA):
|
| 79 |
+
item = DATA[index]
|
| 80 |
+
ai_label = item.get('ai_label', '')
|
| 81 |
+
ai_text = item.get('diff_label_texts', {}).get(ai_label, "Text not found")
|
| 82 |
+
gold_summary = item.get('summary', '') # Added this for the AI prompt
|
| 83 |
+
|
| 84 |
+
anno_info = (
|
| 85 |
+
f"Plaban: {item.get('category_plaban')} (Rating: {item.get('rating_plaban')})\n"
|
| 86 |
+
f"Mahi: {item.get('category_mahi')} (Rating: {item.get('rating_mahi')})\n"
|
| 87 |
+
f"Shama: {item.get('category_shama')} (Rating: {item.get('rating_shama')})"
|
| 88 |
+
)
|
| 89 |
+
|
| 90 |
+
return (
|
| 91 |
+
item.get('doc_id'),
|
| 92 |
+
anno_info,
|
| 93 |
+
ai_label.replace("_", " ").title(),
|
| 94 |
+
item.get('fulltext'),
|
| 95 |
+
ai_text,
|
| 96 |
+
index,
|
| 97 |
+
gold_summary
|
| 98 |
+
)
|
| 99 |
+
return None
|
| 100 |
+
|
| 101 |
+
def login_user(username):
|
| 102 |
+
if not username or len(username.strip()) == 0:
|
| 103 |
+
return gr.update(visible=True), gr.update(visible=False), 0, None, "", "", "", "", ""
|
| 104 |
+
|
| 105 |
+
existing_data = load_user_results(username)
|
| 106 |
+
start_index = len(existing_data)
|
| 107 |
+
|
| 108 |
+
if start_index >= len(DATA):
|
| 109 |
+
return gr.update(visible=False), gr.update(visible=True), start_index, "Finished!", "All caught up!", "No more data.", "No more data.", "", ""
|
| 110 |
+
|
| 111 |
+
record = get_record(start_index)
|
| 112 |
+
return (
|
| 113 |
+
gr.update(visible=False),
|
| 114 |
+
gr.update(visible=True),
|
| 115 |
+
start_index,
|
| 116 |
+
record[0], record[1], record[2], record[3], record[4], record[6]
|
| 117 |
+
)
|
| 118 |
+
|
| 119 |
+
def save_and_next(username, index, corrected_text, is_ok):
|
| 120 |
+
user_results = load_user_results(username)
|
| 121 |
+
current_item = DATA[index]
|
| 122 |
+
|
| 123 |
+
# If the user didn't type anything in manual_correction and hit "AI Text is OK", use original
|
| 124 |
+
final_text = current_item.get('diff_label_texts', {}).get(current_item['ai_label']) if is_ok else corrected_text
|
| 125 |
+
|
| 126 |
+
result_entry = {
|
| 127 |
+
"doc_id": current_item['doc_id'],
|
| 128 |
+
"ai_label": current_item['ai_label'],
|
| 129 |
+
"status": "Approved" if is_ok else "Manually Corrected/AI Refined",
|
| 130 |
+
"final_text": final_text,
|
| 131 |
+
"original_ai_text": current_item.get('diff_label_texts', {}).get(current_item['ai_label'])
|
| 132 |
+
}
|
| 133 |
+
|
| 134 |
+
user_results.append(result_entry)
|
| 135 |
+
|
| 136 |
+
with open(get_user_save_path(username), 'w') as f:
|
| 137 |
+
json.dump(user_results, f, indent=4)
|
| 138 |
+
|
| 139 |
+
next_index = index + 1
|
| 140 |
+
if next_index < len(DATA):
|
| 141 |
+
res = get_record(next_index)
|
| 142 |
+
return list(res) + [""]
|
| 143 |
+
else:
|
| 144 |
+
return [None, "Finished!", "Finished!", "No more data.", "No more data.", next_index, "No more data.", ""]
|
| 145 |
+
|
| 146 |
+
# --- GRADIO UI ---
|
| 147 |
+
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 148 |
+
gr.Markdown("# 📝 AI Label Correction Interface (v2 with GPT-Refinement)")
|
| 149 |
+
|
| 150 |
+
current_idx = gr.State(0)
|
| 151 |
+
user_session = gr.State("")
|
| 152 |
+
gold_summary_hidden = gr.State("") # To hold the summary for the AI prompt
|
| 153 |
+
|
| 154 |
+
with gr.Row() as login_row:
|
| 155 |
+
with gr.Column(scale=1):
|
| 156 |
+
user_input = gr.Textbox(label="Enter Username to Resume", placeholder="e.g., Shahidul")
|
| 157 |
+
btn_login = gr.Button("Start Annotation", variant="primary")
|
| 158 |
+
|
| 159 |
+
with gr.Column(visible=False) as main_container:
|
| 160 |
+
with gr.Row():
|
| 161 |
+
with gr.Column(scale=1):
|
| 162 |
+
doc_id_display = gr.Textbox(label="Document ID", interactive=False)
|
| 163 |
+
ai_label_display = gr.Label(label="Target AI Label")
|
| 164 |
+
annotator_stats = gr.Textbox(label="Human Annotator Ratings", lines=4, interactive=False)
|
| 165 |
+
|
| 166 |
+
with gr.Column(scale=2):
|
| 167 |
+
full_text_display = gr.Textbox(label="Source Full Text", lines=10, interactive=False)
|
| 168 |
+
|
| 169 |
+
with gr.Row():
|
| 170 |
+
with gr.Column():
|
| 171 |
+
ai_generated_text = gr.Textbox(label="Original AI Text", lines=6, interactive=False)
|
| 172 |
+
with gr.Column():
|
| 173 |
+
manual_correction = gr.Textbox(label="AI Refinement / Manual Correction", placeholder="AI generated text will appear here...", lines=6)
|
| 174 |
+
btn_ai_check = gr.Button("✨ Check & Refine through AI", variant="secondary")
|
| 175 |
+
|
| 176 |
+
with gr.Row():
|
| 177 |
+
btn_ok = gr.Button("✅ Original Text is OK", variant="primary")
|
| 178 |
+
btn_fix = gr.Button("💾 Save Current Correction/AI Text", variant="stop")
|
| 179 |
+
|
| 180 |
+
# --- LOGIC ---
|
| 181 |
+
btn_login.click(
|
| 182 |
+
fn=login_user,
|
| 183 |
+
inputs=[user_input],
|
| 184 |
+
outputs=[login_row, main_container, current_idx, doc_id_display, annotator_stats, ai_label_display, full_text_display, ai_generated_text, gold_summary_hidden]
|
| 185 |
+
).then(fn=lambda username: username, inputs=[user_input], outputs=[user_session])
|
| 186 |
+
|
| 187 |
+
# AI Regeneration Logic
|
| 188 |
+
btn_ai_check.click(
|
| 189 |
+
fn=call_ai_processor,
|
| 190 |
+
inputs=[current_idx, full_text_display, gold_summary_hidden],
|
| 191 |
+
outputs=[manual_correction]
|
| 192 |
+
)
|
| 193 |
+
|
| 194 |
+
action_inputs = [user_session, current_idx, manual_correction]
|
| 195 |
+
action_outputs = [doc_id_display, annotator_stats, ai_label_display, full_text_display, ai_generated_text, current_idx, gold_summary_hidden, manual_correction]
|
| 196 |
+
|
| 197 |
+
btn_ok.click(
|
| 198 |
+
fn=lambda user, idx, txt: save_and_next(user, idx, txt, True),
|
| 199 |
+
inputs=action_inputs,
|
| 200 |
+
outputs=action_outputs
|
| 201 |
+
)
|
| 202 |
+
|
| 203 |
+
btn_fix.click(
|
| 204 |
+
fn=lambda user, idx, txt: save_and_next(user, idx, txt, False),
|
| 205 |
+
inputs=action_inputs,
|
| 206 |
+
outputs=action_outputs
|
| 207 |
+
)
|
| 208 |
+
|
| 209 |
+
if __name__ == "__main__":
|
| 210 |
+
demo.launch(share=True)
|
code/interface/t.py
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Smoke-test a live Gradio share link via gradio_client."""
from gradio_client import Client

# Ephemeral share URL -- replace with the current link before running.
chat_client = Client("https://23833b5a465382100f.gradio.live/")
reply = chat_client.predict(
    message="Hello!!",
    api_name="/chat_predict",
)
print(reply)
|
code/interface/translate_gemma.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from openai import OpenAI
|
| 3 |
+
import base64
|
| 4 |
+
import io
|
| 5 |
+
|
| 6 |
+
# Initialize the client pointing to your vLLM server
# (OpenAI-compatible endpoint exposed by vLLM on the LAN host below).
client = OpenAI(
    base_url="http://172.16.34.29:8006/v1",
    api_key="vllm-token",  # placeholder credential -- server-side expectation not visible here
)
|
| 11 |
+
|
| 12 |
+
def encode_image_to_base64(image):
    """Convert a PIL image to a raw base64 JPEG string (no data-URI prefix).

    Args:
        image: PIL.Image.Image or None.

    Returns:
        Base64-encoded JPEG data as str, or None when no image is given.
    """
    if image is None:
        return None
    # JPEG cannot encode an alpha channel or palette; Pillow raises OSError
    # when saving e.g. RGBA/P images as JPEG, so normalize to RGB first.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
| 19 |
+
|
| 20 |
+
def run_translation(source_code, target_code, text_input, image_input):
    """Send a translation request (text or image) to the vLLM server.

    Builds the single-item content payload the TranslateGemma schema
    expects and returns the model's reply, or an error string on failure.
    The image input takes precedence over the text input.
    """
    # The schema requires every one of these keys to be present.
    payload = {
        "source_lang_code": source_code,
        "target_lang_code": target_code,
        "text": None,
        "image": None,
    }

    if image_input is None:
        if not text_input.strip():
            return "Please provide text or an image."
        payload["type"] = "text"
        payload["text"] = text_input
    else:
        payload["type"] = "image"
        payload["image"] = encode_image_to_base64(image_input)

    try:
        # vLLM expects the payload as the single item of the content list.
        completion = client.chat.completions.create(
            model="translate_gemma",
            messages=[{"role": "user", "content": [payload]}],
            max_tokens=500,
        )
        return completion.choices[0].message.content
    except Exception as exc:
        return f"⚠️ Error: {str(exc)}"
|
| 52 |
+
|
| 53 |
+
# --- Gradio UI Layout ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🌍 TranslateGemma 27B")
    gr.Markdown("Corrected schema for vLLM inference.")

    # Free-text language codes; defaults translate English -> Bangla.
    with gr.Row():
        src_code = gr.Textbox(label="Source Language Code", value="en")
        tgt_code = gr.Textbox(label="Target Language Code", value="bn")

    with gr.Row():
        with gr.Column():
            # Either text or an image may be supplied; run_translation
            # prefers the image when both are set.
            text_box = gr.Textbox(label="Text Input", placeholder="Type English here...", lines=5)
            image_box = gr.Image(label="Image Input", type="pil")
            submit_btn = gr.Button("Translate", variant="primary")

        with gr.Column():
            output_box = gr.Textbox(label="Bangla Translation", interactive=False, lines=10)

    submit_btn.click(
        fn=run_translation,
        inputs=[src_code, tgt_code, text_box, image_box],
        outputs=output_box
    )

if __name__ == "__main__":
    demo.launch(share=True)
|
code/interface/translation_quality.py
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def sanitize_username(username: str) -> str:
    """Strip the name and keep only characters safe for filesystem paths."""
    if not username:
        return ""
    return "".join(
        ch for ch in username.strip() if ch.isalnum() or ch in "_-"
    )
|
| 14 |
+
|
| 15 |
+
def get_user_session_file(username):
    """Path of the per-user ratings JSON inside SAVE_DIR."""
    return os.path.join(SAVE_DIR, f"ratings_{sanitize_username(username)}.json")
|
| 18 |
+
|
| 19 |
+
# Target language selection.
# NOTE(review): "be" is ISO 639-1 for Belarusian; Bengali is "bn". Kept
# unchanged because existing rating directories may already use "be".
language="Bengali"
if language=="Chinese":
    language_code="ch"
elif language=="Hindi":
    language_code="hi"
elif language=="Bengali":
    language_code="be"
else:
    assert False, "Unsupported language"


# Load translation dataset
TRANSLATION_PATH = f"/home/mshahidul/readctrl/data/translated_data/translation_english2bangla_v1.json"
with open(TRANSLATION_PATH, "r", encoding="utf-8") as f:
    translation_dataset = json.load(f)[:50]

# Load source dataset for English fulltext
SRC_PATH = f"/home/mshahidul/readctrl/data/testing_data_gs/multiclinsum_gs_train_en.json"
with open(SRC_PATH, "r", encoding="utf-8") as f:
    src_dataset = json.load(f)[:50]

# Merge datasets by index (assume same order)
# NOTE(review): pairing relies on both files listing documents in the same
# order -- TODO confirm; the translation-file "id" is kept for traceability.
dataset = [
    {
        "src_fulltext": src_dataset[i]["fulltext"],
        "translated_fulltext": translation_dataset[i]["fulltext_translated"]["translated_medical_note"],
        "id": translation_dataset[i]["id"]
    }
    for i in range(min(len(src_dataset), len(translation_dataset)))
]

# 2. Configuration for saving
SAVE_DIR = f"/home/mshahidul/readctrl/data/translated_data/rating_info/{language_code}"
os.makedirs(SAVE_DIR, exist_ok=True)

SESSION_FILE = None  # Will be set per user

# (label, stored value) pairs for the rating dropdown.
RATING_OPTIONS = [
    ("1 - Poor (Incorrect/Nonsense)", 1),
    ("2 - Fair (Understandable but awkward)", 2),
    ("3 - Good (Accurate/Perfect)", 3)
]

custom_css = """
.small-header { font-size: 0.85rem !important; font-weight: 600; margin-bottom: -10px; color: #555; }
.nav-row { background-color: #f9f9f9; padding: 10px; border-radius: 8px; margin-bottom: 15px; }
"""
|
| 66 |
+
|
| 67 |
+
def save_rating_to_json(data_item, username):
    """Insert or update `data_item` in the user's ratings file.

    The file is rewritten as {"username", "updated_at", "records"}; older
    files that stored a bare list of records are still readable.
    """
    session_file = get_user_session_file(username)
    output_data = []
    if os.path.exists(session_file):
        with open(session_file, "r", encoding="utf-8") as f:
            try:
                output_data = json.load(f)
            except json.JSONDecodeError:
                output_data = []  # corrupt file: start fresh rather than crash

    # Backward/forward compatibility: support either list[record] or dict with "records".
    if isinstance(output_data, dict):
        records = output_data.get("records", [])
    else:
        records = output_data if isinstance(output_data, list) else []

    # Keep a single record per index (update if it already exists).
    new_index = data_item.get("index")
    updated = False
    for i, rec in enumerate(records):
        if isinstance(rec, dict) and rec.get("index") == new_index:
            records[i] = data_item
            updated = True
            break
    if not updated:
        records.append(data_item)

    payload = {
        "username": sanitize_username(username) or username,
        "updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "records": records,
    }
    with open(session_file, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=4)
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
def load_user_records(username):
    """Return the user's saved rating records, or [] on any problem."""
    path = get_user_session_file(username)
    if not os.path.exists(path):
        return []
    try:
        with open(path, "r", encoding="utf-8") as fh:
            parsed = json.load(fh)
    except Exception:
        return []
    # Accept both the wrapped {"records": [...]} format and a bare list.
    if isinstance(parsed, dict):
        parsed = parsed.get("records", [])
    return parsed if isinstance(parsed, list) else []
|
| 117 |
+
|
| 118 |
+
def load_example(index):
    """Load sample `index` (clamped into range) and return all UI values."""
    total = len(dataset)
    index = min(max(index, 0), total - 1)
    entry = dataset[index]
    progress_pct = (index / total) * 100
    return (
        entry["src_fulltext"],                           # src_display
        entry["translated_fulltext"],                    # eng_display
        None,                                            # rating_dropdown (clear)
        index,                                           # current_index
        f"Sample {index + 1} of {total} ({progress_pct:.1f}%)",  # progress_display
        progress_pct,                                    # progress_bar
        index + 1,                                       # jump_input (1-based)
    )
|
| 135 |
+
|
| 136 |
+
def get_last_index_for_user(username):
    """First unannotated sample index, or len(dataset) when all done."""
    if not username:
        return 0
    completed = {
        rec["index"]
        for rec in load_user_records(username)
        if isinstance(rec, dict) and isinstance(rec.get("index"), int)
    }
    # Resume at the first sample that has not been rated yet.
    return next(
        (i for i in range(len(dataset)) if i not in completed),
        len(dataset),
    )
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def load_example_or_done(index):
    """Like load_example, but show a completion screen past the last sample."""
    total = len(dataset)
    if index < total:
        return load_example(index)
    return (
        "✅ ALL DONE",
        "✅ ALL DONE",
        None,
        total,
        f"✅ Completed all {total} samples",
        100,
        total,
    )
|
| 167 |
+
|
| 168 |
+
def next_item(index, rating, src_txt, eng_txt, username):
    """Validate, persist the current rating, then advance to the next sample."""
    if rating is None:
        raise gr.Error("Please select a rating before proceeding!")
    if not username:
        raise gr.Error("Please enter your username!")
    safe_user = sanitize_username(username)
    if not safe_user:
        raise gr.Error("Username must contain letters/numbers (optionally _ or -).")

    save_rating_to_json(
        {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "index": index,
            "src_text": src_txt,
            "translated_text": eng_txt,
            "rating": rating,
            "username": safe_user,
        },
        safe_user,
    )
    gr.Info(f"Saved record {index + 1} for {safe_user}.")

    # After saving, resume at the first unannotated index.
    return load_example_or_done(get_last_index_for_user(safe_user))
|
| 190 |
+
|
| 191 |
+
def jump_to_instance(target_index):
    """Navigate to a 1-based sample number from the jump box."""
    zero_based = target_index - 1
    return load_example_or_done(zero_based)
|
| 193 |
+
|
| 194 |
+
# Gradio layout and event wiring for the rating tool.
with gr.Blocks(css=custom_css) as demo:
    # Login / session controls.
    username_box = gr.Textbox(label="Enter your username", value="", interactive=True)
    login_btn = gr.Button("Start/Resume Session", variant="primary")
    current_index = gr.State(0)  # zero-based index of the sample on screen
    total_count = len(dataset)
    gr.Markdown(f"### Translation Quality Annotation")
    # Navigation bar: progress indicators plus a jump-to-sample control.
    with gr.Row(elem_classes="nav-row"):
        with gr.Column(scale=2):
            progress_bar = gr.Slider(label="Progress", minimum=0, maximum=100, value=0, interactive=False)
            progress_display = gr.Markdown(f"Sample 1 of {total_count} (0.0%)")
        with gr.Column(scale=1):
            jump_input = gr.Number(label="Jump to Sample #", value=1, precision=0)  # 1-based
            jump_btn = gr.Button("Go", size="sm")
    # Side-by-side source text and its translation.
    with gr.Row():
        with gr.Column():
            gr.Markdown("##### Source Fulltext (English)")
            src_display = gr.Textbox(value=dataset[0]["src_fulltext"], interactive=False, lines=12, show_label=False)
        with gr.Column():
            gr.Markdown("##### Fulltext Translation (Bangla)")
            eng_display = gr.Textbox(value=dataset[0]["translated_fulltext"], interactive=False, lines=12, show_label=False)
    rating_dropdown = gr.Dropdown(choices=RATING_OPTIONS, label="Select Rating")
    with gr.Row():
        prev_btn = gr.Button("⬅ Previous (Review)", variant="secondary")
        submit_btn = gr.Button("Save & Next ➡", variant="primary")

    def login_user(username):
        # Validate the username, then resume at the first unannotated sample.
        safe_user = sanitize_username(username)
        if not safe_user:
            raise gr.Error("Please enter a valid username (letters/numbers, _ or -).")
        idx = get_last_index_for_user(safe_user)
        return load_example_or_done(idx)

    login_btn.click(
        fn=login_user,
        inputs=[username_box],
        outputs=[src_display, eng_display, rating_dropdown, current_index, progress_display, progress_bar, jump_input]
    )

    # Save the current rating and advance to the next unrated sample.
    submit_btn.click(
        fn=next_item,
        inputs=[current_index, rating_dropdown, src_display, eng_display, username_box],
        outputs=[src_display, eng_display, rating_dropdown, current_index, progress_display, progress_bar, jump_input]
    )

    # 2. Update Prev Button: removed tr_display from outputs
    prev_btn.click(
        fn=lambda idx: load_example_or_done(idx - 1),
        inputs=[current_index],
        outputs=[src_display, eng_display, rating_dropdown, current_index, progress_display, progress_bar, jump_input]
    )

    # 3. Update Jump Button: removed tr_display from outputs
    jump_btn.click(
        fn=jump_to_instance,
        inputs=[jump_input],
        outputs=[src_display, eng_display, rating_dropdown, current_index, progress_display, progress_bar, jump_input]
    )

if __name__ == "__main__":
    demo.launch(share=True)
|
code/interface/translation_quality_v2.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from datetime import datetime
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def sanitize_username(username: str) -> str:
    """Reduce a username to filesystem-safe characters."""
    if not username:
        return ""
    kept = [ch for ch in username.strip() if ch.isalnum() or ch == "_" or ch == "-"]
    return "".join(kept)
|
| 14 |
+
|
| 15 |
+
def get_user_session_file(username):
    """Absolute path of this user's ratings JSON file under SAVE_DIR."""
    safe = sanitize_username(username)
    return os.path.join(SAVE_DIR, "ratings_{}.json".format(safe))
|
| 18 |
+
|
| 19 |
+
# Target language selection.
# NOTE(review): "be" is ISO 639-1 for Belarusian; Bengali is "bn". Kept
# unchanged because existing rating directories may already use "be".
language="Bengali"
if language=="Chinese":
    language_code="ch"
elif language=="Hindi":
    language_code="hi"
elif language=="Bengali":
    language_code="be"
else:
    assert False, "Unsupported language"


# Load translation dataset (EN -> BN fulltext/summary)
TRANSLATION_PATH = (
    "/home/mshahidul/readctrl/data/translated_data/translation_wo_judge/"
    "multiclinsum_gs_train_en2bn_gemma(0_200).json"
)
with open(TRANSLATION_PATH, "r", encoding="utf-8") as f:
    translation_dataset = json.load(f)

# Normalize each record to the fields the UI needs; annotate the first 50 only.
dataset = [
    {
        "src_fulltext": item.get("fulltext", ""),
        "translated_fulltext": item.get("translated_fulltext", ""),
        "id": item.get("id"),
    }
    for item in translation_dataset
][:50]

# 2. Configuration for saving
SAVE_DIR = f"/home/mshahidul/readctrl/data/translated_data/rating_info_v2/{language_code}"
os.makedirs(SAVE_DIR, exist_ok=True)

SESSION_FILE = None  # Will be set per user

# (label, stored value) pairs for the rating dropdown.
RATING_OPTIONS = [
    ("1 - Poor (Incorrect/Nonsense)", 1),
    ("2 - Fair (Understandable but awkward)", 2),
    ("3 - Good (Accurate/Perfect)", 3)
]

custom_css = """
.small-header { font-size: 0.85rem !important; font-weight: 600; margin-bottom: -10px; color: #555; }
.nav-row { background-color: #f9f9f9; padding: 10px; border-radius: 8px; margin-bottom: 15px; }
"""
|
| 63 |
+
|
| 64 |
+
def save_rating_to_json(data_item, username):
    """Insert or update `data_item` in the user's ratings file.

    The file is rewritten as {"username", "updated_at", "records"}; older
    files that stored a bare list of records are still readable.
    """
    session_file = get_user_session_file(username)
    output_data = []
    if os.path.exists(session_file):
        with open(session_file, "r", encoding="utf-8") as f:
            try:
                output_data = json.load(f)
            except json.JSONDecodeError:
                output_data = []  # corrupt file: start fresh rather than crash

    # Backward/forward compatibility: support either list[record] or dict with "records".
    if isinstance(output_data, dict):
        records = output_data.get("records", [])
    else:
        records = output_data if isinstance(output_data, list) else []

    # Keep a single record per index (update if it already exists).
    new_index = data_item.get("index")
    updated = False
    for i, rec in enumerate(records):
        if isinstance(rec, dict) and rec.get("index") == new_index:
            records[i] = data_item
            updated = True
            break
    if not updated:
        records.append(data_item)

    payload = {
        "username": sanitize_username(username) or username,
        "updated_at": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "records": records,
    }
    with open(session_file, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=4)
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def load_user_records(username):
    """Read this user's saved records; any failure yields an empty list."""
    path = get_user_session_file(username)
    if not os.path.exists(path):
        return []
    try:
        with open(path, "r", encoding="utf-8") as handle:
            content = json.load(handle)
    except Exception:
        return []
    # Wrapped {"records": [...]} format or a legacy bare list.
    records = content.get("records", []) if isinstance(content, dict) else content
    return records if isinstance(records, list) else []
|
| 114 |
+
|
| 115 |
+
def load_example(index):
    """Return component values for sample `index` (clamped into range)."""
    total = len(dataset)
    if index < 0:
        index = 0
    elif index > total - 1:
        index = total - 1
    sample = dataset[index]
    progress_pct = (index / total) * 100
    progress_text = f"Sample {index + 1} of {total} ({progress_pct:.1f}%)"
    return (
        sample["src_fulltext"],         # src_display
        sample["translated_fulltext"],  # eng_display
        None,                           # rating_dropdown cleared
        index,                          # current_index
        progress_text,                  # progress_display
        progress_pct,                   # progress_bar
        index + 1,                      # jump_input (1-based)
    )
|
| 132 |
+
|
| 133 |
+
def get_last_index_for_user(username):
    """Index of the first sample this user has not rated yet."""
    if not username:
        return 0
    rated = set()
    for entry in load_user_records(username):
        if isinstance(entry, dict) and isinstance(entry.get("index"), int):
            rated.add(entry["index"])
    total = len(dataset)
    for candidate in range(total):
        if candidate not in rated:
            return candidate
    # Every sample annotated.
    return total
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def load_example_or_done(index):
    """Dispatch to load_example, or render the all-done screen."""
    total = len(dataset)
    if index >= total:
        return (
            "✅ ALL DONE",
            "✅ ALL DONE",
            None,
            total,
            f"✅ Completed all {total} samples",
            100,
            total,
        )
    return load_example(index)
|
| 164 |
+
|
| 165 |
+
def next_item(index, rating, src_txt, eng_txt, username):
    """Persist the current rating and move on to the next unrated sample."""
    if rating is None:
        raise gr.Error("Please select a rating before proceeding!")
    if not username:
        raise gr.Error("Please enter your username!")
    safe_user = sanitize_username(username)
    if not safe_user:
        raise gr.Error("Username must contain letters/numbers (optionally _ or -).")

    stamped = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "index": index,
        "src_text": src_txt,
        "translated_text": eng_txt,
        "rating": rating,
        "username": safe_user,
    }
    save_rating_to_json(stamped, safe_user)
    gr.Info(f"Saved record {index + 1} for {safe_user}.")

    # After saving, resume at the first unannotated index.
    return load_example_or_done(get_last_index_for_user(safe_user))
|
| 187 |
+
|
| 188 |
+
def jump_to_instance(target_index):
    """Convert the 1-based jump-box value and load that sample."""
    zero_idx = target_index - 1
    return load_example_or_done(zero_idx)
|
| 190 |
+
|
| 191 |
+
# Gradio layout and event wiring for the rating tool (TranslateGemma data).
with gr.Blocks(css=custom_css) as demo:
    # Login / session controls.
    username_box = gr.Textbox(label="Enter your username", value="", interactive=True)
    login_btn = gr.Button("Start/Resume Session", variant="primary")
    current_index = gr.State(0)  # zero-based index of the sample on screen
    total_count = len(dataset)
    gr.Markdown("## Translation Quality Annotation")
    gr.Markdown("Data generated by TranslateGemma.")
    # Navigation bar: progress indicators plus a jump-to-sample control.
    with gr.Row(elem_classes="nav-row"):
        with gr.Column(scale=2):
            progress_bar = gr.Slider(label="Progress", minimum=0, maximum=100, value=0, interactive=False)
            progress_display = gr.Markdown(f"Sample 1 of {total_count} (0.0%)")
        with gr.Column(scale=1):
            jump_input = gr.Number(label="Jump to Sample #", value=1, precision=0)  # 1-based
            jump_btn = gr.Button("Go", size="sm")
    # Side-by-side source text and its translation.
    with gr.Row():
        with gr.Column():
            gr.Markdown("##### Source Fulltext (English)")
            src_display = gr.Textbox(value=dataset[0]["src_fulltext"], interactive=False, lines=12, show_label=False)
        with gr.Column():
            gr.Markdown("##### Fulltext Translation (Bangla)")
            eng_display = gr.Textbox(value=dataset[0]["translated_fulltext"], interactive=False, lines=12, show_label=False)
    rating_dropdown = gr.Dropdown(choices=RATING_OPTIONS, label="Select Rating")
    with gr.Row():
        prev_btn = gr.Button("⬅ Previous (Review)", variant="secondary")
        submit_btn = gr.Button("Save & Next ➡", variant="primary")

    def login_user(username):
        # Validate the username, then resume at the first unannotated sample.
        safe_user = sanitize_username(username)
        if not safe_user:
            raise gr.Error("Please enter a valid username (letters/numbers, _ or -).")
        idx = get_last_index_for_user(safe_user)
        return load_example_or_done(idx)

    login_btn.click(
        fn=login_user,
        inputs=[username_box],
        outputs=[src_display, eng_display, rating_dropdown, current_index, progress_display, progress_bar, jump_input]
    )

    # Save the current rating and advance to the next unrated sample.
    submit_btn.click(
        fn=next_item,
        inputs=[current_index, rating_dropdown, src_display, eng_display, username_box],
        outputs=[src_display, eng_display, rating_dropdown, current_index, progress_display, progress_bar, jump_input]
    )

    # 2. Update Prev Button: removed tr_display from outputs
    prev_btn.click(
        fn=lambda idx: load_example_or_done(idx - 1),
        inputs=[current_index],
        outputs=[src_display, eng_display, rating_dropdown, current_index, progress_display, progress_bar, jump_input]
    )

    # 3. Update Jump Button: removed tr_display from outputs
    jump_btn.click(
        fn=jump_to_instance,
        inputs=[jump_input],
        outputs=[src_display, eng_display, rating_dropdown, current_index, progress_display, progress_bar, jump_input]
    )

if __name__ == "__main__":
    demo.launch(share=True)
|
code/interface/vllm_app.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from openai import OpenAI
|
| 3 |
+
|
| 4 |
+
# Initialize the client
# Local vLLM server exposing the OpenAI-compatible API.
client = OpenAI(
    base_url="http://localhost:8004/v1",
    api_key="token-not-needed",  # placeholder; the client library requires some value
)
|
| 9 |
+
|
| 10 |
+
def predict(message, history):
    """Stream a chat completion for `message` given Gradio pair history.

    Yields the growing assistant reply so ChatInterface can render the
    answer incrementally.
    """
    # Rebuild the conversation in OpenAI format from (user, assistant) pairs.
    messages = []
    for pair in history:
        if len(pair) >= 2:
            user_turn, assistant_turn = pair[0], pair[1]
            messages.append({"role": "user", "content": str(user_turn)})
            messages.append({"role": "assistant", "content": str(assistant_turn)})
    messages.append({"role": "user", "content": message})

    stream = client.chat.completions.create(
        model="Qwen/Qwen3-30B-A3B-Instruct-2507",
        messages=messages,
        temperature=0.7,
        stream=True,
    )

    reply = ""
    for chunk in stream:
        delta = chunk.choices[0].delta.content
        if delta is not None:
            reply += delta
            yield reply
|
| 36 |
+
|
| 37 |
+
# Launch the Gradio ChatInterface without the 'type' argument
demo = gr.ChatInterface(
    fn=predict,  # streaming generator defined above
    title="Qwen3 vLLM Chat",
    description="Interface for Qwen/Qwen3-30B-A3B-Instruct-2507 running on vLLM",
    examples=["What is the capital of France?", "Write a Python function for quicksort."]
)

if __name__ == "__main__":
    # Bind on all interfaces and also create a public share link.
    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
|
code/interface/vllm_app_v2.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# SPDX-License-Identifier: Apache-2.0
|
| 2 |
+
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
| 3 |
+
"""Example for starting a Gradio OpenAI Chatbot Webserver
|
| 4 |
+
Start vLLM API server:
|
| 5 |
+
vllm serve meta-llama/Llama-2-7b-chat-hf
|
| 6 |
+
|
| 7 |
+
Start Gradio OpenAI Chatbot Webserver:
|
| 8 |
+
python /home/mshahidul/readctrl/code/interface/vllm_app_v2.py \
|
| 9 |
+
-m Qwen/Qwen3-30B-A3B-Instruct-2507 --model-url http://172.16.34.29:8004/v1
|
| 10 |
+
|
| 11 |
+
Note that `pip install --upgrade gradio` is needed to run this example.
|
| 12 |
+
More details: https://github.com/gradio-app/gradio
|
| 13 |
+
|
| 14 |
+
If your antivirus software blocks the download of frpc for gradio,
|
| 15 |
+
you can install it manually by following these steps:
|
| 16 |
+
|
| 17 |
+
1. Download this file: https://cdn-media.huggingface.co/frpc-gradio-0.3/frpc_linux_amd64
|
| 18 |
+
2. Rename the downloaded file to: frpc_linux_amd64_v0.3
|
| 19 |
+
3. Move the file to this location: /home/user/.cache/huggingface/gradio/frpc
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
import argparse
|
| 23 |
+
|
| 24 |
+
import gradio as gr
|
| 25 |
+
from openai import OpenAI
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def predict(message, history, client, model_name, temp, stop_token_ids):
    """Generate one chat reply through the vLLM OpenAI-compatible endpoint.

    Streams the completion from the server, then returns the deltas
    concatenated into a single string.
    """
    conversation = [{"role": "system", "content": "You are a great AI assistant."}]
    conversation.extend(history)
    conversation.append({"role": "user", "content": message})

    # Optional comma-separated stop token ids, e.g. "1,2,3".
    stop_ids = (
        [int(tok.strip()) for tok in stop_token_ids.split(",")]
        if stop_token_ids
        else []
    )

    # Send request to the OpenAI-compatible vLLM server.
    stream = client.chat.completions.create(
        model=model_name,
        messages=conversation,
        temperature=temp,
        stream=True,
        extra_body={
            "repetition_penalty": 1,
            "stop_token_ids": stop_ids,
        },
    )

    # Drain the stream and join every delta into the full reply.
    parts = []
    for chunk in stream:
        parts.append(chunk.choices[0].delta.content or "")
    return "".join(parts)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def parse_args():
    """Build and parse the CLI arguments for the chatbot web server."""
    parser = argparse.ArgumentParser(
        description="Chatbot Interface with Customizable Parameters"
    )
    # Connection / model selection.
    parser.add_argument("--model-url", type=str,
                        default="http://localhost:8000/v1", help="Model URL")
    parser.add_argument("-m", "--model", type=str, required=True,
                        help="Model name for the chatbot")
    # Generation controls.
    parser.add_argument("--temp", type=float, default=0.8,
                        help="Temperature for text generation")
    parser.add_argument("--stop-token-ids", type=str, default="",
                        help="Comma-separated stop token IDs")
    # Web-server binding.
    parser.add_argument("--host", type=str, default=None)
    parser.add_argument("--port", type=int, default=8001)
    return parser.parse_args()
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def build_gradio_interface(client, model_name, temp, stop_token_ids):
    """Wrap predict() in a Gradio ChatInterface bound to one server/model."""

    def chat_predict(message, history):
        # Close over the connection and sampling settings so Gradio
        # only has to supply (message, history).
        return predict(message, history, client, model_name, temp, stop_token_ids)

    return gr.ChatInterface(
        fn=chat_predict,
        title="Chatbot Interface",
        description="A simple chatbot powered by vLLM",
        fill_height=True,
    )
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def main():
    """Entry point: connect to the vLLM server and launch the Gradio UI."""
    args = parse_args()

    # vLLM's OpenAI-compatible server ignores the API key, but the
    # client library requires one, so send a placeholder.
    client = OpenAI(api_key="EMPTY", base_url=args.model_url)

    gradio_interface = build_gradio_interface(
        client, args.model, args.temp, args.stop_token_ids
    )
    gradio_interface.queue().launch(
        server_name=args.host, server_port=args.port, share=True
    )


if __name__ == "__main__":
    main()
|
| 114 |
+
|
| 115 |
+
# python /home/mshahidul/readctrl/code/interface/vllm_app_v2.py --model Qwen/Qwen3-30B-A3B-Instruct-2507 --model-url http://localhost:8004/v1
|
code/key_subclaims_extract.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openai import OpenAI
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import tqdm
|
| 5 |
+
|
| 6 |
+
# --- 1. Load Paths and Data ---
# Subclaims extracted from synthetic data plus gold-standard English summaries.
data_path = '/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json'
# NOTE(review): the space before "_v2" in this file name looks accidental — confirm it matches the file on disk.
prompt_path = "/home/mshahidul/readctrl/prompts/minimum_info_extract _v2"
api_file = "/home/mshahidul/api_new.json"
save_path = "/home/mshahidul/readctrl/data/key_subclaims_testing/key_subclaims.json"

# Load the dataset
with open(data_path, 'r') as f:
    dataset = json.load(f)

# Load the prompt template
with open(prompt_path, "r") as f:
    prompt_template = f.read()

# Load API Key
with open(api_file, "r") as f:
    api_keys = json.load(f)
openai_api_key = api_keys["openai"]

client = OpenAI(api_key=openai_api_key)
|
| 26 |
+
|
| 27 |
+
# --- 2. Helper Functions ---
def openai_return(prompt, model="gpt-5"):
    """Send *prompt* to the chat API and parse the strict-JSON reply.

    Returns the parsed dict on success, or {"error": ..., "raw_content": ...}
    if the request or JSON parsing fails. raw_content is None when the
    failure happened before a reply was received.
    """
    # Initialize up front so the except clause never references an
    # unbound name (the previous `'content' in locals()` guard was fragile).
    content = None
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant that outputs strictly in JSON format."},
                {"role": "user", "content": prompt}
            ],
            response_format={"type": "json_object"}
        )
        content = response.choices[0].message.content.strip()
        return json.loads(content)
    except Exception as e:
        # Best-effort: the calling loop stores the error instead of dying.
        print(f"⚠️ Error processing API response: {e}")
        return {"error": str(e), "raw_content": content}
|
| 44 |
+
|
| 45 |
+
def format_subclaims(subclaim_list, prefix):
    """Number each subclaim as '<prefix>-<n>: <text>', one per line.

    Non-list inputs are stringified unchanged so malformed records do
    not crash the pipeline.
    """
    if not isinstance(subclaim_list, list):
        return str(subclaim_list)
    lines = []
    for idx, claim in enumerate(subclaim_list, start=1):
        lines.append(f"{prefix}-{idx}: {claim}")
    return "\n".join(lines)
|
| 50 |
+
|
| 51 |
+
# --- 3. Main Processing Loop ---
# Resume from an existing results file if one is present.
res = []
if os.path.exists(save_path):
    with open(save_path, "r") as f:
        res = json.load(f)

# Continue after the last saved record; process a fixed batch per run.
start_index = len(res)
num_to_process = 100

for i in tqdm.tqdm(range(start_index, min(start_index + num_to_process, len(dataset)))):
    item = dataset[i]

    # Raw fields from the dataset record.
    source_text = item.get('fulltext', '')
    source_subclaims_list = item.get('fulltext_subclaims', [])
    gold_summary = item.get('summary', '')
    gold_subclaims_list = item.get('summary_subclaims', [])

    # Tag subclaims with ST-*/GS-* ids so the LLM can reference them
    # in its structured output.
    source_subclaims_formatted = format_subclaims(source_subclaims_list, "ST")
    gold_subclaims_formatted = format_subclaims(gold_subclaims_list, "GS")

    # Fill the prompt template placeholders.
    prompt = (
        prompt_template
        .replace("<<SOURCE_TEXT>>", source_text)
        .replace("<<SOURCE_TEXT_SUBCLAIMS>>", source_subclaims_formatted)
        .replace("<<GOLD_SUMMARY>>", gold_summary)
        .replace("<<GOLD_SUMMARY_SUBCLAIMS>>", gold_subclaims_formatted)
    )

    # Call the API and keep inputs + output together for later auditing.
    api_response = openai_return(prompt)

    res.append({
        "index": i,
        "original_id": item.get('id'),
        "input_data": {
            "source_text": source_text,
            "source_subclaims": source_subclaims_list,
            "gold_summary": gold_summary,
            "gold_subclaims": gold_subclaims_list,
        },
        "llm_output": api_response,
    })

    # Autosave every 5 samples so a crash loses at most a few calls.
    if len(res) % 5 == 0:
        with open(save_path, "w") as f:
            json.dump(res, f, indent=2, ensure_ascii=False)

# Final save.
with open(save_path, "w") as f:
    json.dump(res, f, indent=2, ensure_ascii=False)

print(f"\n✅ Finished! Processed {len(res) - start_index} new samples.")
print(f"Total samples in {save_path}: {len(res)}")
|
code/literacy_thresholds.py
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import argparse
|
| 3 |
+
import json
|
| 4 |
+
import math
|
| 5 |
+
from statistics import median, quantiles
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# Canonical literacy labels, ordered from lowest to highest proficiency.
LABEL_ORDER = ["low", "intermediate", "proficient"]
# Metrics expected to increase monotonically with literacy level.
ORDERED_METRICS = {"source_coverage", "completeness"}


def normalize_label(key: str) -> str:
    """Map a raw literacy-level key to its canonical label.

    Returns the first canonical label that occurs as a substring of
    *key* (case-insensitive), or the lowercased key when none match.
    """
    lowered = key.lower()
    matches = [label for label in LABEL_ORDER if label in lowered]
    return matches[0] if matches else lowered
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def five_number_summary(values):
    """Return {min, q1, median, q3, max} for *values*, or None if empty.

    Quartiles use statistics.quantiles with method="inclusive". A
    single-element list previously raised StatisticsError (quantiles
    needs at least two points); it now yields a degenerate summary
    where every field equals that value.
    """
    if not values:
        return None
    if len(values) == 1:
        v = values[0]
        return {"min": v, "q1": v, "median": v, "q3": v, "max": v}
    q1, _, q3 = quantiles(values, n=4, method="inclusive")
    return {
        "min": min(values),
        "q1": q1,
        "median": median(values),
        "q3": q3,
        "max": max(values),
    }
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def remove_outliers_iqr(values):
    """Drop values outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR].

    Returns (filtered_values, number_removed). Lists with fewer than
    four points, or with a zero IQR, are returned untouched.
    """
    if len(values) < 4:
        return values, 0
    q1, _, q3 = quantiles(values, n=4, method="inclusive")
    spread = q3 - q1
    if math.isclose(spread, 0.0):
        return values, 0
    lo = q1 - 1.5 * spread
    hi = q3 + 1.5 * spread
    kept = [v for v in values if lo <= v <= hi]
    return kept, len(values) - len(kept)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def parse_scores(data, metrics):
    """Group metric scores from evaluation items by canonical label.

    Returns {label: {metric: [scores...]}} for the labels in
    LABEL_ORDER; unknown labels and missing/None scores are skipped.
    """
    grouped = {label: {m: [] for m in metrics} for label in LABEL_ORDER}
    for item in data:
        for key, payload in (item.get("literacy_levels") or {}).items():
            bucket = grouped.get(normalize_label(key))
            if bucket is None:
                continue
            scores = (payload or {}).get("scores") or {}
            for metric in metrics:
                value = scores.get(metric)
                if value is not None:
                    bucket[metric].append(value)
    return grouped
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def suggest_thresholds(per_label_summaries, label_order):
    """Propose a decision boundary between each pair of adjacent labels.

    When the two distributions are separated (lower Q3 < upper Q1) the
    boundary is the midpoint of the gap; otherwise the midpoint of the
    two medians. A missing summary on either side yields None.
    """
    thresholds = {}
    for metric, by_label in per_label_summaries.items():
        metric_thresholds = {}
        for lower_label, upper_label in zip(label_order, label_order[1:]):
            key = f"{lower_label}_to_{upper_label}"
            lower = by_label.get(lower_label)
            upper = by_label.get(upper_label)
            if not lower or not upper:
                metric_thresholds[key] = None
            elif lower["q3"] < upper["q1"]:
                metric_thresholds[key] = (lower["q3"] + upper["q1"]) / 2
            else:
                metric_thresholds[key] = (lower["median"] + upper["median"]) / 2
        thresholds[metric] = metric_thresholds
    return thresholds
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def print_summary(metrics, cleaned_by_label, outlier_counts, summaries):
    """Pretty-print counts, removed-outlier totals, and five-number
    summaries for every (label, metric) pair."""
    for label in LABEL_ORDER:
        print(f"\nLabel: {label}")
        for metric in metrics:
            values = cleaned_by_label[label][metric]
            stats = summaries[metric].get(label)
            print(f"  Metric: {metric}")
            print(f"    Count (after outliers): {len(values)}")
            print(f"    Outliers removed: {outlier_counts[label][metric]}")
            if stats:
                fields = ", ".join(
                    f"{name}={stats[name]:.4f}"
                    for name in ("min", "q1", "median", "q3", "max")
                )
                print(f"    Five-number summary: {fields}")
            else:
                print("    Five-number summary: n/a")
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def medians_in_order(summaries, metric, label_order):
    """Return True if medians are non-decreasing across *label_order*.

    Returns False when any label lacks a summary for *metric*. The
    previous version hard-coded exactly three labels
    (medians[0] <= medians[1] <= medians[2]); this works for any
    label_order length.
    """
    medians = []
    for label in label_order:
        summary = summaries.get(metric, {}).get(label)
        if not summary:
            return False
        medians.append(summary["median"])
    return all(a <= b for a, b in zip(medians, medians[1:]))
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def enforce_ordered_metrics(metrics, grouped, cleaned, outlier_counts, summaries):
    """Undo outlier removal for ordered metrics whose medians misorder.

    For metrics in ORDERED_METRICS (expected to rise with literacy),
    if IQR filtering broke the low <= intermediate <= proficient median
    ordering, restore the raw unfiltered values and resummarize.
    """
    for metric in metrics:
        if metric not in ORDERED_METRICS:
            continue
        if medians_in_order(summaries, metric, LABEL_ORDER):
            continue
        # Ordering broken by filtering: fall back to the raw data.
        for label in LABEL_ORDER:
            raw_values = grouped[label][metric]
            cleaned[label][metric] = raw_values
            outlier_counts[label][metric] = 0
            if raw_values:
                summaries[metric][label] = five_number_summary(raw_values)
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def main():
    """CLI entry point: load scores, summarize per label, print thresholds."""
    parser = argparse.ArgumentParser(
        description="Compute five-number summaries by literacy label with outlier removal."
    )
    parser.add_argument(
        "--input",
        default="/home/mshahidul/readctrl/data/factual_testing/full_details_evaluation_0_80_qwen3-30B_v2.json",
        help="Path to JSON evaluation file.",
    )
    parser.add_argument(
        "--metrics",
        default="factual_attribution,completeness,source_coverage",
        help="Comma-separated metrics to analyze.",
    )
    args = parser.parse_args()

    metrics = [m.strip() for m in args.metrics.split(",") if m.strip()]
    with open(args.input, "r", encoding="utf-8") as f:
        data = json.load(f)

    grouped = parse_scores(data, metrics)
    cleaned = {label: {} for label in LABEL_ORDER}
    outlier_counts = {label: {} for label in LABEL_ORDER}
    summaries = {m: {} for m in metrics}

    # First pass: IQR-filter each (label, metric) bucket and summarize.
    for label in LABEL_ORDER:
        for m in metrics:
            filtered, removed = remove_outliers_iqr(grouped[label][m])
            cleaned[label][m] = filtered
            outlier_counts[label][m] = removed
            if filtered:
                summaries[m][label] = five_number_summary(filtered)

    # Second pass: revert filtering where it broke the expected ordering.
    enforce_ordered_metrics(metrics, grouped, cleaned, outlier_counts, summaries)

    print_summary(metrics, cleaned, outlier_counts, summaries)
    thresholds = suggest_thresholds(summaries, LABEL_ORDER)

    print("\nSuggested thresholds (based on cleaned quartiles/medians):")
    for m in metrics:
        print(f"  Metric: {m}")
        for name, value in thresholds[m].items():
            if value is None:
                print(f"    {name}: n/a")
            else:
                print(f"    {name}: {value:.4f}")


if __name__ == "__main__":
    main()
|
code/literacy_thresholds_v2.py
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import argparse
|
| 3 |
+
import json
|
| 4 |
+
import math
|
| 5 |
+
from statistics import median, quantiles
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
LABEL_ORDER = ["low", "intermediate", "proficient"]
|
| 9 |
+
TARGET_METRIC = "source_coverage"
|
| 10 |
+
ORDERED_METRICS = {TARGET_METRIC}
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def normalize_label(key: str) -> str:
|
| 14 |
+
key_l = key.lower()
|
| 15 |
+
for label in LABEL_ORDER:
|
| 16 |
+
if label in key_l:
|
| 17 |
+
return label
|
| 18 |
+
return key_l
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def five_number_summary(values):
    """Return {min, q1, median, q3, max} for *values*, or None if empty.

    Quartiles use statistics.quantiles with method="inclusive". A
    single-element list previously raised StatisticsError (quantiles
    needs at least two points); it now yields a degenerate summary
    where every field equals that value.
    """
    if not values:
        return None
    if len(values) == 1:
        v = values[0]
        return {"min": v, "q1": v, "median": v, "q3": v, "max": v}
    q1, _, q3 = quantiles(values, n=4, method="inclusive")
    return {
        "min": min(values),
        "q1": q1,
        "median": median(values),
        "q3": q3,
        "max": max(values),
    }
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def remove_outliers_iqr(values):
|
| 35 |
+
if len(values) < 4:
|
| 36 |
+
return values, 0
|
| 37 |
+
q1, _, q3 = quantiles(values, n=4, method="inclusive")
|
| 38 |
+
iqr = q3 - q1
|
| 39 |
+
if math.isclose(iqr, 0.0):
|
| 40 |
+
return values, 0
|
| 41 |
+
lower = q1 - 1.5 * iqr
|
| 42 |
+
upper = q3 + 1.5 * iqr
|
| 43 |
+
filtered = [v for v in values if lower <= v <= upper]
|
| 44 |
+
return filtered, len(values) - len(filtered)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def parse_scores(data, metrics):
|
| 48 |
+
grouped = {label: {m: [] for m in metrics} for label in LABEL_ORDER}
|
| 49 |
+
for item in data:
|
| 50 |
+
levels = item.get("literacy_levels") or {}
|
| 51 |
+
for key, payload in levels.items():
|
| 52 |
+
label = normalize_label(key)
|
| 53 |
+
if label not in grouped:
|
| 54 |
+
continue
|
| 55 |
+
scores = (payload or {}).get("scores") or {}
|
| 56 |
+
for m in metrics:
|
| 57 |
+
if m in scores and scores[m] is not None:
|
| 58 |
+
grouped[label][m].append(scores[m])
|
| 59 |
+
return grouped
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def suggest_thresholds(per_label_summaries, label_order):
|
| 63 |
+
thresholds = {}
|
| 64 |
+
for metric in per_label_summaries:
|
| 65 |
+
thresholds[metric] = {}
|
| 66 |
+
for i in range(len(label_order) - 1):
|
| 67 |
+
lower_label = label_order[i]
|
| 68 |
+
upper_label = label_order[i + 1]
|
| 69 |
+
lower = per_label_summaries[metric].get(lower_label)
|
| 70 |
+
upper = per_label_summaries[metric].get(upper_label)
|
| 71 |
+
if not lower or not upper:
|
| 72 |
+
thresholds[metric][f"{lower_label}_to_{upper_label}"] = None
|
| 73 |
+
continue
|
| 74 |
+
if lower["q3"] < upper["q1"]:
|
| 75 |
+
boundary = (lower["q3"] + upper["q1"]) / 2
|
| 76 |
+
else:
|
| 77 |
+
boundary = (lower["median"] + upper["median"]) / 2
|
| 78 |
+
thresholds[metric][f"{lower_label}_to_{upper_label}"] = boundary
|
| 79 |
+
return thresholds
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def print_summary(metrics, cleaned_by_label, outlier_counts, summaries):
|
| 83 |
+
for label in LABEL_ORDER:
|
| 84 |
+
print(f"\nLabel: {label}")
|
| 85 |
+
for m in metrics:
|
| 86 |
+
vals = cleaned_by_label[label][m]
|
| 87 |
+
summary = summaries[m].get(label)
|
| 88 |
+
removed = outlier_counts[label][m]
|
| 89 |
+
print(f" Metric: {m}")
|
| 90 |
+
print(f" Count (after outliers): {len(vals)}")
|
| 91 |
+
print(f" Outliers removed: {removed}")
|
| 92 |
+
if summary:
|
| 93 |
+
print(
|
| 94 |
+
" Five-number summary: "
|
| 95 |
+
f"min={summary['min']:.4f}, "
|
| 96 |
+
f"q1={summary['q1']:.4f}, "
|
| 97 |
+
f"median={summary['median']:.4f}, "
|
| 98 |
+
f"q3={summary['q3']:.4f}, "
|
| 99 |
+
f"max={summary['max']:.4f}"
|
| 100 |
+
)
|
| 101 |
+
else:
|
| 102 |
+
print(" Five-number summary: n/a")
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def medians_in_order(summaries, metric, label_order):
    """Return True if medians are non-decreasing across *label_order*.

    Returns False when any label lacks a summary for *metric*. The
    previous version hard-coded exactly three labels; this works for
    any label_order length.
    """
    medians = []
    for label in label_order:
        summary = summaries.get(metric, {}).get(label)
        if not summary:
            return False
        medians.append(summary["median"])
    return all(a <= b for a, b in zip(medians, medians[1:]))
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def enforce_ordered_metrics(metrics, grouped, cleaned, outlier_counts, summaries):
|
| 116 |
+
for metric in metrics:
|
| 117 |
+
if metric not in ORDERED_METRICS:
|
| 118 |
+
continue
|
| 119 |
+
if medians_in_order(summaries, metric, LABEL_ORDER):
|
| 120 |
+
continue
|
| 121 |
+
for label in LABEL_ORDER:
|
| 122 |
+
raw_values = grouped[label][metric]
|
| 123 |
+
cleaned[label][metric] = raw_values
|
| 124 |
+
outlier_counts[label][metric] = 0
|
| 125 |
+
if raw_values:
|
| 126 |
+
summaries[metric][label] = five_number_summary(raw_values)
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def main():
|
| 130 |
+
parser = argparse.ArgumentParser(
|
| 131 |
+
description="Compute five-number summaries for source_coverage by literacy label."
|
| 132 |
+
)
|
| 133 |
+
parser.add_argument(
|
| 134 |
+
"--input",
|
| 135 |
+
default="/home/mshahidul/readctrl/data/factual_testing/full_details_evaluation_0_80_qwen3-30B_v2.json",
|
| 136 |
+
help="Path to JSON evaluation file.",
|
| 137 |
+
)
|
| 138 |
+
args = parser.parse_args()
|
| 139 |
+
|
| 140 |
+
metrics = [TARGET_METRIC]
|
| 141 |
+
with open(args.input, "r", encoding="utf-8") as f:
|
| 142 |
+
data = json.load(f)
|
| 143 |
+
|
| 144 |
+
grouped = parse_scores(data, metrics)
|
| 145 |
+
cleaned = {label: {} for label in LABEL_ORDER}
|
| 146 |
+
outlier_counts = {label: {} for label in LABEL_ORDER}
|
| 147 |
+
summaries = {m: {} for m in metrics}
|
| 148 |
+
|
| 149 |
+
for label in LABEL_ORDER:
|
| 150 |
+
for m in metrics:
|
| 151 |
+
values = grouped[label][m]
|
| 152 |
+
filtered, removed = remove_outliers_iqr(values)
|
| 153 |
+
cleaned[label][m] = filtered
|
| 154 |
+
outlier_counts[label][m] = removed
|
| 155 |
+
if filtered:
|
| 156 |
+
summaries[m][label] = five_number_summary(filtered)
|
| 157 |
+
|
| 158 |
+
enforce_ordered_metrics(metrics, grouped, cleaned, outlier_counts, summaries)
|
| 159 |
+
|
| 160 |
+
print_summary(metrics, cleaned, outlier_counts, summaries)
|
| 161 |
+
thresholds = suggest_thresholds(summaries, LABEL_ORDER)
|
| 162 |
+
|
| 163 |
+
print("\nSuggested thresholds (based on cleaned quartiles/medians):")
|
| 164 |
+
for m in metrics:
|
| 165 |
+
print(f" Metric: {m}")
|
| 166 |
+
for k, v in thresholds[m].items():
|
| 167 |
+
if v is None:
|
| 168 |
+
print(f" {k}: n/a")
|
| 169 |
+
else:
|
| 170 |
+
print(f" {k}: {v:.4f}")
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
if __name__ == "__main__":
|
| 174 |
+
main()
|
code/old/FH_es.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
|
| 3 |
+
# --- Spanish tokenization ---
|
| 4 |
+
WORD_RE = re.compile(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ]+", re.UNICODE)
|
| 5 |
+
|
| 6 |
+
def _tokenize_words_es(text: str):
|
| 7 |
+
return WORD_RE.findall(text)
|
| 8 |
+
|
| 9 |
+
def _count_sentences_es(text: str) -> int:
|
| 10 |
+
# Count sentences via ., !, ?, … and Spanish ¡¿
|
| 11 |
+
sentences = re.split(r"[.!?…]+|[¡¿]", text)
|
| 12 |
+
return max(1, sum(1 for s in sentences if s.strip()))
|
| 13 |
+
|
| 14 |
+
# --- Syllable counting ---
# Prefer pyphen's dictionary-based hyphenation; fall back to a heuristic.
try:
    import pyphen
    _dic = pyphen.Pyphen(lang='es')  # or 'es_ES'

    def count_syllables_es(word: str) -> int:
        """Count syllables via pyphen hyphenation points (minimum 1)."""
        # Use hyphenation positions; count pieces
        hyph = _dic.inserted(word)
        return max(1, hyph.count('-') + 1)
except Exception:
    # Heuristic fallback (handles hiatus and silent 'u' roughly)
    def count_syllables_es(word: str) -> int:
        """Heuristic Spanish syllable count when pyphen is unavailable.

        Counts contiguous vowel groups, splitting a group wherever two
        "strong" vowels meet (hiatus). Returns at least 1.
        """
        w = word.lower()

        # Treat final 'y' as vowel 'i'
        w = re.sub(r'y$', 'i', w)

        # Remove silent 'u' before e/i in 'que/qui/gue/gui' (but not 'güe/güi')
        w = re.sub(r'que', 'qe', w)
        w = re.sub(r'qui', 'qi', w)
        w = re.sub(r'gue', 'ge', w)
        w = re.sub(r'gui', 'gi', w)

        vowels = set("aeiouáéíóúü")
        strong = set("aáeéoóíú")  # accented í/ú behave like strong (hiatus)
        n = len(w)
        i = 0
        syll = 0
        # Scan the word, treating each run of vowels as one or more nuclei.
        while i < n:
            if w[i] not in vowels:
                i += 1
                continue
            # collect contiguous vowels
            j = i + 1
            while j < n and w[j] in vowels:
                j += 1
            seq = w[i:j]
            # one nucleus by default
            nuclei = 1
            # split on strong-strong boundaries (ae, ea, ao, oa, eo, oe, and cases with í/ú)
            for k in range(len(seq) - 1):
                if seq[k] in strong and seq[k + 1] in strong:
                    nuclei += 1
            syll += nuclei
            i = j
        return max(1, syll)
|
| 60 |
+
|
| 61 |
+
# --- Fernández–Huerta (FH) ---
def fernandez_huerta(text: str) -> float | None:
    """Fernández–Huerta readability score for Spanish text.

    Higher is easier; typical range is roughly 0–100. Returns None
    when the text contains no words.
    """
    words = _tokenize_words_es(text)
    if not words:
        return None
    n_words = len(words)
    n_sentences = _count_sentences_es(text)
    n_syllables = sum(count_syllables_es(w) for w in words)

    # FH = 206.84 - 0.60 * P - 1.02 * F, where
    # P = syllables per 100 words and F = words per sentence.
    syllables_per_100 = (n_syllables / n_words) * 100.0
    words_per_sentence = n_words / n_sentences
    return round(206.84 - 0.60 * syllables_per_100 - 1.02 * words_per_sentence, 2)
|
| 78 |
+
|
| 79 |
+
# --- Quick check ---
|
| 80 |
+
# if __name__ == "__main__":
|
| 81 |
+
# text_easy = "El corazón es un órgano que bombea sangre. En este caso, funciona bien."
|
| 82 |
+
# text_medium = "El corazón del paciente muestra una función adecuada, aunque se observaron pequeños cambios que deben revisarse."
|
| 83 |
+
# text_hard = "La evaluación cardiológica indicó una función sistólica preservada, con alteraciones discretas en la relajación diastólica."
|
| 84 |
+
# print("Easy FH:", fernandez_huerta(text_easy))
|
| 85 |
+
# print("Medium FH:", fernandez_huerta(text_medium))
|
| 86 |
+
# print("Hard FH:", fernandez_huerta(text_hard))
|
code/old/FH_esV2.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
import separasilabas
|
| 3 |
+
|
| 4 |
+
def count_words(text):
    """Count words in *text* after stripping digits (minimum 1).

    Digit characters are removed, remaining non-word characters
    collapse to single spaces, and the result is whitespace-split.
    Returns at least 1 so callers can divide by it safely.
    (Previous version split the cleaned text twice.)
    """
    no_digits = ''.join(ch for ch in text if not ch.isdigit())
    cleaned = re.sub(r'\W+', ' ', no_digits).strip()
    words = cleaned.split()
    return len(words) or 1  # division guard: never return 0
|
| 9 |
+
|
| 10 |
+
def count_sentences(text):
    """Count sentence-like segments split on . : ; ! ? ( ) (minimum 1)."""
    flattened = text.replace("\n", "")
    pieces = re.split(r'[.:;!?\)\()]', flattened)
    segments = [p for p in pieces if p]
    return len(segments) or 1
|
| 16 |
+
|
| 17 |
+
def count_all_syllables(text):
    """Total syllables across all words via separasilabas (minimum 1)."""
    words = re.sub(r'\W+', ' ', text).strip().split()
    silabizer = separasilabas.silabizer()
    total = sum(len(silabizer(word)) for word in words)
    return total or 1
|
| 25 |
+
|
| 26 |
+
def Pval(text):
    """Average syllables per word, rounded to 2 decimals."""
    return round(count_all_syllables(text) / count_words(text), 2)

def Fval(text):
    """Average words per sentence, rounded to 2 decimals."""
    return round(count_words(text) / count_sentences(text), 2)

def fernandez_huerta(text):
    """Fernández–Huerta readability: 206.84 - 60*P - 1.02*F (2 decimals)."""
    return round(206.84 - 60 * Pval(text) - 1.02 * Fval(text), 2)
|
| 38 |
+
|
| 39 |
+
|
code/old/FH_fr.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
try:
|
| 3 |
+
import pyphen
|
| 4 |
+
_hyph_fr = pyphen.Pyphen(lang='fr') # or 'fr_FR'
|
| 5 |
+
except Exception:
|
| 6 |
+
_hyph_fr = None
|
| 7 |
+
|
| 8 |
+
# --- Basic French tokenization ---
# Alphabetic runs including Latin-1 accented letters and œ/æ ligatures.
WORD_RE_FR = re.compile(r"[A-Za-zÀ-ÖØ-öø-ÿœŒÆæ]+", re.UNICODE)

def tokenize_words_fr(text: str):
    """Return the alphabetic (French) words found in *text*."""
    return WORD_RE_FR.findall(text)

def count_sentences_fr(text: str):
    """Count sentences split on . ! ? … (minimum 1)."""
    segments = re.split(r"[.!?…]+", text)
    return max(1, sum(bool(seg.strip()) for seg in segments))
|
| 18 |
+
|
| 19 |
+
def count_syllables_fr(word: str) -> int:
    """Approximate syllable count for a French word (minimum 1).

    Uses pyphen hyphenation when the module-level _hyph_fr dictionary
    loaded; otherwise falls back to counting vowel groups.
    """
    if _hyph_fr:
        # Hyphenation points split the word into ~syllable pieces.
        pieces = _hyph_fr.inserted(word).count('-') + 1
        return max(1, pieces)
    # Fallback: each run of consecutive vowels counts as one syllable.
    groups = re.findall(r"[aeiouyAEIOUYàâäéèêëîïôöùûüÿœAEIOUYÀÂÄÉÈÊËÎÏÔÖÙÛÜŸŒ]+", word)
    return max(1, len(groups))
|
| 27 |
+
|
| 28 |
+
# --- FRE-FR (Kandel & Moles) ---
def flesch_kandel_moles_fr(text: str):
    """French Flesch reading-ease (Kandel & Moles adaptation).

    Returns None for text with no words; higher scores mean easier text.
    """
    words = tokenize_words_fr(text)
    if not words:
        return None
    word_count = len(words)
    sentence_count = count_sentences_fr(text)
    # P: syllables per 100 words; F: words per sentence.
    syllables_per_100 = (sum(count_syllables_fr(w) for w in words) / word_count) * 100.0
    words_per_sentence = word_count / sentence_count
    return round(207.0 - 1.015 * words_per_sentence - 0.736 * syllables_per_100, 2)
|
| 40 |
+
|
| 41 |
+
# --- LIX / RIX ---
def lix(text: str):
    """LIX index: mean sentence length plus % of words longer than 6 chars.

    Returns None when the text contains no words.
    """
    words = tokenize_words_fr(text)
    if not words:
        return None
    total = len(words)
    sentences = count_sentences_fr(text)
    long_count = sum(1 for w in words if len(w) > 6)
    return round((total / sentences) + (100.0 * long_count / total), 2)
|
| 50 |
+
|
| 51 |
+
def rix(text: str):
    """RIX index: number of long words (>6 chars) per sentence.

    Returns None when the text contains no words.
    """
    words = tokenize_words_fr(text)
    if not words:
        return None
    sentences = count_sentences_fr(text)
    long_count = sum(1 for w in words if len(w) > 6)
    return round(long_count / sentences, 2)
|
| 59 |
+
|
| 60 |
+
# --- Band checks ---
# Inclusive (lo, hi) score windows per target readability band.
# B1 is the easiest target; note the ranges are mirrored between the two
# scales (FRE-style scores fall with difficulty, LIX rises).
FRE_FR_BANDS = {
    'B1': (70, 100),
    'B2': (60, 70),
    'B3': (45, 60),
}
LIX_BANDS = {
    'B1': (20, 35),
    'B2': (35, 45),
    'B3': (45, 60),
}
|
| 71 |
+
|
| 72 |
+
def in_band(score, band, bands, delta=0.0):
    """Return True when *score* lies inside bands[band], widened by ±delta.

    A score of None (produced by the metrics on empty text) is never in
    any band.
    """
    if score is None:
        return False
    low, high = bands[band]
    return (low - delta) <= score <= (high + delta)
|
| 77 |
+
|
| 78 |
+
# Example
|
| 79 |
+
# if __name__ == "__main__":
|
| 80 |
+
# txt = "Le patient se porte bien. Les examens sont rassurants, sans signes d’infection. Un suivi simple est recommandé."
|
| 81 |
+
# fre = flesch_kandel_moles_fr(txt)
|
| 82 |
+
# lx = lix(txt)
|
| 83 |
+
# rx = rix(txt)
|
| 84 |
+
# print("FRE-FR:", fre, "B1?", in_band(fre, 'B1', FRE_FR_BANDS, delta=1.0))
|
| 85 |
+
# print("LIX:", lx, "B1?", in_band(lx, 'B1', LIX_BANDS, delta=2.0))
|
| 86 |
+
# print("RIX:", rx)
|
code/old/FH_pt.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
|
| 2 |
+
try:
|
| 3 |
+
import pyphen
|
| 4 |
+
_hyph_pt_br = pyphen.Pyphen(lang='pt_BR')
|
| 5 |
+
_hyph_pt_pt = pyphen.Pyphen(lang='pt_PT')
|
| 6 |
+
except Exception:
|
| 7 |
+
_hyph_pt_br = _hyph_pt_pt = None
|
| 8 |
+
|
| 9 |
+
# --- Tokenization ---
# One token = a run of Latin letters; the ranges cover the accented
# characters used in Portuguese (áâãà ç éê í óôõ ú ü etc.).
WORD_RE_PT = re.compile(r"[A-Za-zÀ-ÖØ-öø-ÿ]+", re.UNICODE)


def tokenize_words_pt(text: str):
    """Return all word tokens of *text* in reading order."""
    matches = WORD_RE_PT.findall(text)
    return matches
|
| 14 |
+
|
| 15 |
+
def count_sentences_pt(text: str):
    """Number of sentences, delimited by ., !, ? or ellipsis (at least 1)."""
    chunks = re.split(r"[.!?…]+", text)
    n = sum(bool(c.strip()) for c in chunks)
    return n if n else 1
|
| 19 |
+
|
| 20 |
+
def count_syllables_pt(word: str) -> int:
    """Approximate the syllable count of a single Portuguese word."""
    dictionary = _hyph_pt_br or _hyph_pt_pt  # pt_BR preferred over pt_PT
    if dictionary:
        # Hyphenation points approximate syllable boundaries.
        return max(1, dictionary.inserted(word).count('-') + 1)
    # Without pyphen: one syllable per vowel run (rough heuristic).
    runs = re.findall(r"[aeiouyAEIOUYàáâãéêíóôõúüÀÁÂÃÉÊÍÓÔÕÚÜ]+", word)
    return max(1, len(runs))
|
| 28 |
+
|
| 29 |
+
# --- Flesch Reading Ease (Portuguese adaptation) ---
def flesch_portuguese(text: str):
    """Portuguese adaptation of the Flesch reading-ease score.

    Returns None for wordless input; otherwise the score rounded to
    two decimals (higher values read easier).
    """
    words = tokenize_words_pt(text)
    if not words:
        return None
    sent_count = count_sentences_pt(text)
    syllables = sum(count_syllables_pt(w) for w in words)
    avg_sentence_len = len(words) / sent_count   # words per sentence
    avg_word_syllables = syllables / len(words)  # syllables per word
    return round(248.835 - 1.015 * avg_sentence_len - 84.6 * avg_word_syllables, 2)
|
| 41 |
+
|
| 42 |
+
# --- LIX / RIX ---
def lix(text: str):
    """LIX: words per sentence plus the percentage of words longer than 6 chars.

    Returns None when the text contains no words.
    """
    words = tokenize_words_pt(text)
    if not words:
        return None
    n = len(words)
    s = count_sentences_pt(text)
    n_long = sum(len(w) > 6 for w in words)
    return round((n / s) + (100.0 * n_long / n), 2)
|
| 51 |
+
|
| 52 |
+
def rix(text: str):
    """RIX: number of long words (>6 chars) per sentence.

    Returns None when the text contains no words.
    """
    words = tokenize_words_pt(text)
    if not words:
        return None
    s = count_sentences_pt(text)
    n_long = sum(len(w) > 6 for w in words)
    return round(n_long / s, 2)
|
| 60 |
+
|
| 61 |
+
# --- Band checks ---
# Inclusive (lo, hi) score windows per target readability band; B1 is the
# easiest target. The two scales are mirrored: FRE-style scores fall with
# difficulty while LIX rises.
FRE_PT_BANDS = {
    'B1': (70, 100),
    'B2': (60, 70),
    'B3': (45, 60),
}
LIX_BANDS = {
    'B1': (20, 35),
    'B2': (35, 45),
    'B3': (45, 60),
}
|
| 72 |
+
|
| 73 |
+
def in_band(score, band, bands, delta=0.0):
    """Whether *score* lies within bands[band] expanded by ±delta.

    None (returned by the metrics on empty text) never matches a band.
    """
    if score is None:
        return False
    lower, upper = bands[band]
    return (lower - delta) <= score <= (upper + delta)
|
| 78 |
+
|
| 79 |
+
# Example
if __name__ == "__main__":
    # Quick smoke test: score a short clinical note and check band B1.
    sample = "O paciente está bem. Os exames não mostram sinais de infecção. Recomenda-se apenas acompanhamento."
    fre_score = flesch_portuguese(sample)
    lix_score = lix(sample)
    rix_score = rix(sample)
    print("FRE-PT:", fre_score, "B1?", in_band(fre_score, 'B1', FRE_PT_BANDS, delta=1.0))
    print("LIX:", lix_score, "B1?", in_band(lix_score, 'B1', LIX_BANDS, delta=2.0))
    print("RIX:", rix_score)
|
code/old/generate_thinking_data.ipynb
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "markdown",
|
| 5 |
+
"id": "d3bff56e",
|
| 6 |
+
"metadata": {},
|
| 7 |
+
"source": [
|
| 8 |
+
"https://lmarena.ai/c/9fa09cff-fb85-4719-80db-188a19de0803"
|
| 9 |
+
]
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"cell_type": "code",
|
| 13 |
+
"execution_count": null,
|
| 14 |
+
"id": "1a11463f",
|
| 15 |
+
"metadata": {},
|
| 16 |
+
"outputs": [],
|
| 17 |
+
"source": [
|
| 18 |
+
"import json\n",
|
| 19 |
+
"import random\n",
|
| 20 |
+
"from typing import List, Dict, Any, Optional\n",
|
| 21 |
+
"\n",
|
| 22 |
+
"# Your existing prompts for different readability levels\n",
|
| 23 |
+
"PROMPTS = {\n",
|
| 24 |
+
" \"easy\": '''\n",
|
| 25 |
+
"You are an assistant that rewrites Spanish texts to make them very simple and easy to understand.\n",
|
| 26 |
+
"Your goal is to rewrite the provided input text for younger readers (Fernández Huerta 70–100; grade 5–7).\n",
|
| 27 |
+
"Use short sentences, simple words, and friendly tone. Avoid technical or complex expressions.\n",
|
| 28 |
+
"Keep all important factual details, but remove jargon.\n",
|
| 29 |
+
"Return only the rewritten text without commentary.\n",
|
| 30 |
+
"''',\n",
|
| 31 |
+
" \"intermediate\": '''\n",
|
| 32 |
+
"You are an assistant specialized in rewriting Spanish texts with medium readability.\n",
|
| 33 |
+
"Your task is to rewrite the provided input text for general or high‑school‑level readers (Fernández Huerta 50–70; grade 8–12).\n",
|
| 34 |
+
"Use clear and complete sentences, moderately complex vocabulary, and structured narration.\n",
|
| 35 |
+
"Retain all relevant medical or factual information, but phrase it in accessible language.\n",
|
| 36 |
+
"Return only the rewritten text with no explanations.\n",
|
| 37 |
+
"''',\n",
|
| 38 |
+
" \"hard\": '''\n",
|
| 39 |
+
"You are an assistant that rewrites Spanish medical texts with professional, technical precision.\n",
|
| 40 |
+
"Rewrite the following input text using specialized, academic terminology and information‑dense phrasing.\n",
|
| 41 |
+
"The output must target a Fernández Huerta readability index between 0 and 50 (university/professional level).\n",
|
| 42 |
+
"Use clinical vocabulary, formal register, and detailed description of pathophysiology, procedures, and findings.\n",
|
| 43 |
+
"Return only the rewritten text.\n",
|
| 44 |
+
"'''\n",
|
| 45 |
+
"}\n",
|
| 46 |
+
"\n",
|
| 47 |
+
"# Thinking templates for processing medical reports\n",
|
| 48 |
+
"THINKING_TEMPLATES = {\n",
|
| 49 |
+
" \"input_analysis\": [\n",
|
| 50 |
+
" \"\"\"Estoy analizando este informe médico. Primero debo identificar:\n",
|
| 51 |
+
"1. Datos del paciente: {patient_info}\n",
|
| 52 |
+
"2. Diagnóstico principal: {diagnosis}\n",
|
| 53 |
+
"3. Síntomas y signos clínicos: {symptoms}\n",
|
| 54 |
+
"4. Pruebas realizadas: {tests}\n",
|
| 55 |
+
"5. Tratamiento: {treatment}\n",
|
| 56 |
+
"\n",
|
| 57 |
+
"Ahora debo adaptar esta información al nivel de lectura solicitado: {difficulty}.\"\"\",\n",
|
| 58 |
+
"\n",
|
| 59 |
+
" \"\"\"Este es un informe médico que necesito reescribir. Contiene:\n",
|
| 60 |
+
"- Información clínica sobre {diagnosis}\n",
|
| 61 |
+
"- Terminología médica como: {medical_terms}\n",
|
| 62 |
+
"- Datos técnicos que debo {action} según el nivel {difficulty}\n",
|
| 63 |
+
"Mi objetivo es mantener la precisión médica mientras ajusto la complejidad del lenguaje.\"\"\"\n",
|
| 64 |
+
" ],\n",
|
| 65 |
+
" \n",
|
| 66 |
+
" \"easy\": [\n",
|
| 67 |
+
" \"\"\"Para nivel fácil (FH 70-100), debo:\n",
|
| 68 |
+
"1. Cambiar \"{medical_term}\" por \"{simple_term}\"\n",
|
| 69 |
+
"2. Dividir oraciones largas en frases cortas\n",
|
| 70 |
+
"3. Eliminar jerga médica innecesaria\n",
|
| 71 |
+
"4. Usar palabras que un niño de 10-12 años entienda\n",
|
| 72 |
+
"5. Mantener la historia clara y simple\n",
|
| 73 |
+
"\n",
|
| 74 |
+
"Voy a contar esto como una historia sobre {patient_description} que {simple_story}.\"\"\",\n",
|
| 75 |
+
"\n",
|
| 76 |
+
" \"\"\"Necesito simplificar mucho este texto:\n",
|
| 77 |
+
"- Cambiar términos médicos complejos por palabras cotidianas\n",
|
| 78 |
+
"- Usar máximo 10-15 palabras por oración\n",
|
| 79 |
+
"- Explicar todo como si fuera para un niño\n",
|
| 80 |
+
"- Mantener solo la información esencial\n",
|
| 81 |
+
"- Hacer que suene amigable y no aterrador\"\"\",\n",
|
| 82 |
+
" ],\n",
|
| 83 |
+
" \n",
|
| 84 |
+
" \"intermediate\": [\n",
|
| 85 |
+
" \"\"\"Para nivel intermedio (FH 50-70), mi estrategia es:\n",
|
| 86 |
+
"1. Mantener algunos términos médicos pero explicarlos brevemente\n",
|
| 87 |
+
"2. Usar oraciones de complejidad media (15-20 palabras)\n",
|
| 88 |
+
"3. Estructurar la información en párrafos lógicos\n",
|
| 89 |
+
"4. Incluir detalles relevantes sin ser excesivamente técnico\n",
|
| 90 |
+
"5. Vocabulario apropiado para estudiantes de secundaria\n",
|
| 91 |
+
"\n",
|
| 92 |
+
"El texto debe ser informativo pero accesible, manteniendo {key_concepts} pero explicando {complex_terms}.\"\"\",\n",
|
| 93 |
+
"\n",
|
| 94 |
+
" \"\"\"Nivel intermedio requiere equilibrio:\n",
|
| 95 |
+
"- Puedo usar términos como \"{medical_term}\" pero debo contextualizarlos\n",
|
| 96 |
+
"- Las oraciones pueden ser más complejas pero claras\n",
|
| 97 |
+
"- Incluir información sobre causas y efectos\n",
|
| 98 |
+
"- Mantener estructura narrativa coherente\n",
|
| 99 |
+
"- Apropiado para lectores con educación media\"\"\",\n",
|
| 100 |
+
" ],\n",
|
| 101 |
+
" \n",
|
| 102 |
+
" \"hard\": [\n",
|
| 103 |
+
" \"\"\"Para nivel profesional (FH 0-50), debo maximizar la precisión técnica:\n",
|
| 104 |
+
"1. Usar nomenclatura médica internacional: {technical_terms}\n",
|
| 105 |
+
"2. Incluir todos los valores de laboratorio y mediciones específicas\n",
|
| 106 |
+
"3. Emplear terminología especializada sin simplificación\n",
|
| 107 |
+
"4. Formato de historia clínica hospitalaria\n",
|
| 108 |
+
"5. Densidad informativa máxima\n",
|
| 109 |
+
"\n",
|
| 110 |
+
"Estructuraré según: Anamnesis → Exploración física → Pruebas complementarias → Diagnóstico → Plan terapéutico.\"\"\",\n",
|
| 111 |
+
"\n",
|
| 112 |
+
" \"\"\"Reescritura altamente técnica requerida:\n",
|
| 113 |
+
"- Incorporar clasificaciones internacionales (CIE-10, DSM-5, etc.)\n",
|
| 114 |
+
"- Detallar fisiopatología y mecanismos moleculares\n",
|
| 115 |
+
"- Usar abreviaturas médicas estándar\n",
|
| 116 |
+
"- Incluir diagnósticos diferenciales\n",
|
| 117 |
+
"- Lenguaje de publicación científica\n",
|
| 118 |
+
"- Máxima densidad de información médica especializada\"\"\",\n",
|
| 119 |
+
" ]\n",
|
| 120 |
+
"}\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"class MedicalReportProcessor:\n",
|
| 123 |
+
" \"\"\"Process medical reports and create training data with thinking mode.\"\"\"\n",
|
| 124 |
+
" \n",
|
| 125 |
+
" def __init__(self, original_report: str):\n",
|
| 126 |
+
" \"\"\"\n",
|
| 127 |
+
" Initialize with the original medical report.\n",
|
| 128 |
+
" \n",
|
| 129 |
+
" Args:\n",
|
| 130 |
+
" original_report: The original medical report text to be rewritten\n",
|
| 131 |
+
" \"\"\"\n",
|
| 132 |
+
" self.original_report = original_report\n",
|
| 133 |
+
" self.medical_entities = self.extract_medical_entities(original_report)\n",
|
| 134 |
+
" \n",
|
| 135 |
+
" def extract_medical_entities(self, text: str) -> Dict[str, List[str]]:\n",
|
| 136 |
+
" \"\"\"Extract medical entities from the report.\"\"\"\n",
|
| 137 |
+
" # This is a simplified extraction - you might want to use a medical NER model\n",
|
| 138 |
+
" entities = {\n",
|
| 139 |
+
" \"diagnosis\": [],\n",
|
| 140 |
+
" \"symptoms\": [],\n",
|
| 141 |
+
" \"medications\": [],\n",
|
| 142 |
+
" \"tests\": [],\n",
|
| 143 |
+
" \"medical_terms\": []\n",
|
| 144 |
+
" }\n",
|
| 145 |
+
" \n",
|
| 146 |
+
" # Common medical terms to look for\n",
|
| 147 |
+
" diagnosis_keywords = [\"diagnóstico\", \"síndrome\", \"enfermedad\", \"trastorno\", \"patología\", \n",
|
| 148 |
+
" \"neurofibromatosis\", \"nf1\", \"tdah\", \"déficit\"]\n",
|
| 149 |
+
" symptom_keywords = [\"dolor\", \"mancha\", \"nódulo\", \"bulto\", \"lesión\", \"síntoma\",\n",
|
| 150 |
+
" \"retraso\", \"dificultad\", \"problema\"]\n",
|
| 151 |
+
" medication_keywords = [\"medicamento\", \"tratamiento\", \"terapia\", \"metilfenidato\", \"fármaco\"]\n",
|
| 152 |
+
" test_keywords = [\"biopsia\", \"ecografía\", \"análisis\", \"prueba\", \"examen\", \"resonancia\"]\n",
|
| 153 |
+
" \n",
|
| 154 |
+
" text_lower = text.lower()\n",
|
| 155 |
+
" \n",
|
| 156 |
+
" # Extract based on keywords\n",
|
| 157 |
+
" for keyword in diagnosis_keywords:\n",
|
| 158 |
+
" if keyword in text_lower:\n",
|
| 159 |
+
" entities[\"diagnosis\"].append(keyword)\n",
|
| 160 |
+
" \n",
|
| 161 |
+
" for keyword in symptom_keywords:\n",
|
| 162 |
+
" if keyword in text_lower:\n",
|
| 163 |
+
" entities[\"symptoms\"].append(keyword)\n",
|
| 164 |
+
" \n",
|
| 165 |
+
" for keyword in medication_keywords:\n",
|
| 166 |
+
" if keyword in text_lower:\n",
|
| 167 |
+
" entities[\"medications\"].append(keyword)\n",
|
| 168 |
+
" \n",
|
| 169 |
+
" for keyword in test_keywords:\n",
|
| 170 |
+
" if keyword in text_lower:\n",
|
| 171 |
+
" entities[\"tests\"].append(keyword)\n",
|
| 172 |
+
" \n",
|
| 173 |
+
" # Extract all medical terms\n",
|
| 174 |
+
" all_medical = diagnosis_keywords + symptom_keywords + medication_keywords + test_keywords\n",
|
| 175 |
+
" for term in all_medical:\n",
|
| 176 |
+
" if term in text_lower:\n",
|
| 177 |
+
" entities[\"medical_terms\"].append(term)\n",
|
| 178 |
+
" \n",
|
| 179 |
+
" return entities\n",
|
| 180 |
+
" \n",
|
| 181 |
+
" def generate_input_thinking(self, difficulty: str) -> str:\n",
|
| 182 |
+
" \"\"\"Generate thinking for understanding the input medical report.\"\"\"\n",
|
| 183 |
+
" template = random.choice(THINKING_TEMPLATES[\"input_analysis\"])\n",
|
| 184 |
+
" \n",
|
| 185 |
+
" thinking = template.format(\n",
|
| 186 |
+
" patient_info=\"paciente de 18 años\" if \"18 años\" in self.original_report else \"paciente\",\n",
|
| 187 |
+
" diagnosis=\", \".join(self.medical_entities[\"diagnosis\"][:2]) or \"condición médica\",\n",
|
| 188 |
+
" symptoms=\", \".join(self.medical_entities[\"symptoms\"][:3]) or \"síntomas diversos\",\n",
|
| 189 |
+
" tests=\", \".join(self.medical_entities[\"tests\"][:2]) or \"estudios clínicos\",\n",
|
| 190 |
+
" treatment=\", \".join(self.medical_entities[\"medications\"][:2]) or \"tratamiento\",\n",
|
| 191 |
+
" difficulty=difficulty,\n",
|
| 192 |
+
" medical_terms=\", \".join(self.medical_entities[\"medical_terms\"][:3]),\n",
|
| 193 |
+
" action=\"simplificar mucho\" if difficulty == \"easy\" else \"adaptar\" if difficulty == \"intermediate\" else \"tecnificar\"\n",
|
| 194 |
+
" )\n",
|
| 195 |
+
" \n",
|
| 196 |
+
" return thinking\n",
|
| 197 |
+
" \n",
|
| 198 |
+
" def generate_output_thinking(self, difficulty: str, rewritten_text: str) -> str:\n",
|
| 199 |
+
" \"\"\"Generate thinking for the rewriting process.\"\"\"\n",
|
| 200 |
+
" template = random.choice(THINKING_TEMPLATES[difficulty])\n",
|
| 201 |
+
" \n",
|
| 202 |
+
" # Customize based on difficulty\n",
|
| 203 |
+
" if difficulty == \"easy\":\n",
|
| 204 |
+
" thinking = template.format(\n",
|
| 205 |
+
" medical_term=self.medical_entities[\"medical_terms\"][0] if self.medical_entities[\"medical_terms\"] else \"término médico\",\n",
|
| 206 |
+
" simple_term=\"enfermedad\" if \"neurofibromatosis\" in self.medical_entities[\"diagnosis\"] else \"problema de salud\",\n",
|
| 207 |
+
" patient_description=\"un joven\",\n",
|
| 208 |
+
" simple_story=\"tenía una enfermedad especial desde pequeño\"\n",
|
| 209 |
+
" )\n",
|
| 210 |
+
" elif difficulty == \"intermediate\":\n",
|
| 211 |
+
" thinking = template.format(\n",
|
| 212 |
+
" key_concepts=\", \".join(self.medical_entities[\"diagnosis\"][:2]) or \"conceptos médicos principales\",\n",
|
| 213 |
+
" complex_terms=\", \".join(self.medical_entities[\"medical_terms\"][:3]) or \"terminología especializada\",\n",
|
| 214 |
+
" medical_term=self.medical_entities[\"medical_terms\"][0] if self.medical_entities[\"medical_terms\"] else \"término médico\"\n",
|
| 215 |
+
" )\n",
|
| 216 |
+
" else: # hard\n",
|
| 217 |
+
" thinking = template.format(\n",
|
| 218 |
+
" technical_terms=\", \".join(self.medical_entities[\"medical_terms\"][:5]) or \"terminología especializada\"\n",
|
| 219 |
+
" )\n",
|
| 220 |
+
" \n",
|
| 221 |
+
" return thinking\n",
|
| 222 |
+
" \n",
|
| 223 |
+
" def create_training_example(self, difficulty: str, rewritten_text: str, fh_score: float) -> Dict:\n",
|
| 224 |
+
" \"\"\"Create a complete training example with thinking.\"\"\"\n",
|
| 225 |
+
" \n",
|
| 226 |
+
" # Generate system message\n",
|
| 227 |
+
" system_content = PROMPTS[difficulty].strip()\n",
|
| 228 |
+
" \n",
|
| 229 |
+
" # Generate thinking for input and output\n",
|
| 230 |
+
" input_thinking = self.generate_input_thinking(difficulty)\n",
|
| 231 |
+
" output_thinking = self.generate_output_thinking(difficulty, rewritten_text)\n",
|
| 232 |
+
" \n",
|
| 233 |
+
" # Create the message structure\n",
|
| 234 |
+
" messages = [\n",
|
| 235 |
+
" {\n",
|
| 236 |
+
" \"content\": f\"reasoning language: Spanish\\n\\n{system_content}\",\n",
|
| 237 |
+
" \"role\": \"system\",\n",
|
| 238 |
+
" \"thinking\": None\n",
|
| 239 |
+
" },\n",
|
| 240 |
+
" {\n",
|
| 241 |
+
" \"content\": f\"Please rewrite the following medical report to achieve a Fernández Huerta score of {fh_score:.1f} (difficulty level: {difficulty}):\\n\\n{self.original_report}\",\n",
|
| 242 |
+
" \"role\": \"user\",\n",
|
| 243 |
+
" \"thinking\": input_thinking\n",
|
| 244 |
+
" },\n",
|
| 245 |
+
" {\n",
|
| 246 |
+
" \"content\": rewritten_text,\n",
|
| 247 |
+
" \"role\": \"assistant\",\n",
|
| 248 |
+
" \"thinking\": output_thinking\n",
|
| 249 |
+
" }\n",
|
| 250 |
+
" ]\n",
|
| 251 |
+
" \n",
|
| 252 |
+
" return {\"messages\": messages}\n",
|
| 253 |
+
"\n",
|
| 254 |
+
"def process_medical_dataset_with_original(\n",
|
| 255 |
+
" original_reports: List[str],\n",
|
| 256 |
+
" readability_versions_list: List[Dict],\n",
|
| 257 |
+
" include_variations: bool = True\n",
|
| 258 |
+
") -> List[Dict]:\n",
|
| 259 |
+
" \"\"\"\n",
|
| 260 |
+
" Process medical dataset with original reports and create training data.\n",
|
| 261 |
+
" \n",
|
| 262 |
+
" Args:\n",
|
| 263 |
+
" original_reports: List of original medical reports\n",
|
| 264 |
+
" readability_versions_list: List of dictionaries with readability versions\n",
|
| 265 |
+
" include_variations: Whether to include cross-difficulty variations\n",
|
| 266 |
+
" \n",
|
| 267 |
+
" Returns:\n",
|
| 268 |
+
" List of training examples with thinking mode\n",
|
| 269 |
+
" \"\"\"\n",
|
| 270 |
+
" training_dataset = []\n",
|
| 271 |
+
" \n",
|
| 272 |
+
" for original_report, versions_dict in zip(original_reports, readability_versions_list):\n",
|
| 273 |
+
" processor = MedicalReportProcessor(original_report)\n",
|
| 274 |
+
" readability_versions = versions_dict.get(\"readability_versions\", {})\n",
|
| 275 |
+
" \n",
|
| 276 |
+
" # Create training examples for each difficulty level\n",
|
| 277 |
+
" for difficulty, content in readability_versions.items():\n",
|
| 278 |
+
" rewritten_text = content[\"text\"]\n",
|
| 279 |
+
" fh_score = content[\"FH_score\"]\n",
|
| 280 |
+
" \n",
|
| 281 |
+
" training_example = processor.create_training_example(\n",
|
| 282 |
+
" difficulty=difficulty,\n",
|
| 283 |
+
" rewritten_text=rewritten_text,\n",
|
| 284 |
+
" fh_score=fh_score\n",
|
| 285 |
+
" )\n",
|
| 286 |
+
" \n",
|
| 287 |
+
" training_dataset.append(training_example)\n",
|
| 288 |
+
" \n",
|
| 289 |
+
" # Optionally create cross-difficulty variations\n",
|
| 290 |
+
" if include_variations:\n",
|
| 291 |
+
" difficulties = list(readability_versions.keys())\n",
|
| 292 |
+
" \n",
|
| 293 |
+
" # Create some mixed examples (e.g., easy to hard, hard to intermediate)\n",
|
| 294 |
+
" for _ in range(2): # Create 2 variations per report\n",
|
| 295 |
+
" source_diff = random.choice(difficulties)\n",
|
| 296 |
+
" target_diff = random.choice([d for d in difficulties if d != source_diff])\n",
|
| 297 |
+
" \n",
|
| 298 |
+
" # Use source difficulty text as \"original\" for variation\n",
|
| 299 |
+
" source_text = readability_versions[source_diff][\"text\"]\n",
|
| 300 |
+
" target_text = readability_versions[target_diff][\"text\"]\n",
|
| 301 |
+
" target_fh = readability_versions[target_diff][\"FH_score\"]\n",
|
| 302 |
+
" \n",
|
| 303 |
+
" # Create processor for this variation\n",
|
| 304 |
+
" var_processor = MedicalReportProcessor(source_text)\n",
|
| 305 |
+
" variation_example = var_processor.create_training_example(\n",
|
| 306 |
+
" difficulty=target_diff,\n",
|
| 307 |
+
" rewritten_text=target_text,\n",
|
| 308 |
+
" fh_score=target_fh\n",
|
| 309 |
+
" )\n",
|
| 310 |
+
" \n",
|
| 311 |
+
" training_dataset.append(variation_example)\n",
|
| 312 |
+
" \n",
|
| 313 |
+
" return training_dataset\n",
|
| 314 |
+
"\n",
|
| 315 |
+
"# Example usage\n",
|
| 316 |
+
"if __name__ == \"__main__\":\n",
|
| 317 |
+
" # Example original medical reports (these would be your actual original reports)\n",
|
| 318 |
+
" original_medical_reports = [\n",
|
| 319 |
+
" \"\"\"Paciente masculino de 18 años con diagnóstico molecular confirmado de Neurofibromatosis tipo 1 \n",
|
| 320 |
+
" (deleción exones 5-47 del gen NF1), que presenta antecedentes de retraso del desarrollo psicomotor \n",
|
| 321 |
+
" global diagnosticado a los 3 años, trastorno específico del lenguaje de tipo expresivo que requirió \n",
|
| 322 |
+
" intervención fonoaudiológica, y TDAH en tratamiento con metilfenidato 20mg/día con buena respuesta. \n",
|
| 323 |
+
" Hallazgos oftalmológicos incluyen nódulos de Lisch bilaterales, astigmatismo miópico compuesto y \n",
|
| 324 |
+
" euriblefaron bilateral. Motivo de consulta actual: aparición de placa eritematosa de 3cm en muslo \n",
|
| 325 |
+
" izquierdo de 12 meses de evolución y múltiples nódulos subcutáneos móviles no dolorosos en región \n",
|
| 326 |
+
" supraciliar derecha, occipital y muñeca izquierda. Examen físico revela macrocefalia (PC 59cm, >p97), \n",
|
| 327 |
+
" 15 máculas café con leche >1.5cm, efélides axilares e inguinales bilaterales, y 3 máculas \n",
|
| 328 |
+
" rojo-azuladas deprimidas de 0.5-1cm en región lumbar y pectoral derecha. Estudios histopatológicos \n",
|
| 329 |
+
" confirman neurofibromas con inmunohistoquímica S100(+), SOX10(+). Ecografía de partes blandas \n",
|
| 330 |
+
" muestra lesiones hipoecoicas bien delimitadas compatibles con neurofibromas subcutáneos.\"\"\"\n",
|
| 331 |
+
" ]\n",
|
| 332 |
+
" \n",
|
| 333 |
+
" # Your readability versions data\n",
|
| 334 |
+
" readability_data = [\n",
|
| 335 |
+
" {\n",
|
| 336 |
+
" \"readability_versions\": {\n",
|
| 337 |
+
" \"easy\": {\n",
|
| 338 |
+
" \"text\": \"Un joven de 18 años tenía una enfermedad llamada Neurofibromatosis tipo 1 desde que era bebé. Esta enfermedad produce manchas café con leche en la piel y pequeños bultos. Durante su infancia tuvo algunas dificultades para hablar y moverse bien, por lo que recibió terapias especiales. En la adolescencia le dieron medicamentos para mejorar su concentración. A los 18 años fue al dermatólogo porque le salió una nueva mancha en el muslo y algunos bultos en la piel. Le hicieron exámenes y confirmaron que eran parte de su enfermedad. Los médicos clasificaron los distintos tipos de manchas y bultos que tenía en la piel.\",\n",
|
| 339 |
+
" \"FH_score\": 77.16\n",
|
| 340 |
+
" },\n",
|
| 341 |
+
" \"intermediate\": {\n",
|
| 342 |
+
" \"text\": \"Un joven de 18 años con Neurofibromatosis tipo 1, diagnosticada desde el primer año de vida, había presentado dificultades motoras y del lenguaje durante la infancia, además de problemas visuales como nódulos de Lisch y astigmatismo. Fue tratado por Trastorno por Déficit Atencional con buenos resultados académicos. Consultó en Dermatología por una nueva mancha en el muslo izquierdo y la aparición de nódulos en zonas como la muñeca y el cuero cabelludo. En el examen se observaron manchas café con leche, pecas en las axilas y varios bultos pequeños bajo la piel. Se realizaron biopsias y ecografías que confirmaron que las lesiones correspondían a diferentes tipos de neurofibromas superficiales, los cuales fueron clasificados según su forma y localización.\",\n",
|
| 343 |
+
" \"FH_score\": 62.77\n",
|
| 344 |
+
" },\n",
|
| 345 |
+
" \"hard\": {\n",
|
| 346 |
+
" \"text\": \"Varón de 18 años con diagnóstico clínico y molecular de Neurofibromatosis tipo 1 (deleción de exones 5-47 del gen NF1), con antecedentes de retraso psicomotor global, trastorno específico del lenguaje expresivo, TDAH tratado con metilfenidato y hallazgos oftalmológicos compatibles con NF1 (nódulos de Lisch, astigmatismo y euriblefaron). Acude a Dermatología por aparición de placa rosada en muslo izquierdo de un año de evolución y nódulos subcutáneos móviles en región supraciliar derecha, occipital y muñeca. El examen físico revela macrocefalia, múltiples máculas café con leche, efélides axilares y máculas rojo-azuladas deprimidas en región lumbar y pectoral. Las biopsias cutáneas y ecografía de nódulos confirmaron neurofibromas superficiales. Según la clasificación de García-Martínez et al., se diagnosticaron simultáneamente neurofibromas subcutáneos nodulares, cutáneos pseudoatróficos y cutáneos rojo-azulados, evidenciando la heterogeneidad fenotípica de la enfermedad en un mismo paciente.\",\n",
|
| 347 |
+
" \"FH_score\": 39.94\n",
|
| 348 |
+
" }\n",
|
| 349 |
+
" }\n",
|
| 350 |
+
" }\n",
|
| 351 |
+
" ]\n",
|
| 352 |
+
" \n",
|
| 353 |
+
" # Process the dataset with original reports\n",
|
| 354 |
+
" training_dataset = process_medical_dataset_with_original(\n",
|
| 355 |
+
" original_reports=original_medical_reports,\n",
|
| 356 |
+
" readability_versions_list=readability_data,\n",
|
| 357 |
+
" include_variations=True\n",
|
| 358 |
+
" )\n",
|
| 359 |
+
" \n",
|
| 360 |
+
" # Save the training dataset\n",
|
| 361 |
+
" with open(\"medical_report_finetuning_with_thinking.jsonl\", \"w\", encoding=\"utf-8\") as f:\n",
|
| 362 |
+
" for example in training_dataset:\n",
|
| 363 |
+
" f.write(json.dumps(example, ensure_ascii=False) + \"\\n\")\n",
|
| 364 |
+
" \n",
|
| 365 |
+
" # Print example for verification\n",
|
| 366 |
+
" print(\"Example training data with original medical report:\")\n",
|
| 367 |
+
" print(json.dumps(training_dataset[0], ensure_ascii=False, indent=2))\n",
|
| 368 |
+
" \n",
|
| 369 |
+
" # Print statistics\n",
|
| 370 |
+
" print(f\"\\n📊 Dataset Statistics:\")\n",
|
| 371 |
+
" print(f\"Total training examples: {len(training_dataset)}\")\n",
|
| 372 |
+
" print(f\"Number of messages per example: {len(training_dataset[0]['messages'])}\")\n",
|
| 373 |
+
" print(f\"All examples have thinking: {all('thinking' in msg for ex in training_dataset for msg in ex['messages'])}\")\n",
|
| 374 |
+
" \n",
|
| 375 |
+
" # Validate the structure\n",
|
| 376 |
+
" for i, example in enumerate(training_dataset):\n",
|
| 377 |
+
" assert len(example['messages']) == 3, f\"Example {i} doesn't have 3 messages\"\n",
|
| 378 |
+
" assert example['messages'][0]['role'] == 'system', f\"Example {i} first message is not system\"\n",
|
| 379 |
+
" assert example['messages'][1]['role'] == 'user', f\"Example {i} second message is not user\"\n",
|
| 380 |
+
" assert example['messages'][2]['role'] == 'assistant', f\"Example {i} third message is not assistant\"\n",
|
| 381 |
+
" assert 'thinking' in example['messages'][1], f\"Example {i} user message missing thinking\"\n",
|
| 382 |
+
" assert 'thinking' in example['messages'][2], f\"Example {i} assistant message missing thinking\"\n",
|
| 383 |
+
" \n",
|
| 384 |
+
" print(\"✅ All validation checks passed!\")"
|
| 385 |
+
]
|
| 386 |
+
},
|
| 387 |
+
{
|
| 388 |
+
"cell_type": "markdown",
|
| 389 |
+
"id": "123b65b3",
|
| 390 |
+
"metadata": {},
|
| 391 |
+
"source": [
|
| 392 |
+
"Example training data with original medical report:\n",
|
| 393 |
+
"{\n",
|
| 394 |
+
" \"messages\": [\n",
|
| 395 |
+
" {\n",
|
| 396 |
+
" \"content\": \"reasoning language: Spanish\\n\\nYou are an assistant that rewrites Spanish texts to make them very simple and easy to understand.\\nYour goal is to rewrite the provided input text for younger readers (Fernández Huerta 70–100; grade 5–7).\\nUse short sentences, simple words, and friendly tone. Avoid technical or complex expressions.\\nKeep all important factual details, but remove jargon.\\nReturn only the rewritten text without commentary.\",\n",
|
| 397 |
+
" \"role\": \"system\",\n",
|
| 398 |
+
" \"thinking\": null\n",
|
| 399 |
+
" },\n",
|
| 400 |
+
" {\n",
|
| 401 |
+
" \"content\": \"Please rewrite the following medical report to achieve a Fernández Huerta score of 77.2 (difficulty level: easy):\\n\\nPaciente masculino de 18 años con diagnóstico molecular confirmado de Neurofibromatosis tipo 1 \\n (deleción exones 5-47 del gen NF1), que presenta antecedentes de retraso del desarrollo psicomotor \\n global diagnosticado a los 3 años, trastorno específico del lenguaje de tipo expresivo que requirió \\n intervención fonoaudiológica, y TDAH en tratamiento con metilfenidato 20mg/día con buena respuesta. \\n Hallazgos oftalmológicos incluyen nódulos de Lisch bilaterales, astigmatismo miópico compuesto y \\n euriblefaron bilateral. Motivo de consulta actual: aparición de placa eritematosa de 3cm en muslo \\n izquierdo de 12 meses de evolución y múltiples nódulos subcutáneos móviles no dolorosos en región \\n supraciliar derecha, occipital y muñeca izquierda. Examen físico revela macrocefalia (PC 59cm, >p97), \\n 15 máculas café con leche >1.5cm, efélides axilares e inguinales bilaterales, y 3 máculas \\n rojo-azuladas deprimidas de 0.5-1cm en región lumbar y pectoral derecha. Estudios histopatológicos \\n confirman neurofibromas con inmunohistoquímica S100(+), SOX10(+). Ecografía de partes blandas \\n muestra lesiones hipoecoicas bien delimitadas compatibles con neurofibromas subcutáneos.\",\n",
|
| 402 |
+
" \"role\": \"user\",\n",
|
| 403 |
+
" \"thinking\": \"Estoy analizando este informe médico. Primero debo identificar:\\n1. Datos del paciente: paciente de 18 años\\n2. Diagnóstico principal: diagnóstico, trastorno\\n3. Síntomas y signos clínicos: dolor, nódulo, retraso\\n4. Pruebas realizadas: ecografía, examen\\n5. Tratamiento: tratamiento, metilfenidato\\n\\nAhora debo adaptar esta información al nivel de lectura solicitado: easy.\"\n",
|
| 404 |
+
" },\n",
|
| 405 |
+
" {\n",
|
| 406 |
+
" \"content\": \"Un joven de 18 años tenía una enfermedad llamada Neurofibromatosis tipo 1 desde que era bebé. Esta enfermedad produce manchas café con leche en la piel y pequeños bultos. Durante su infancia tuvo algunas dificultades para hablar y moverse bien, por lo que recibió terapias especiales. En la adolescencia le dieron medicamentos para mejorar su concentración. A los 18 años fue al dermatólogo porque le salió una nueva mancha en el muslo y algunos bultos en la piel. Le hicieron exámenes y confirmaron que eran parte de su enfermedad. Los médicos clasificaron los distintos tipos de manchas y bultos que tenía en la piel.\",\n",
|
| 407 |
+
" \"role\": \"assistant\",\n",
|
| 408 |
+
" \"thinking\": \"Necesito simplificar mucho este texto:\\n- Cambiar términos médicos complejos por palabras cotidianas\\n- Usar máximo 10-15 palabras por oración\\n- Explicar todo como si fuera para un niño\\n- Mantener solo la información esencial\\n- Hacer que suene amigable y no aterrador\"\n",
|
| 409 |
+
" }\n",
|
| 410 |
+
" ]\n",
|
| 411 |
+
"}\n",
|
| 412 |
+
"\n",
|
| 413 |
+
"📊 Dataset Statistics:\n",
|
| 414 |
+
"Total training examples: 5\n",
|
| 415 |
+
"Number of messages per example: 3\n",
|
| 416 |
+
"All examples have thinking: True\n",
|
| 417 |
+
"✅ All validation checks passed!"
|
| 418 |
+
]
|
| 419 |
+
}
|
| 420 |
+
],
|
| 421 |
+
"metadata": {
|
| 422 |
+
"kernelspec": {
|
| 423 |
+
"display_name": "unsloth_latest",
|
| 424 |
+
"language": "python",
|
| 425 |
+
"name": "python3"
|
| 426 |
+
},
|
| 427 |
+
"language_info": {
|
| 428 |
+
"codemirror_mode": {
|
| 429 |
+
"name": "ipython",
|
| 430 |
+
"version": 3
|
| 431 |
+
},
|
| 432 |
+
"file_extension": ".py",
|
| 433 |
+
"mimetype": "text/x-python",
|
| 434 |
+
"name": "python",
|
| 435 |
+
"nbconvert_exporter": "python",
|
| 436 |
+
"pygments_lexer": "ipython3",
|
| 437 |
+
"version": "3.11.13"
|
| 438 |
+
}
|
| 439 |
+
},
|
| 440 |
+
"nbformat": 4,
|
| 441 |
+
"nbformat_minor": 5
|
| 442 |
+
}
|
code/old/readability_controlv2.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
| 3 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
|
| 4 |
+
import torch
|
| 5 |
+
import time
|
| 6 |
+
import random
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def initialize_and_touch(tensor):
|
| 11 |
+
tensor.zero_()
|
| 12 |
+
torch.cuda.synchronize()
|
| 13 |
+
|
| 14 |
+
def dummy_compute(tensor):
|
| 15 |
+
result = torch.matmul(tensor, tensor.t())
|
| 16 |
+
torch.cuda.synchronize()
|
| 17 |
+
return result
|
| 18 |
+
|
| 19 |
+
device = torch.device("cuda")
|
| 20 |
+
total_memory = torch.cuda.get_device_properties(device).total_memory
|
| 21 |
+
print(f"Total VRAM: {total_memory / (1024**3):.2f} GB")
|
| 22 |
+
|
| 23 |
+
allocated_tensors = []
|
| 24 |
+
chunk_size_bytes = 4 * 1024**3 # 4 GiB
|
| 25 |
+
element_size = torch.tensor([], dtype=torch.float32).element_size()
|
| 26 |
+
chunk_elements = chunk_size_bytes // element_size
|
| 27 |
+
|
| 28 |
+
# Make the chunk roughly square
|
| 29 |
+
side = int(chunk_elements ** 0.5)
|
| 30 |
+
|
| 31 |
+
allocated = 0
|
| 32 |
+
target = total_memory * 0.95
|
| 33 |
+
|
| 34 |
+
print("Allocating and initializing memory...")
|
| 35 |
+
while allocated < target:
|
| 36 |
+
try:
|
| 37 |
+
# Allocate a 2D tensor
|
| 38 |
+
chunk = torch.empty((side, side), dtype=torch.float32, device=device)
|
| 39 |
+
initialize_and_touch(chunk)
|
| 40 |
+
allocated_tensors.append(chunk)
|
| 41 |
+
allocated += chunk_size_bytes
|
| 42 |
+
print(f"Allocated: {allocated / (1024**3):.2f} GB", end='\r')
|
| 43 |
+
except RuntimeError as e:
|
| 44 |
+
if 'out of memory' in str(e).lower():
|
| 45 |
+
print(f"\nOut of memory after {allocated / (1024**3):.2f} GB")
|
| 46 |
+
break
|
| 47 |
+
else:
|
| 48 |
+
raise
|
| 49 |
+
|
| 50 |
+
print(f"\nHolding {allocated / (1024**3):.2f} GB in {len(allocated_tensors)} chunks.")
|
| 51 |
+
print("Running dummy compute every 30 seconds to show GPU utilization...")
|
| 52 |
+
|
| 53 |
+
compute_interval = 30
|
| 54 |
+
last_compute = time.time()
|
| 55 |
+
|
| 56 |
+
while True:
|
| 57 |
+
now = time.time()
|
| 58 |
+
if now - last_compute >= compute_interval:
|
| 59 |
+
if allocated_tensors:
|
| 60 |
+
t = random.choice(allocated_tensors)
|
| 61 |
+
try:
|
| 62 |
+
side = min(t.shape[0], 8000)
|
| 63 |
+
_ = dummy_compute(t[:side, :side])
|
| 64 |
+
print(f"[{time.strftime('%H:%M:%S')}] GPU compute spike (util ↑)")
|
| 65 |
+
except Exception as e:
|
| 66 |
+
print(f"Compute failed (expected if chunk too big): {e}")
|
| 67 |
+
last_compute = now
|
| 68 |
+
|
| 69 |
+
time.sleep(1)
|
code/old/resonability_check_completeness_openai_V2.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, json
|
| 2 |
+
def return_prompts(reference_summary, generated_summary, subclaims_json, difficulty_level):
|
| 3 |
+
prompt=f'''
|
| 4 |
+
You are a **medical summarization quality evaluator**.
|
| 5 |
+
Your goal is to decide whether the inclusion or omission of each subclaim in the generated summary is *reasonable*, given the target readability level.
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
### **Input**
|
| 10 |
+
|
| 11 |
+
```
|
| 12 |
+
Readability Level: {difficulty_level}
|
| 13 |
+
|
| 14 |
+
Reference Summary:
|
| 15 |
+
{reference_summary}
|
| 16 |
+
|
| 17 |
+
Generated Summary:
|
| 18 |
+
{generated_summary}
|
| 19 |
+
|
| 20 |
+
Subclaims with Support Results:
|
| 21 |
+
{subclaims_json}
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
### **Task**
|
| 27 |
+
|
| 28 |
+
For each subclaim:
|
| 29 |
+
|
| 30 |
+
1. Read `result`:
|
| 31 |
+
|
| 32 |
+
* `1` = the subclaim is supported or clearly mentioned in the generated summary.
|
| 33 |
+
* `0` = the subclaim is missing or not supported.
|
| 34 |
+
|
| 35 |
+
2. Based on readability level and medical relevance, decide whether this inclusion/omission is **reasonable**, **partially reasonable**, or **unreasonable**.
|
| 36 |
+
|
| 37 |
+
3. Provide a short justification (1–2 sentences) explaining your reasoning.
|
| 38 |
+
|
| 39 |
+
---
|
| 40 |
+
|
| 41 |
+
### **Output Format**
|
| 42 |
+
|
| 43 |
+
Return structured JSON:
|
| 44 |
+
|
| 45 |
+
```json
|
| 46 |
+
{{
|
| 47 |
+
"readability_level": "<easy/intermediate/hard>",
|
| 48 |
+
"evaluations": [
|
| 49 |
+
{{
|
| 50 |
+
"subclaim_id": <id>,
|
| 51 |
+
"subclaim_text": "<text>",
|
| 52 |
+
"result": <0 or 1>,
|
| 53 |
+
"reasonableness": "<reasonable | partially_reasonable | unreasonable>",
|
| 54 |
+
"justification": "<short explanation>"
|
| 55 |
+
}},
|
| 56 |
+
...
|
| 57 |
+
]
|
| 58 |
+
}}
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
---
|
| 62 |
+
|
| 63 |
+
### **Evaluation Guidelines**
|
| 64 |
+
|
| 65 |
+
| Readability Level | Reasonable Omission | Unreasonable Omission |
|
| 66 |
+
| ----------------- | ------------------------------------------------------------ | ------------------------------------------------- |
|
| 67 |
+
| **Easy** | Technical, anatomical, quantitative, or procedural details. | Key clinical findings, diagnoses, or outcomes. |
|
| 68 |
+
| **Intermediate** | Minor imaging details or measurements. | Any main diagnostic finding or cause–effect link. |
|
| 69 |
+
| **Hard** | Very few omissions acceptable; mostly stylistic compression. | Any missing clinical or diagnostic information. |
|
| 70 |
+
|
| 71 |
+
'''
|
| 72 |
+
return prompt
|
| 73 |
+
|
| 74 |
+
from openai import OpenAI
|
| 75 |
+
|
| 76 |
+
file_path = "/home/mshahidul/api_new.json"
|
| 77 |
+
with open(file_path, "r") as file:
|
| 78 |
+
api_keys = json.load(file)
|
| 79 |
+
|
| 80 |
+
openai_api_key = api_keys.get("openai")
|
| 81 |
+
|
| 82 |
+
client = OpenAI(api_key=openai_api_key)
|
| 83 |
+
def openai_return(prompt):
|
| 84 |
+
response = client.chat.completions.create(
|
| 85 |
+
model="gpt-5",
|
| 86 |
+
messages=[
|
| 87 |
+
{"role": "system", "content": "You are a helpful assistant."},
|
| 88 |
+
{"role": "user", "content": prompt}
|
| 89 |
+
]
|
| 90 |
+
)
|
| 91 |
+
cleaned_response = response.choices[0].message.content.strip().replace("```json", "").replace("```", "")
|
| 92 |
+
return json.loads(cleaned_response)
|
| 93 |
+
|
| 94 |
+
import json
|
| 95 |
+
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
|
| 96 |
+
|
| 97 |
+
with open(file_path, 'r') as f:
|
| 98 |
+
synthetic_data = json.load(f)
|
| 99 |
+
|
| 100 |
+
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
|
| 101 |
+
|
| 102 |
+
with open(file_path_qwen3_32B, 'r') as f:
|
| 103 |
+
qwen3_32B_results = json.load(f)
|
| 104 |
+
|
| 105 |
+
# dict_keys(['id', 'full_text', 'ref_summary', 'readability_versions'])
|
| 106 |
+
# print(f"Full text: {synthetic_data[0]['full_text']}")
|
| 107 |
+
res=[]
|
| 108 |
+
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/syn_data_resonability_check_20_gpt5.json"
|
| 109 |
+
if os.path.exists(save_path):
|
| 110 |
+
with open(save_path, 'r') as f:
|
| 111 |
+
res = json.load(f)
|
| 112 |
+
exist_check_ids = set([(item['id'], item['difficulty_level']) for item in res])
|
| 113 |
+
print(f"Resuming from {len(res)} entries")
|
| 114 |
+
import tqdm
|
| 115 |
+
for ind in tqdm.tqdm(range(0,20)):
|
| 116 |
+
print(f"Processing index: {ind}")
|
| 117 |
+
for version in ["easy", "intermediate", "hard"]:
|
| 118 |
+
if (synthetic_data[ind]['id'], version) in exist_check_ids:
|
| 119 |
+
print(f"Skipping {synthetic_data[ind]['id']} {version}")
|
| 120 |
+
continue
|
| 121 |
+
ref_summary = (f"{synthetic_data[ind]['ref_summary']['text']}")
|
| 122 |
+
generated_summary = (f"{synthetic_data[ind]['readability_versions'][version]['text']}")
|
| 123 |
+
subclaims_results = (f"{qwen3_32B_results[ind]['completeness']['results']}")
|
| 124 |
+
try:
|
| 125 |
+
prompt = return_prompts(ref_summary, generated_summary, subclaims_results, version)
|
| 126 |
+
res.append({
|
| 127 |
+
"id": synthetic_data[ind]['id'],
|
| 128 |
+
"difficulty_level": version,
|
| 129 |
+
"reasonableness": openai_return(prompt)
|
| 130 |
+
})
|
| 131 |
+
if len(res)%2==0:
|
| 132 |
+
print(f"Completed {len(res)} out of 300")
|
| 133 |
+
with open(save_path, 'w') as outfile:
|
| 134 |
+
json.dump(res, outfile, indent=2)
|
| 135 |
+
except Exception as e:
|
| 136 |
+
print(f"Error at {ind} {version}: {e}")
|
| 137 |
+
# print(prompt)
|
| 138 |
+
# assert False
|
| 139 |
+
with open(save_path, 'w') as outfile:
|
| 140 |
+
json.dump(res, outfile, indent=2)
|
code/old/resonability_check_completeness_openai_V3.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, json
|
| 2 |
+
def return_prompts(reference_summary, generated_summary, subclaims_json, difficulty_level):
|
| 3 |
+
prompt=f'''
|
| 4 |
+
You are a **medical summarization quality evaluator**.
|
| 5 |
+
Your goal is to decide whether the inclusion or omission of each subclaim in the generated summary is *reasonable*, given the target readability level.
|
| 6 |
+
|
| 7 |
+
---
|
| 8 |
+
|
| 9 |
+
### **Input**
|
| 10 |
+
|
| 11 |
+
```
|
| 12 |
+
Readability Level: {difficulty_level}
|
| 13 |
+
|
| 14 |
+
Reference Summary:
|
| 15 |
+
{reference_summary}
|
| 16 |
+
|
| 17 |
+
Generated Summary:
|
| 18 |
+
{generated_summary}
|
| 19 |
+
|
| 20 |
+
Subclaims with Support Results:
|
| 21 |
+
{subclaims_json}
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
### **Task**
|
| 27 |
+
|
| 28 |
+
For each subclaim:
|
| 29 |
+
|
| 30 |
+
1. Read `result`:
|
| 31 |
+
|
| 32 |
+
* `1` = the subclaim is supported or clearly mentioned in the generated summary.
|
| 33 |
+
* `0` = the subclaim is missing or not supported.
|
| 34 |
+
|
| 35 |
+
2. Based on readability level and medical relevance, decide whether this inclusion/omission is **reasonable**, **partially reasonable**, or **unreasonable**.
|
| 36 |
+
|
| 37 |
+
3. Provide a short justification (1–2 sentences) explaining your reasoning.
|
| 38 |
+
|
| 39 |
+
---
|
| 40 |
+
|
| 41 |
+
### **Output Format**
|
| 42 |
+
|
| 43 |
+
Return structured JSON:
|
| 44 |
+
|
| 45 |
+
```json
|
| 46 |
+
{{
|
| 47 |
+
"readability_level": "<easy/intermediate/hard>",
|
| 48 |
+
"evaluations": [
|
| 49 |
+
{{
|
| 50 |
+
"subclaim_id": <id>,
|
| 51 |
+
"subclaim_text": "<text>",
|
| 52 |
+
"result": <0 or 1>,
|
| 53 |
+
"reasonableness": "<reasonable | partially_reasonable | unreasonable>",
|
| 54 |
+
"justification": "<short explanation>"
|
| 55 |
+
}},
|
| 56 |
+
...
|
| 57 |
+
]
|
| 58 |
+
}}
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
---
|
| 62 |
+
|
| 63 |
+
### **Evaluation Guidelines**
|
| 64 |
+
|
| 65 |
+
| Readability Level | Reasonable Omission | Unreasonable Omission |
|
| 66 |
+
| ----------------- | ------------------------------------------------------------ | ------------------------------------------------- |
|
| 67 |
+
| **Easy** | Technical, anatomical, quantitative, or procedural details. | Key clinical findings, diagnoses, or outcomes. |
|
| 68 |
+
| **Intermediate** | Minor imaging details or measurements. | Any main diagnostic finding or cause–effect link. |
|
| 69 |
+
| **Hard** | Very few omissions acceptable; mostly stylistic compression. | Any missing clinical or diagnostic information. |
|
| 70 |
+
|
| 71 |
+
'''
|
| 72 |
+
return prompt
|
| 73 |
+
|
| 74 |
+
from openai import OpenAI
|
| 75 |
+
|
| 76 |
+
file_path = "/home/mshahidul/api_new.json"
|
| 77 |
+
with open(file_path, "r") as file:
|
| 78 |
+
api_keys = json.load(file)
|
| 79 |
+
|
| 80 |
+
openai_api_key = api_keys.get("openai")
|
| 81 |
+
|
| 82 |
+
client = OpenAI(api_key=openai_api_key)
|
| 83 |
+
def openai_return(prompt):
|
| 84 |
+
response = client.chat.completions.create(
|
| 85 |
+
model="gpt-5",
|
| 86 |
+
messages=[
|
| 87 |
+
{"role": "system", "content": "You are a helpful assistant."},
|
| 88 |
+
{"role": "user", "content": prompt}
|
| 89 |
+
]
|
| 90 |
+
)
|
| 91 |
+
cleaned_response = response.choices[0].message.content.strip().replace("```json", "").replace("```", "")
|
| 92 |
+
return json.loads(cleaned_response)
|
| 93 |
+
|
| 94 |
+
import json
|
| 95 |
+
file_path = "/home/mshahidul/readctrl/data/training_data_subclaim_verifier/synthetic_data_es_subclaims_100.json"
|
| 96 |
+
|
| 97 |
+
with open(file_path, 'r') as f:
|
| 98 |
+
synthetic_data = json.load(f)
|
| 99 |
+
|
| 100 |
+
file_path_qwen3_32B = "/home/mshahidul/readctrl/results/dataset_quality_check/subclaim_verifier_results_100_qwen3-32B.json"
|
| 101 |
+
|
| 102 |
+
with open(file_path_qwen3_32B, 'r') as f:
|
| 103 |
+
qwen3_32B_results = json.load(f)
|
| 104 |
+
|
| 105 |
+
# dict_keys(['id', 'full_text', 'ref_summary', 'readability_versions'])
|
| 106 |
+
# print(f"Full text: {synthetic_data[0]['full_text']}")
|
| 107 |
+
res=[]
|
| 108 |
+
save_path = "/home/mshahidul/readctrl/results/dataset_quality_check/syn_data_resonability_check_20_gpt5.json"
|
| 109 |
+
if os.path.exists(save_path):
|
| 110 |
+
with open(save_path, 'r') as f:
|
| 111 |
+
res = json.load(f)
|
| 112 |
+
exist_check_ids = set([(item['id'], item['difficulty_level']) for item in res])
|
| 113 |
+
print(f"Resuming from {len(res)} entries")
|
| 114 |
+
import tqdm
|
| 115 |
+
for ind in tqdm.tqdm(range(0,20)):
|
| 116 |
+
print(f"Processing index: {ind}")
|
| 117 |
+
for version in ["easy", "intermediate", "hard"]:
|
| 118 |
+
if (synthetic_data[ind]['id'], version) in exist_check_ids:
|
| 119 |
+
print(f"Skipping {synthetic_data[ind]['id']} {version}")
|
| 120 |
+
continue
|
| 121 |
+
ref_summary = (f"{synthetic_data[ind]['ref_summary']['text']}")
|
| 122 |
+
generated_summary = (f"{synthetic_data[ind]['readability_versions'][version]['text']}")
|
| 123 |
+
subclaims_results = (f"{qwen3_32B_results[ind]['completeness']['results']}")
|
| 124 |
+
try:
|
| 125 |
+
prompt = return_prompts(ref_summary, generated_summary, subclaims_results, version)
|
| 126 |
+
res.append({
|
| 127 |
+
"id": synthetic_data[ind]['id'],
|
| 128 |
+
"difficulty_level": version,
|
| 129 |
+
"reasonableness": openai_return(prompt)
|
| 130 |
+
})
|
| 131 |
+
if len(res)%2==0:
|
| 132 |
+
print(f"Completed {len(res)} out of 300")
|
| 133 |
+
with open(save_path, 'w') as outfile:
|
| 134 |
+
json.dump(res, outfile, indent=2)
|
| 135 |
+
except Exception as e:
|
| 136 |
+
print(f"Error at {ind} {version}: {e}")
|
| 137 |
+
# print(prompt)
|
| 138 |
+
# assert False
|
| 139 |
+
with open(save_path, 'w') as outfile:
|
| 140 |
+
json.dump(res, outfile, indent=2)
|
code/old/synthetic_data_generationV3.py
ADDED
|
@@ -0,0 +1,348 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
import json
import time
from openai import OpenAI
import tqdm


# OpenAI client for all completion calls in this script.
# The API key is read from a local credentials JSON file with an
# "openai_api_key" field.
client = OpenAI(api_key=json.load(open('/home/mshahidul/api.json', 'r'))['openai_api_key'])
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# MODIFICATION: Create a dictionary to hold prompts for multiple languages.
|
| 12 |
+
ALL_PROMPTS = {
|
| 13 |
+
"en": {
|
| 14 |
+
"B1": """You are a summarization assistant. Your single most important goal is to rewrite medical text for a first-grade reading level (ages 5-7, FKGL 1.0-4.0). Simplicity is more important than detail.
|
| 15 |
+
|
| 16 |
+
Core Mandate:
|
| 17 |
+
- TARGET AUDIENCE: A 6-year-old child.
|
| 18 |
+
- PRIMARY GOAL: Extreme simplicity. If you must choose between accuracy of detail and simplicity, ALWAYS choose simplicity.
|
| 19 |
+
|
| 20 |
+
Strict Rules You Must Follow:
|
| 21 |
+
- SENTENCE LENGTH: Keep almost all sentences under 10 words. Use very short, simple sentences.
|
| 22 |
+
- VOCABULARY: Use only very common, everyday words that a first-grader would know. Avoid any medical or scientific terms. Instead of 'femur', say 'thigh bone'. Instead of 'benign', say 'not harmful'.
|
| 23 |
+
- TONE: Be very gentle, calm, and reassuring. Like a kind doctor explaining something to a small child.
|
| 24 |
+
- STRUCTURE: Use short paragraphs, often just one or two sentences long.
|
| 25 |
+
- FOCUS: Only mention the most important one or two points from the original text. Omit all other details.
|
| 26 |
+
|
| 27 |
+
- Never use emojis.
|
| 28 |
+
- Do not explain pronunciation.
|
| 29 |
+
- DO NOT use any medical jargon.
|
| 30 |
+
""",
|
| 31 |
+
"B2": """You are a summarization assistant trained to rewrite medical summaries for a middle school reading level (ages 11–14, FKGL 6.0–9.0). Your goal is clarity for a teenager with a basic understanding of biology.
|
| 32 |
+
|
| 33 |
+
Core Mandate:
|
| 34 |
+
- TARGET AUDIENCE: A 14-year-old in a 9th-grade biology class.
|
| 35 |
+
- PRIMARY GOAL: Clarity and straightforward explanation.
|
| 36 |
+
|
| 37 |
+
Strict Rules You Must Follow:
|
| 38 |
+
- SENTENCE LENGTH: Vary sentence length, but aim for an average of 12-18 words. Avoid long, complex sentences.
|
| 39 |
+
- VOCABULARY: You can use basic medical terms (e.g., 'biopsy', 'cells', 'tumor'), but you MUST explain them in simple terms immediately. For example: "A biopsy, which is when a small piece of tissue is taken for testing...".
|
| 40 |
+
- TONE: Be empathetic but direct. Use an educational and informative tone, like a science teacher.
|
| 41 |
+
- STRUCTURE: Organize the summary into logical paragraphs. You can use simple headings if it helps clarity (e.g., "What They Found," "What It Means").
|
| 42 |
+
- FOCUS: Summarize the main findings and their implications. Omit minor or highly technical details.
|
| 43 |
+
|
| 44 |
+
- Never use emojis.
|
| 45 |
+
- Do not explain pronunciation.
|
| 46 |
+
""",
|
| 47 |
+
"B3": """You are a summarization assistant trained to rewrite medical summaries for an educated, non-medical adult (ages 17+, FKGL 12.0+). Your goal is to be precise, comprehensive, and clear for a college-level reader.
|
| 48 |
+
|
| 49 |
+
Core Mandate:
|
| 50 |
+
- TARGET AUDIENCE: A curious college student or adult with no medical training.
|
| 51 |
+
- PRIMARY GOAL: Precision and structured clarity.
|
| 52 |
+
|
| 53 |
+
Strict Rules You Must Follow:
|
| 54 |
+
- SENTENCE LENGTH: Use clear, well-constructed sentences. Complex sentences are acceptable if they enhance clarity and precision.
|
| 55 |
+
- VOCABULARY: Use correct medical terminology. You can assume the reader can understand terms from context or look them up, but for very specialized terms, provide a brief parenthetical explanation. For example: "...showed evidence of hyperplasia (an increase in the number of cells)."
|
| 56 |
+
- TONE: Maintain a professional, empathetic, and respectful tone. Be authoritative but not clinical or cold.
|
| 57 |
+
- STRUCTURE: Provide a detailed and structured summary. Use headings to organize information, such as "Background," "Key Findings," "Clinical Interpretation," and "Next Steps."
|
| 58 |
+
- FOCUS: Be comprehensive and faithful to the source summary. Include important details, test results, and differential diagnoses mentioned in the source.
|
| 59 |
+
|
| 60 |
+
- Never use emojis.
|
| 61 |
+
- Do not explain pronunciation.
|
| 62 |
+
"""
|
| 63 |
+
},
|
| 64 |
+
"es": {
|
| 65 |
+
"B1": """Eres un asistente de resumen. Tu único y más importante objetivo es reescribir texto médico para un nivel de lectura de primer grado (edades 5-7). La simplicidad es más importante que el detalle.
|
| 66 |
+
|
| 67 |
+
Mandato Principal:
|
| 68 |
+
- PÚBLICO OBJETIVO: Un niño de 6 años.
|
| 69 |
+
- OBJETIVO PRIMARIO: Simplicidad extrema. Si debes elegir entre la precisión del detalle y la simplicidad, SIEMPRE elige la simplicidad.
|
| 70 |
+
|
| 71 |
+
Reglas Estrictas que Debes Seguir:
|
| 72 |
+
- IDIOMA: El resumen DEBE estar escrito en español.
|
| 73 |
+
- LONGITUD DE LA ORACIÓN: Casi todas las oraciones deben tener menos de 10 palabras. Usa frases muy cortas y simples.
|
| 74 |
+
- VOCABULARIO: Usa solo palabras cotidianas y muy comunes que un niño de primer grado conocería. Evita cualquier término médico o científico. En lugar de 'fémur', di 'hueso del muslo'. En lugar de 'benigno', di 'que no es dañino'.
|
| 75 |
+
- TONO: Sé muy gentil, calmado y tranquilizador. Como un doctor amable explicándole algo a un niño pequeño.
|
| 76 |
+
- ESTRUCTURA: Usa párrafos cortos, a menudo de solo una o dos oraciones.
|
| 77 |
+
- ENFOQUE: Menciona solo el punto más importante o los dos puntos más importantes del texto original. Omite todos los demás detalles.
|
| 78 |
+
|
| 79 |
+
- Nunca uses emojis.
|
| 80 |
+
- No expliques la pronunciación.
|
| 81 |
+
- NO uses jerga médica.
|
| 82 |
+
""",
|
| 83 |
+
"B2": """Eres un asistente de resumen entrenado para reescribir resúmenes médicos para un nivel de lectura de secundaria (edades 11–14). Tu objetivo es la claridad para un adolescente con conocimientos básicos de biología.
|
| 84 |
+
|
| 85 |
+
Mandato Principal:
|
| 86 |
+
- PÚBLICO OBJETIVO: Un estudiante de 14 años en una clase de biología de secundaria.
|
| 87 |
+
- OBJETIVO PRIMARIO: Claridad y explicación directa.
|
| 88 |
+
|
| 89 |
+
Reglas Estrictas que Debes Seguir:
|
| 90 |
+
- IDIOMA: El resumen DEBE estar escrito en español.
|
| 91 |
+
- LONGITUD DE LA ORACIÓN: Varía la longitud de las oraciones, pero busca un promedio de 12-18 palabras. Evita las oraciones largas y complejas.
|
| 92 |
+
- VOCABULARIO: Puedes usar términos médicos básicos (ej., 'biopsia', 'células', 'tumor'), pero DEBES explicarlos en términos sencillos inmediatamente. Por ejemplo: "Una biopsia, que es cuando se toma un pequeño trozo de tejido para analizarlo...".
|
| 93 |
+
- TONO: Sé empático pero directo. Usa un tono educativo e informativo, como un profesor de ciencias.
|
| 94 |
+
- ESTRUCTURA: Organiza el resumen en párrafos lógicos. Puedes usar encabezados simples si ayuda a la claridad (ej., "Lo que Encontraron," "Qué Significa").
|
| 95 |
+
- ENFOQUE: Resume los hallazgos principales y sus implicaciones. Omite detalles menores o muy técnicos.
|
| 96 |
+
|
| 97 |
+
- Nunca uses emojis.
|
| 98 |
+
- No expliques la pronunciación.
|
| 99 |
+
""",
|
| 100 |
+
"B3": """Eres un asistente de resumen entrenado para reescribir resúmenes médicos para un adulto educado no médico (edades 17+). Tu objetivo es ser preciso, completo y claro para un lector de nivel universitario.
|
| 101 |
+
|
| 102 |
+
Mandato Principal:
|
| 103 |
+
- PÚBLICO OBJETIVO: Un estudiante universitario o un adulto curioso sin formación médica.
|
| 104 |
+
- OBJETIVO PRIMARIO: Precisión y claridad estructurada.
|
| 105 |
+
|
| 106 |
+
Reglas Estrictas que Debes Seguir:
|
| 107 |
+
- IDIOMA: El resumen DEBE estar escrito en español.
|
| 108 |
+
- LONGITUD DE LA ORACIÓN: Usa oraciones claras y bien construidas. Las oraciones complejas son aceptables si mejoran la claridad y la precisión.
|
| 109 |
+
- VOCABULARIO: Usa la terminología médica correcta. Puedes asumir que el lector puede entender los términos por el contexto o buscarlos, pero para términos muy especializados, proporciona una breve explicación entre paréntesis. Por ejemplo: "...mostró evidencia de hiperplasia (un aumento en el número de células)."
|
| 110 |
+
- TONO: Mantén un tono profesional, empático y respetuoso. Sé autoritario pero no clínico o frío.
|
| 111 |
+
- ESTRUCTURA: Proporciona un resumen detallado y estructurado. Usa encabezados para organizar la información, como "Contexto," "Hallazgos Clave," "Interpretación Clínica," y "Próximos Pasos."
|
| 112 |
+
- ENFOQUE: Sé completo y fiel al resumen original. Incluye detalles importantes, resultados de pruebas y diagnósticos diferenciales mencionados en la fuente.
|
| 113 |
+
|
| 114 |
+
- Nunca uses emojis.
|
| 115 |
+
- No expliques la pronunciación.
|
| 116 |
+
"""
|
| 117 |
+
},
|
| 118 |
+
"fr": {
|
| 119 |
+
"B1": """Vous êtes un assistant de résumé. Votre unique et plus important objectif est de réécrire un texte médical pour un niveau de lecture de cours préparatoire (âges 5-7). La simplicité est plus importante que le détail.
|
| 120 |
+
|
| 121 |
+
Mandat Principal :
|
| 122 |
+
- PUBLIC CIBLE : Un enfant de 6 ans.
|
| 123 |
+
- OBJECTIF PRINCIPAL : Simplicité extrême. Si vous devez choisir entre la précision des détails et la simplicité, choisissez TOUJOURS la simplicité.
|
| 124 |
+
|
| 125 |
+
Règles Strictes à Suivre Impérativement :
|
| 126 |
+
- LANGUE : Le résumé DOIT être rédigé en français.
|
| 127 |
+
- LONGUEUR DES PHRASES : Presque toutes les phrases doivent faire moins de 10 mots. Utilisez des phrases très courtes et simples.
|
| 128 |
+
- VOCABULAIRE : Utilisez uniquement des mots très courants et quotidiens qu'un enfant de cet âge connaîtrait. Évitez tout terme médical ou scientifique. Au lieu de 'fémur', dites 'l'os de la cuisse'. Au lieu de 'bénin', dites 'pas dangereux'.
|
| 129 |
+
- TON : Soyez très doux, calme et rassurant. Comme un médecin bienveillant qui explique quelque chose à un jeune enfant.
|
| 130 |
+
- STRUCTURE : Utilisez des paragraphes courts, souvent composés d'une ou deux phrases seulement.
|
| 131 |
+
- ENFOQUE : Mentionnez uniquement le ou les deux points les plus importants du texte original. Omettez tous les autres détails.
|
| 132 |
+
|
| 133 |
+
- N'utilisez jamais d'emojis.
|
| 134 |
+
- N'expliquez pas la prononciation.
|
| 135 |
+
- N'utilisez AUCUN jargon médical.
|
| 136 |
+
""",
|
| 137 |
+
"B2": """Vous êtes un assistant de résumé entraîné à réécrire des résumés médicaux pour un niveau de lecture de collège (âges 11–14). Votre objectif est la clarté pour un adolescent ayant une compréhension de base de la biologie.
|
| 138 |
+
|
| 139 |
+
Mandat Principal :
|
| 140 |
+
- PUBLIC CIBLE : Un adolescent de 14 ans en classe de biologie au collège.
|
| 141 |
+
- OBJECTIF PRINCIPAL : Clarté et explication directe.
|
| 142 |
+
|
| 143 |
+
Règles Strictes à Suivre Impérativement :
|
| 144 |
+
- LANGUE : Le résumé DOIT être rédigé en français.
|
| 145 |
+
- LONGUEUR DES PHRASES : Variez la longueur des phrases, mais visez une moyenne de 12-18 mots. Évitez les phrases longues et complexes.
|
| 146 |
+
- VOCABULAIRE : Vous pouvez utiliser des termes médicaux de base (ex: 'biopsie', 'cellules', 'tumeur'), mais vous DEVEZ les expliquer en termes simples immédiatement. Par exemple : "Une biopsie, c'est-à-dire quand on prélève un petit morceau de tissu pour l'analyser...".
|
| 147 |
+
- TON : Soyez empathique mais direct. Adoptez un ton pédagogique et informatif, comme un professeur de sciences.
|
| 148 |
+
- STRUCTURE : Organisez le résumé en paragraphes logiques. Vous pouvez utiliser des titres simples si cela améliore la clarté (ex: "Ce qu'ils ont trouvé", "Ce que cela signifie").
|
| 149 |
+
- ENFOQUE : Résumez les principales observations et leurs implications. Omettez les détails mineurs ou très techniques.
|
| 150 |
+
|
| 151 |
+
- N'utilisez jamais d'emojis.
|
| 152 |
+
- N'expliquez pas la prononciation.
|
| 153 |
+
""",
|
| 154 |
+
"B3": """Vous êtes un assistant de résumé entraîné à réécrire des résumés médicaux pour un adulte éduqué non-médecin (âges 17+). Votre objectif est d'être précis, complet et clair pour un lecteur de niveau universitaire.
|
| 155 |
+
|
| 156 |
+
Mandat Principal :
|
| 157 |
+
- PUBLIC CIBLE : Un étudiant ou un adulte curieux sans formation médicale.
|
| 158 |
+
- OBJECTIF PRINCIPAL : Précision et clarté structurée.
|
| 159 |
+
|
| 160 |
+
Règles Strictes à Suivre Impérativement :
|
| 161 |
+
- LANGUE : Le résumé DOIT être rédigé en français.
|
| 162 |
+
- LONGUEUR DES PHRASES : Utilisez des phrases claires et bien construites. Les phrases complexes sont acceptables si elles améliorent la clarté et la précision.
|
| 163 |
+
- VOCABULAIRE : Utilisez la terminologie médicale correcte. Vous pouvez supposer que le lecteur peut comprendre les termes par le contexte ou les rechercher, mais pour les termes très spécialisés, fournissez une brève explication entre parenthèses. Par exemple : "...montrait des signes d'hyperplasie (une augmentation du nombre de cellules)."
|
| 164 |
+
- TON : Maintenez un ton professionnel, empathique et respectueux. Soyez directif mais ni clinique ni froid.
|
| 165 |
+
- STRUCTURE : Fournissez un résumé détaillé et structuré. Utilisez des titres pour organiser l'information, tels que "Contexte", "Principales Observations", "Interprétation Clinique" et "Prochaines Étapes".
|
| 166 |
+
- ENFOQUE : Soyez complet et fidèle au résumé source. Incluez les détails importants, les résultats des tests et les diagnostics différentiels mentionnés dans la source.
|
| 167 |
+
|
| 168 |
+
- N'utilisez jamais d'emojis.
|
| 169 |
+
- N'expliquez pas la prononciation.
|
| 170 |
+
"""
|
| 171 |
+
},
|
| 172 |
+
|
| 173 |
+
"pt": {
|
| 174 |
+
"B1": """Você é um assistente de resumo. O seu único e mais importante objetivo é reescrever textos médicos para um nível de leitura da primeira série (idades 5-7). A simplicidade é mais importante que os detalhes.
|
| 175 |
+
|
| 176 |
+
Mandato Principal:
|
| 177 |
+
- PÚBLICO-ALVO: Uma criança de 6 anos.
|
| 178 |
+
- OBJETIVO PRINCIPAL: Simplicidade extrema. Se tiver que escolher entre a precisão dos detalhes e a simplicidade, ESCOLHA SEMPRE a simplicidade.
|
| 179 |
+
|
| 180 |
+
Regras Rígidas que Você Deve Seguir:
|
| 181 |
+
- IDIOMA: O resumo DEVE ser escrito em português.
|
| 182 |
+
- COMPRIMENTO DAS FRASES: Quase todas as frases devem ter menos de 10 palavras. Use frases muito curtas e simples.
|
| 183 |
+
- VOCABULÁRIO: Use apenas palavras quotidianas e muito comuns que uma criança da primeira série conheceria. Evite qualquer termo médico ou científico. Em vez de 'fêmur', diga 'o osso da coxa'. Em vez de 'benigno', diga 'que não faz mal'.
|
| 184 |
+
- TOM: Seja muito gentil, calmo e tranquilizador. Como um médico amável a explicar algo a uma criança pequena.
|
| 185 |
+
- ESTRUTURA: Use parágrafos curtos, muitas vezes com apenas uma ou duas frases.
|
| 186 |
+
- FOCO: Mencione apenas um ou dois dos pontos mais importantes do texto original. Omita todos os outros detalhes.
|
| 187 |
+
|
| 188 |
+
- Nunca use emojis.
|
| 189 |
+
- Não explique a pronúncia.
|
| 190 |
+
- NÃO use NENHUM jargão médico.
|
| 191 |
+
""",
|
| 192 |
+
"B2": """Você é um assistente de resumo treinado para reescrever resumos médicos para um nível de leitura do ensino fundamental II (idades 11–14). O seu objetivo é a clareza para um adolescente com conhecimentos básicos de biologia.
|
| 193 |
+
|
| 194 |
+
Mandato Principal:
|
| 195 |
+
- PÚBLICO-ALVO: Um adolescente de 14 anos numa aula de biologia.
|
| 196 |
+
- OBJETIVO PRINCIPAL: Clareza e explicação direta.
|
| 197 |
+
|
| 198 |
+
Regras Rígidas que Você Deve Seguir:
|
| 199 |
+
- IDIOMA: O resumo DEVE ser escrito em português.
|
| 200 |
+
- COMPRIMENTO DAS FRASES: Varie o comprimento das frases, mas procure uma média de 12 a 18 palavras. Evite frases longas e complexas.
|
| 201 |
+
- VOCABULÁRIO: Pode usar termos médicos básicos (ex: 'biópsia', 'células', 'tumor'), mas você DEVE explicá-los em termos simples imediatamente. Por exemplo: "Uma biópsia, que é quando um pequeno pedaço de tecido é retirado para ser analisado...".
|
| 202 |
+
- TOM: Seja empático, mas direto. Use um tom educativo e informativo, como um professor de ciências.
|
| 203 |
+
- ESTRUTURA: Organize o resumo em parágrafos lógicos. Pode usar títulos simples se isso ajudar na clareza (ex: "O que eles encontraram", "O que isso significa").
|
| 204 |
+
- FOCO: Resuma os principais achados e as suas implicações. Omita detalhes menores ou muito técnicos.
|
| 205 |
+
|
| 206 |
+
- Nunca use emojis.
|
| 207 |
+
- Não explique a pronúncia.
|
| 208 |
+
""",
|
| 209 |
+
"B3": """Você é um assistente de resumo treinado para reescrever resumos médicos para um adulto instruído, mas sem formação médica (idades 17+). O seu objetivo é ser preciso, abrangente e claro para um leitor de nível universitário.
|
| 210 |
+
|
| 211 |
+
Mandato Principal:
|
| 212 |
+
- PÚBLICO-ALVO: Um estudante universitário ou adulto curioso sem formação médica.
|
| 213 |
+
- OBJETIVO PRINCIPAL: Precisão e clareza estruturada.
|
| 214 |
+
|
| 215 |
+
Regras Rígidas que Você Deve Seguir:
|
| 216 |
+
- IDIOMA: O resumo DEVE ser escrito em português.
|
| 217 |
+
- COMPRIMENTO DAS FRASES: Use frases claras e bem construídas. Frases complexas são aceitáveis se melhorarem a clareza e a precisão.
|
| 218 |
+
- VOCABULÁRIO: Use a terminologia médica correta. Pode assumir que o leitor consegue entender os termos pelo contexto ou pesquisá-los, mas para termos muito especializados, forneça uma breve explicação entre parênteses. Por exemplo: "...mostrou evidência de hiperplasia (um aumento no número de células)."
|
| 219 |
+
- TOM: Mantenha um tom profissional, empático e respeitoso. Seja confiante, mas não clínico ou frio.
|
| 220 |
+
- ESTRUTURA: Forneça um resumo detalhado e estruturado. Use títulos para organizar a informação, como "Contexto", "Principais Achados", "Interpretação Clínica" e "Próximos Passos".
|
| 221 |
+
- FOCO: Seja abrangente e fiel ao resumo original. Inclua detalhes importantes, resultados de testes e diagnósticos diferenciais mencionados na fonte.
|
| 222 |
+
|
| 223 |
+
- Nunca use emojis.
|
| 224 |
+
- Não explique a pronúncia.
|
| 225 |
+
"""
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
}
|
| 229 |
+
# User-turn prompt templates, keyed by language code ("en", "es", "fr", "pt").
# Each template expects two str.format fields — {article} (full source text for
# context) and {gold_summary} (the expert summary to rewrite) — and is paired
# with a band-specific system prompt in generate_synthetic_summary.
USER_PROMPT_TEMPLATES = {
    "en": """Please rewrite the following expert summary for the specified target audience. Use the full article for context if needed.
**Full Article Context:**
{article}
**Expert Summary to Rewrite:**
{gold_summary}
""",
    "es": """Por favor, reescribe el siguiente resumen de experto para el público objetivo especificado. Usa el artículo completo como contexto si es necesario.
**Contexto del Artículo Completo:**
{article}
**Resumen de Experto a Reescribir:**
{gold_summary}
""",
    "fr": """Veuillez réécrire le résumé d'expert suivant pour le public cible spécifié. Utilisez l'article complet comme contexte si nécessaire.
**Contexte de l'Article Complet :**
{article}
**Résumé d'Expert à Réécrire :**
{gold_summary}
""",
    "pt": """Por favor, reescreva o seguinte resumo de especialista para o público-alvo especificado. Use o artigo completo como contexto, se necessário.
**Contexto do Artigo Completo:**
{article}
**Resumo do Especialista a Ser Reescrito:**
{gold_summary}
"""
}
|
| 255 |
+
|
| 256 |
+
def generate_synthetic_summary(article, gold_summary, band, lang):
    """Call an OpenAI model to generate a synthetic summary for a given readability band and language.

    Args:
        article: Full source article, included in the user prompt for context.
        gold_summary: Expert-written summary to be rewritten.
        band: Readability band key ("B1", "B2", or "B3") selecting the system prompt.
        lang: Language code ("en", "es", "fr", "pt") selecting the prompt set.

    Returns:
        The generated summary text (stripped), or None if all three attempts fail.

    Raises:
        ValueError: If no prompts exist for *lang*, or *band* is not a known band.
    """
    prompts_for_lang = ALL_PROMPTS.get(lang)
    user_prompt_template = USER_PROMPT_TEMPLATES.get(lang)
    if not prompts_for_lang or not user_prompt_template:
        raise ValueError(f"No prompts available for language: {lang}")

    # Bug fix: an unknown band previously surfaced as a bare KeyError deep in
    # the call; validate it explicitly, mirroring the language check above.
    system_prompt = prompts_for_lang.get(band)
    if system_prompt is None:
        raise ValueError(f"Unknown readability band {band!r}; expected one of {sorted(prompts_for_lang)}")

    user_prompt = user_prompt_template.format(article=article, gold_summary=gold_summary)

    # Up to 3 attempts with a 5-second back-off between failures.
    for attempt in range(3):
        try:
            response = client.chat.completions.create(
                model="gpt-4.1-mini",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                temperature=0.3
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            print(f"API call failed on attempt {attempt + 1} for band {band}: {e}")
            if attempt < 2:
                time.sleep(5)
            else:
                print(f"Failed to generate summary for band {band} after 3 attempts.")
                return None
|
| 284 |
+
|
| 285 |
+
def build_synthetic_dataset(input_path, output_path, lang, max_samples=None):
    """Generate a synthetic dataset from a JSON file for a specific language.

    Resumes from *output_path* when it already holds results (articles seen in
    a previous run are skipped), checkpoints every five completed records, and
    writes the final dataset at the end. *max_samples*, when set, caps the
    total number of records (existing ones included).
    """

    def _save(records):
        # Checkpoint/final write; keep non-ASCII characters readable on disk.
        with open(output_path, "w", encoding='utf-8') as handle:
            json.dump(records, handle, ensure_ascii=False, indent=4)

    results = []
    processed_articles = set()
    if os.path.exists(output_path):
        with open(output_path, 'r', encoding='utf-8') as handle:
            try:
                results = json.load(handle)
                processed_articles = {record['article'] for record in results}
                print(f"Loaded {len(results)} existing records from {output_path}.")
            except json.JSONDecodeError:
                print(f"Warning: Could not decode JSON from {output_path}. Starting fresh.")
                results = []

    with open(input_path, "r", encoding='utf-8') as handle:
        data = json.load(handle)

    items_to_process = [entry for entry in data if entry["fulltext"] not in processed_articles]
    print(f"Found {len(items_to_process)} new articles to process.")

    for entry in tqdm.tqdm(items_to_process):
        if max_samples and len(results) >= max_samples:
            print(f"Reached max_samples limit of {max_samples}.")
            break

        source_text = entry["fulltext"]
        reference = entry["summary"]

        # One summary per readability band; abandon the article if any band fails.
        per_band = {}
        for band in ["B1", "B2", "B3"]:
            rewritten = generate_synthetic_summary(source_text, reference, band, lang=lang)
            if not rewritten:
                per_band = None
                break
            per_band[band] = rewritten

        if per_band is not None:
            results.append({
                "article": source_text,
                "gold_summary": reference,
                "synthetic_summary": per_band
            })

        # Periodic checkpoint once new records (beyond the resumed ones) exist.
        if len(results) % 5 == 0 and len(results) > len(processed_articles):
            print(f"Processed {len(results)} total samples, saving progress...")
            _save(results)

    print("Generation complete. Saving final dataset...")
    _save(results)
    print(f"Dataset saved to {output_path}")
|
| 338 |
+
|
| 339 |
+
# --- Example Usage for English ---
# To run for English, set lang = "en" and point to your English data file.
lang = "pt"
path = f"/home/mshahidul/readctrl/data/testing_data_gs/multiclinsum_gs_train_{lang}.json"

# Bug fix: the old version bump read only the LAST character of the filename
# (so "...V9" -> would never reach V10 correctly, and "...V10" parsed as 0,
# yielding V1 again) and checked existence only once, silently overwriting an
# existing V2 file. Probe version numbers until an unused filename is found.
version = 1
output_file = f"/home/mshahidul/readctrl/generating_data/{lang}_syntheticV{version}.json"
while os.path.exists(output_file):
    version += 1
    output_file = f"/home/mshahidul/readctrl/generating_data/{lang}_syntheticV{version}.json"

build_synthetic_dataset(path, output_file, lang=lang, max_samples=100)
|
code/old/sz_es.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import re
import pyphen

# --- Basic Spanish text stats ---
# Spanish hyphenation dictionary; hyphenation points approximate syllable breaks.
_dic = pyphen.Pyphen(lang='es_ES')

# A "word" is a maximal run of letters, including accented Spanish vowels,
# u-dieresis, and n-tilde. Digits and punctuation are ignored.
_word_re = re.compile(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ]+", re.UNICODE)
|
| 8 |
+
|
| 9 |
+
def _tokenize_words(text):
    """Return every word-like token (letter runs, Spanish accents included) in *text*."""
    return [match.group(0) for match in _word_re.finditer(text)]
|
| 11 |
+
|
| 12 |
+
def _count_sentences(text):
|
| 13 |
+
# Split on ., !, ?, and Spanish ¡¿ — keep it simple
|
| 14 |
+
parts = re.split(r"[.!?¡¿]+", text)
|
| 15 |
+
return max(1, sum(1 for p in parts if p.strip()))
|
| 16 |
+
|
| 17 |
+
def _count_syllables_es(word):
    """Approximate the Spanish syllable count of *word* via pyphen hyphenation.

    Bug fix: ``pyphen.Pyphen`` has no ``hyphenate`` method (its API is
    ``inserted``/``positions``), and even a hyphenated *string* would make
    ``len(parts) + 1`` count characters, not syllables. ``inserted`` returns
    the word with '-' at each hyphenation point, so the number of '-'-separated
    parts is the syllable estimate; unsplittable words count as one syllable.
    """
    parts = _dic.inserted(word).split("-")
    return max(1, len(parts))
|
| 20 |
+
|
| 21 |
+
def _text_stats_es(text):
    """Return (words, sentences, syllables, long_words) for Spanish *text*.

    long_words counts tokens longer than 6 characters, as required by LIX.
    Syllables are 0 for empty input.
    """
    tokens = _tokenize_words(text)
    word_count = len(tokens)
    sentence_count = _count_sentences(text)
    syllable_count = sum(map(_count_syllables_es, tokens)) if word_count else 0
    long_word_count = sum(1 for token in tokens if len(token) > 6)
    return word_count, sentence_count, syllable_count, long_word_count
|
| 28 |
+
|
| 29 |
+
# --- Szigriszt–Pazos (INFLESZ) ---
|
| 30 |
+
def szigriszt_pazos(text):
    """Szigriszt-Pazos (INFLESZ) reading-ease score for Spanish text.

    Higher scores mean easier text. Returns None when the text contains
    no words or no sentences.
    """
    words, sentences, syllables, _ = _text_stats_es(text)
    if not words or not sentences:
        return None
    return 206.835 - 62.3 * (syllables / words) - (words / sentences)
|
| 36 |
+
|
| 37 |
+
# --- LIX (language-agnostic) ---
|
| 38 |
+
def lix(text):
    """LIX readability index: mean sentence length plus percentage of long words (>6 chars).

    Higher scores mean harder text. Returns None when the text contains
    no words or no sentences.
    """
    words, sentences, _, long_words = _text_stats_es(text)
    if not words or not sentences:
        return None
    return (words / sentences) + (100.0 * long_words / words)
|
| 43 |
+
|
| 44 |
+
# Example bands (tune to your corpus)
# Szigriszt-Pazos score ranges per band; higher score = easier text,
# so B1 (easiest target audience) maps to the highest range.
SZ_BANDS = {
    'B1': (65, 100),  # easy to very easy
    'B2': (55, 65),   # normal
    'B3': (40, 55),   # somewhat hard
}

# LIX score ranges per band; LIX grows with difficulty, so B1 is lowest.
LIX_BANDS = {
    'B1': (20, 35),  # easier
    'B2': (35, 45),  # mid
    'B3': (45, 60),  # harder
}
|
| 56 |
+
|
| 57 |
+
def in_band(score, band, bands, delta=0.0):
    """Return True when *score* falls inside bands[band], widened by *delta* on each side.

    A None score (e.g. from an empty text) is never in any band.
    """
    if score is None:
        return False
    lower, upper = bands[band]
    return lower - delta <= score and score <= upper + delta
|
| 62 |
+
|
| 63 |
+
# Example usage: score a short Spanish sentence pair with both metrics.
text = "Las vacunas salvan millones de vidas cada año. Son seguras y eficaces."
sz = szigriszt_pazos(text)  # Szigriszt-Pazos reading-ease score (higher = easier)
lx = lix(text)  # LIX index (higher = harder)
# print("Szigriszt:", sz, "B1?", in_band(sz, 'B1', SZ_BANDS, delta=2))
# print("LIX:", lx, "B1?", in_band(lx, 'B1', LIX_BANDS, delta=2))
|
code/rc.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Utility script: grab and hold most of one GPU's VRAM until killed
# (e.g. to reserve a shared GPU). Select the GPU with --g.
import os
import json  # NOTE(review): unused in the visible code — confirm before removing
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--g", type=str, default="2", help="GPU ID")
args = parser.parse_args()
# IMPORTANT: these env vars must be set BEFORE `import torch`, otherwise
# CUDA device selection is ignored — do not reorder these statements.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.g)

import torch
import time

# Set the specific GPU device (change the index if it's not GPU 0; check with nvidia-smi)
# torch.cuda.set_device(0)

# Get total memory in bytes (should be around 85e9 for A100 80GB, but use reported value)
total_memory = torch.cuda.get_device_properties(0).total_memory

# List to hold allocated tensors — keeping references prevents the allocator
# from freeing the VRAM.
allocated_tensors = []

# Chunk size: Allocate in 4GB chunks to avoid fragmentation issues (adjust if needed)
chunk_size_bytes = 4 * 1024**3  # 4 GiB
# Number of float32 elements per chunk (element_size() is 4 bytes for float32).
chunk_elements = chunk_size_bytes // torch.tensor([], dtype=torch.float32).element_size()

try:
    allocated = 0
    while allocated < total_memory * 0.85:  # Allocate up to 85% to leave some headroom
        chunk = torch.empty(chunk_elements, dtype=torch.float32, device='cuda')
        allocated_tensors.append(chunk)
        allocated += chunk_size_bytes
        # Optional: Touch the memory to force allocation
        chunk.zero_()
        torch.cuda.synchronize()
except RuntimeError as e:
    # CUDA OOM is expected once the card fills up; anything else is re-raised.
    if 'out of memory' in str(e).lower():
        print(f"Allocated approximately {allocated / (1024**3):.2f} GB. Holding VRAM on A100.")
    else:
        raise e

# Hold the memory indefinitely
print("VRAM occupied. Running forever to hold it.")
while True:
    time.sleep(3600)  # Sleep 1 hour to minimize CPU usage; script will hold until killed
|
code/readability_final_res_process.ipynb
ADDED
|
@@ -0,0 +1,349 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"id": "30a7b117",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"import json\n",
|
| 11 |
+
"import os\n",
|
| 12 |
+
"\n",
|
| 13 |
+
"# Define the file paths\n",
|
| 14 |
+
"file_paths = [\n",
|
| 15 |
+
" '/home/mshahidul/readctrl/data/reasoning/refined_evaluated_support_0_100_qwen3-32B.json',\n",
|
| 16 |
+
" '/home/mshahidul/readctrl/data/reasoning/refined_evaluated_support_100_200_qwen3-32B.json',\n",
|
| 17 |
+
" '/home/mshahidul/readctrl/data/reasoning/refined_evaluated_support_200_300_qwen3-32B.json'\n",
|
| 18 |
+
"]\n",
|
| 19 |
+
"\n",
|
| 20 |
+
"merged_data = []\n",
|
| 21 |
+
"\n",
|
| 22 |
+
"# Loop through and append data\n",
|
| 23 |
+
"for file_path in file_paths:\n",
|
| 24 |
+
" if os.path.exists(file_path):\n",
|
| 25 |
+
" with open(file_path, 'r', encoding='utf-8') as f:\n",
|
| 26 |
+
" data = json.load(f)\n",
|
| 27 |
+
" # Assuming each file contains a list of objects\n",
|
| 28 |
+
" if isinstance(data, list):\n",
|
| 29 |
+
" merged_data.extend(data)\n",
|
| 30 |
+
" else:\n",
|
| 31 |
+
" merged_data.append(data)\n",
|
| 32 |
+
" print(f\"Successfully loaded: {file_path}\")\n",
|
| 33 |
+
" else:\n",
|
| 34 |
+
" print(f\"Warning: File not found: {file_path}\")\n",
|
| 35 |
+
"\n",
|
| 36 |
+
"# Save the merged result\n",
|
| 37 |
+
"output_path = '/home/mshahidul/readctrl/data/reasoning/refined_evaluated_support_merged_0_300_qwen3-32B.json'\n",
|
| 38 |
+
"with open(output_path, 'w', encoding='utf-8') as f:\n",
|
| 39 |
+
" json.dump(merged_data, f, indent=4)\n",
|
| 40 |
+
"\n",
|
| 41 |
+
"print(f\"\\nTotal records merged: {len(merged_data)}\")\n",
|
| 42 |
+
"print(f\"Merged file saved to: {output_path}\")"
|
| 43 |
+
]
|
| 44 |
+
},
|
| 45 |
+
{
|
| 46 |
+
"cell_type": "code",
|
| 47 |
+
"execution_count": null,
|
| 48 |
+
"id": "27ab3270",
|
| 49 |
+
"metadata": {},
|
| 50 |
+
"outputs": [],
|
| 51 |
+
"source": [
|
| 52 |
+
"import json\n",
|
| 53 |
+
"\n",
|
| 54 |
+
"# Define file paths\n",
|
| 55 |
+
"readability_path = '/home/mshahidul/readctrl/data/classified_readability/classified_multiclinsum_test_en.json'\n",
|
| 56 |
+
"reasoning_path = '/home/mshahidul/readctrl/data/reasoning/refined_evaluated_support_merged_0_300_qwen3-32B.json'\n",
|
| 57 |
+
"output_path = '/home/mshahidul/readctrl/data/reasoning/merged_readability_reasoning_en_final.json'\n",
|
| 58 |
+
"\n",
|
| 59 |
+
"# 1. Load the readability data and create a lookup map\n",
|
| 60 |
+
"with open(readability_path, 'r') as f:\n",
|
| 61 |
+
" readability_data = json.load(f)\n",
|
| 62 |
+
"\n",
|
| 63 |
+
"# Create a dictionary for O(1) lookup: {id: score}\n",
|
| 64 |
+
"readability_lookup = {item['id']: item['readability_score'] for item in readability_data}\n",
|
| 65 |
+
"\n",
|
| 66 |
+
"# 2. Load the reasoning data\n",
|
| 67 |
+
"with open(reasoning_path, 'r') as f:\n",
|
| 68 |
+
" reasoning_data = json.load(f)\n",
|
| 69 |
+
"\n",
|
| 70 |
+
"# 3. Merge the scores into the reasoning data\n",
|
| 71 |
+
"merged_count = 0\n",
|
| 72 |
+
"for entry in reasoning_data:\n",
|
| 73 |
+
" entry_id = entry.get('id')\n",
|
| 74 |
+
" if entry_id in readability_lookup:\n",
|
| 75 |
+
" # Add the score to the existing dictionary\n",
|
| 76 |
+
" entry['readability_score'] = readability_lookup[entry_id]\n",
|
| 77 |
+
" merged_count += 1\n",
|
| 78 |
+
" else:\n",
|
| 79 |
+
" # Optional: Handle cases where an ID is missing in the readability file\n",
|
| 80 |
+
" entry['readability_score'] = None\n",
|
| 81 |
+
"\n",
|
| 82 |
+
"# 4. Save the merged result\n",
|
| 83 |
+
"with open(output_path, 'w') as f:\n",
|
| 84 |
+
" json.dump(reasoning_data, f, indent=4)\n",
|
| 85 |
+
"\n",
|
| 86 |
+
"print(f\"Successfully merged {merged_count} records. Saved to {output_path}\")"
|
| 87 |
+
]
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"cell_type": "code",
|
| 91 |
+
"execution_count": 3,
|
| 92 |
+
"id": "2ef2e0b6",
|
| 93 |
+
"metadata": {},
|
| 94 |
+
"outputs": [
|
| 95 |
+
{
|
| 96 |
+
"name": "stdout",
|
| 97 |
+
"output_type": "stream",
|
| 98 |
+
"text": [
|
| 99 |
+
"Threshold set to: 90.0%\n",
|
| 100 |
+
"Successfully saved 192 records to: /home/mshahidul/readctrl/data/final_result/processed_threshold_results.json\n"
|
| 101 |
+
]
|
| 102 |
+
}
|
| 103 |
+
],
|
| 104 |
+
"source": [
|
| 105 |
+
"import json\n",
|
| 106 |
+
"import os\n",
|
| 107 |
+
"\n",
|
| 108 |
+
"# Configuration\n",
|
| 109 |
+
"input_file = '/home/mshahidul/readctrl/data/reasoning/merged_readability_reasoning_en_final.json'\n",
|
| 110 |
+
"output_dir = '/home/mshahidul/readctrl/data/final_result'\n",
|
| 111 |
+
"output_filename = 'processed_threshold_results.json'\n",
|
| 112 |
+
"\n",
|
| 113 |
+
"# Set your threshold here (e.g., 0.90 for 90%, 0.85 for 85%)\n",
|
| 114 |
+
"SUPPORT_THRESHOLD = 0.90 \n",
|
| 115 |
+
"\n",
|
| 116 |
+
"def process_with_threshold(threshold):\n",
|
| 117 |
+
" # Ensure the output folder exists\n",
|
| 118 |
+
" if not os.path.exists(output_dir):\n",
|
| 119 |
+
" os.makedirs(output_dir)\n",
|
| 120 |
+
"\n",
|
| 121 |
+
" # Load the source data\n",
|
| 122 |
+
" try:\n",
|
| 123 |
+
" with open(input_file, 'r') as f:\n",
|
| 124 |
+
" data = json.load(f)\n",
|
| 125 |
+
" except FileNotFoundError:\n",
|
| 126 |
+
" print(f\"Error: Source file not found at {input_file}\")\n",
|
| 127 |
+
" return\n",
|
| 128 |
+
"\n",
|
| 129 |
+
" final_output = []\n",
|
| 130 |
+
"\n",
|
| 131 |
+
" for item in data:\n",
|
| 132 |
+
" evals = item.get('subclaim_evaluations', [])\n",
|
| 133 |
+
" \n",
|
| 134 |
+
" if not evals:\n",
|
| 135 |
+
" continue # Skip items with no subclaims to evaluate\n",
|
| 136 |
+
" \n",
|
| 137 |
+
" # Calculate the percentage of supported subclaims\n",
|
| 138 |
+
" supported_count = sum(1 for sub in evals if sub.get('support_label') == 'supported')\n",
|
| 139 |
+
" support_ratio = supported_count / len(evals)\n",
|
| 140 |
+
" \n",
|
| 141 |
+
" # Keep if it meets the threshold (e.g., 0.90)\n",
|
| 142 |
+
" if support_ratio >= threshold:\n",
|
| 143 |
+
" clean_item = item.copy()\n",
|
| 144 |
+
" \n",
|
| 145 |
+
" # Map readability_score to difficulty\n",
|
| 146 |
+
" score = clean_item.get('readability_score', 0)\n",
|
| 147 |
+
" if score >= 4:\n",
|
| 148 |
+
" clean_item['difficulty'] = 'easy'\n",
|
| 149 |
+
" elif score == 3:\n",
|
| 150 |
+
" clean_item['difficulty'] = 'medium'\n",
|
| 151 |
+
" else:\n",
|
| 152 |
+
" clean_item['difficulty'] = 'hard'\n",
|
| 153 |
+
" \n",
|
| 154 |
+
" # Add metadata about the support ratio for transparency\n",
|
| 155 |
+
" clean_item['support_percentage'] = round(support_ratio * 100, 2)\n",
|
| 156 |
+
" \n",
|
| 157 |
+
" # Remove the subclaim_evaluations field\n",
|
| 158 |
+
" if 'subclaim_evaluations' in clean_item:\n",
|
| 159 |
+
" del clean_item['subclaim_evaluations']\n",
|
| 160 |
+
" \n",
|
| 161 |
+
" final_output.append(clean_item)\n",
|
| 162 |
+
"\n",
|
| 163 |
+
" # Save to a single JSON file\n",
|
| 164 |
+
" target_path = os.path.join(output_dir, output_filename)\n",
|
| 165 |
+
" with open(target_path, 'w', encoding='utf-8') as out_f:\n",
|
| 166 |
+
" json.dump(final_output, out_f, indent=4, ensure_ascii=False)\n",
|
| 167 |
+
" \n",
|
| 168 |
+
" print(f\"Threshold set to: {threshold * 100}%\")\n",
|
| 169 |
+
" print(f\"Successfully saved {len(final_output)} records to: {target_path}\")\n",
|
| 170 |
+
"\n",
|
| 171 |
+
"if __name__ == \"__main__\":\n",
|
| 172 |
+
" process_with_threshold(SUPPORT_THRESHOLD)"
|
| 173 |
+
]
|
| 174 |
+
},
|
| 175 |
+
{
|
| 176 |
+
"cell_type": "code",
|
| 177 |
+
"execution_count": 4,
|
| 178 |
+
"id": "295a4a2a",
|
| 179 |
+
"metadata": {},
|
| 180 |
+
"outputs": [
|
| 181 |
+
{
|
| 182 |
+
"name": "stdout",
|
| 183 |
+
"output_type": "stream",
|
| 184 |
+
"text": [
|
| 185 |
+
"Success! Merged data saved to: /home/mshahidul/readctrl/data/factual_testing/merged_evaluated_support_0_300.json\n"
|
| 186 |
+
]
|
| 187 |
+
}
|
| 188 |
+
],
|
| 189 |
+
"source": [
|
| 190 |
+
"import json\n",
|
| 191 |
+
"import os\n",
|
| 192 |
+
"\n",
|
| 193 |
+
"# List of file paths to merge\n",
|
| 194 |
+
"file_paths = [\n",
|
| 195 |
+
" '/home/mshahidul/readctrl/data/factual_testing/evaluated_support_0_100_qwen3-32B.json',\n",
|
| 196 |
+
" '/home/mshahidul/readctrl/data/factual_testing/evaluated_support_100_200_qwen3-32B.json',\n",
|
| 197 |
+
" '/home/mshahidul/readctrl/data/factual_testing/evaluated_support_200_300_qwen3-32B.json'\n",
|
| 198 |
+
"]\n",
|
| 199 |
+
"\n",
|
| 200 |
+
"merged_data = []\n",
|
| 201 |
+
"\n",
|
| 202 |
+
"# Iterate through each file and append its contents to the list\n",
|
| 203 |
+
"for file_path in file_paths:\n",
|
| 204 |
+
" if os.path.exists(file_path):\n",
|
| 205 |
+
" with open(file_path, 'r', encoding='utf-8') as f:\n",
|
| 206 |
+
" data = json.load(f)\n",
|
| 207 |
+
" # If the JSON is a list, extend the merged list\n",
|
| 208 |
+
" if isinstance(data, list):\n",
|
| 209 |
+
" merged_data.extend(data)\n",
|
| 210 |
+
" # If the JSON is a single dictionary, append it\n",
|
| 211 |
+
" else:\n",
|
| 212 |
+
" merged_data.append(data)\n",
|
| 213 |
+
" else:\n",
|
| 214 |
+
" print(f\"Warning: File not found - {file_path}\")\n",
|
| 215 |
+
"\n",
|
| 216 |
+
"# Save the combined data to a new file\n",
|
| 217 |
+
"output_file = '/home/mshahidul/readctrl/data/factual_testing/merged_evaluated_support_0_300.json'\n",
|
| 218 |
+
"\n",
|
| 219 |
+
"with open(output_file, 'w', encoding='utf-8') as f:\n",
|
| 220 |
+
" json.dump(merged_data, f, indent=4)\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"print(f\"Success! Merged data saved to: {output_file}\")"
|
| 223 |
+
]
|
| 224 |
+
},
|
| 225 |
+
{
|
| 226 |
+
"cell_type": "code",
|
| 227 |
+
"execution_count": 8,
|
| 228 |
+
"id": "e7ba1534",
|
| 229 |
+
"metadata": {},
|
| 230 |
+
"outputs": [
|
| 231 |
+
{
|
| 232 |
+
"name": "stdout",
|
| 233 |
+
"output_type": "stream",
|
| 234 |
+
"text": [
|
| 235 |
+
"Updating scores for 100 documents...\n",
|
| 236 |
+
"Successfully updated scores for 100 documents.\n",
|
| 237 |
+
"File saved to: /home/mshahidul/readctrl/data/reasoning/updated_scores/refined_v2_full_evaluation_200_300_qwen3-32B.json\n"
|
| 238 |
+
]
|
| 239 |
+
}
|
| 240 |
+
],
|
| 241 |
+
"source": [
|
| 242 |
+
"import json\n",
|
| 243 |
+
"import argparse\n",
|
| 244 |
+
"import os\n",
|
| 245 |
+
"\n",
|
| 246 |
+
"def calculate_scores(data):\n",
|
| 247 |
+
" \"\"\"\n",
|
| 248 |
+
" Recalculates factual_attribution and completeness scores based on \n",
|
| 249 |
+
" the updated labels in attribution_details and completeness_details.\n",
|
| 250 |
+
" \"\"\"\n",
|
| 251 |
+
" updated_count = 0\n",
|
| 252 |
+
"\n",
|
| 253 |
+
" for doc in data:\n",
|
| 254 |
+
" # 1. Recalculate Factual Attribution Score\n",
|
| 255 |
+
" attribution_list = doc.get('attribution_details', [])\n",
|
| 256 |
+
" if attribution_list:\n",
|
| 257 |
+
" supported_attr = sum(1 for item in attribution_list if item.get('label') == 'supported')\n",
|
| 258 |
+
" doc['scores']['factual_attribution'] = supported_attr / len(attribution_list)\n",
|
| 259 |
+
" else:\n",
|
| 260 |
+
" doc['scores']['factual_attribution'] = 0.0\n",
|
| 261 |
+
"\n",
|
| 262 |
+
" # 2. Recalculate Completeness Score\n",
|
| 263 |
+
" completeness_list = doc.get('completeness_details', [])\n",
|
| 264 |
+
" if completeness_list:\n",
|
| 265 |
+
" supported_comp = sum(1 for item in completeness_list if item.get('present_in_summary') == 'supported')\n",
|
| 266 |
+
" doc['scores']['completeness'] = supported_comp / len(completeness_list)\n",
|
| 267 |
+
" else:\n",
|
| 268 |
+
" doc['scores']['completeness'] = 0.0\n",
|
| 269 |
+
" \n",
|
| 270 |
+
" updated_count += 1\n",
|
| 271 |
+
"\n",
|
| 272 |
+
" return data, updated_count\n",
|
| 273 |
+
"\n",
|
| 274 |
+
"if __name__ == \"__main__\":\n",
|
| 275 |
+
" # parser = argparse.ArgumentParser(description=\"Update scores in refined clinical evaluation JSON.\")\n",
|
| 276 |
+
" # parser.add_argument(\"--input_file\", type=str, required=True, help=\"Path to the refined JSON file.\")\n",
|
| 277 |
+
" # parser.add_argument(\"--output_file\", type=str, help=\"Path to save the updated JSON. If omitted, overwrites input.\")\n",
|
| 278 |
+
" # args = parser.parse_args()\n",
|
| 279 |
+
" input_file = '/home/mshahidul/readctrl/data/reasoning/refined_v2_full_evaluation_200_300_qwen3-32B.json'\n",
|
| 280 |
+
" output_path = \"/home/mshahidul/readctrl/data/reasoning/updated_scores\"\n",
|
| 281 |
+
" output_file = os.path.join(output_path, os.path.basename(input_file))\n",
|
| 282 |
+
" # Load data\n",
|
| 283 |
+
" with open(input_file, 'r') as f:\n",
|
| 284 |
+
" data = json.load(f)\n",
|
| 285 |
+
"\n",
|
| 286 |
+
" print(f\"Updating scores for {len(data)} documents...\")\n",
|
| 287 |
+
" \n",
|
| 288 |
+
" # Process\n",
|
| 289 |
+
" updated_data, count = calculate_scores(data)\n",
|
| 290 |
+
"\n",
|
| 291 |
+
" \n",
|
| 292 |
+
" \n",
|
| 293 |
+
" # Save results\n",
|
| 294 |
+
" with open(output_file, 'w') as f:\n",
|
| 295 |
+
" json.dump(updated_data, f, indent=2, ensure_ascii=False)\n",
|
| 296 |
+
"\n",
|
| 297 |
+
" print(f\"Successfully updated scores for {count} documents.\")\n",
|
| 298 |
+
" print(f\"File saved to: {output_file}\")"
|
| 299 |
+
]
|
| 300 |
+
},
|
| 301 |
+
{
|
| 302 |
+
"cell_type": "code",
|
| 303 |
+
"execution_count": 12,
|
| 304 |
+
"id": "612109dc",
|
| 305 |
+
"metadata": {},
|
| 306 |
+
"outputs": [
|
| 307 |
+
{
|
| 308 |
+
"name": "stdout",
|
| 309 |
+
"output_type": "stream",
|
| 310 |
+
"text": [
|
| 311 |
+
"dict_keys(['index', 'id', 'fulltext', 'fulltext_subclaims', 'summary', 'summary_subclaims', 'diff_label_texts', 'diff_label_subclaims', 'readability_score'])\n",
|
| 312 |
+
"dict_keys(['low_health_literacy', 'intermediate_health_literacy', 'proficient_health_literacy'])\n",
|
| 313 |
+
"dict_keys(['low_health_literacy', 'intermediate_health_literacy', 'proficient_health_literacy'])\n"
|
| 314 |
+
]
|
| 315 |
+
}
|
| 316 |
+
],
|
| 317 |
+
"source": [
|
| 318 |
+
"# /home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json\n",
|
| 319 |
+
"import json\n",
|
| 320 |
+
"with open('/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_syn_data_with_gs_summary_en.json', 'r') as f:\n",
|
| 321 |
+
" anno_data = json.load(f)\n",
|
| 322 |
+
"print(anno_data[0].keys())\n",
|
| 323 |
+
"print(anno_data[0]['diff_label_texts'].keys())\n",
|
| 324 |
+
"print(anno_data[0]['diff_label_subclaims'].keys())"
|
| 325 |
+
]
|
| 326 |
+
}
|
| 327 |
+
],
|
| 328 |
+
"metadata": {
|
| 329 |
+
"kernelspec": {
|
| 330 |
+
"display_name": "un",
|
| 331 |
+
"language": "python",
|
| 332 |
+
"name": "python3"
|
| 333 |
+
},
|
| 334 |
+
"language_info": {
|
| 335 |
+
"codemirror_mode": {
|
| 336 |
+
"name": "ipython",
|
| 337 |
+
"version": 3
|
| 338 |
+
},
|
| 339 |
+
"file_extension": ".py",
|
| 340 |
+
"mimetype": "text/x-python",
|
| 341 |
+
"name": "python",
|
| 342 |
+
"nbconvert_exporter": "python",
|
| 343 |
+
"pygments_lexer": "ipython3",
|
| 344 |
+
"version": "3.11.14"
|
| 345 |
+
}
|
| 346 |
+
},
|
| 347 |
+
"nbformat": 4,
|
| 348 |
+
"nbformat_minor": 5
|
| 349 |
+
}
|
code/test.ipynb
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"id": "25745a03",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"# /home/mshahidul/readctrl/data/translated_data/translation_wo_judge/multiclinsum_gs_train_en2bn_gemma(0_200).json\n",
|
| 11 |
+
"import json\n",
|
| 12 |
+
"with open(\"/home/mshahidul/readctrl/data/translated_data/translation_wo_judge/multiclinsum_gs_train_en2bn_gemma(0_200).json\", \"r\") as f:\n",
|
| 13 |
+
" data = json.load(f)\n",
|
| 14 |
+
"\n",
|
| 15 |
+
"for item in data:\n",
|
| 16 |
+
" \n",
|
| 17 |
+
"\n",
|
| 18 |
+
"\n"
|
| 19 |
+
]
|
| 20 |
+
},
|
| 21 |
+
{
|
| 22 |
+
"cell_type": "code",
|
| 23 |
+
"execution_count": 6,
|
| 24 |
+
"id": "a170a10b",
|
| 25 |
+
"metadata": {},
|
| 26 |
+
"outputs": [
|
| 27 |
+
{
|
| 28 |
+
"data": {
|
| 29 |
+
"text/plain": [
|
| 30 |
+
"'14-year-old previously healthy adolescent who presented to the Primary Emergency Care Service (PEC) of Osorno with a 11-day history of a predominantly nocturnal irritative cough. Symptomatic treatment was indicated, evolving with dyspnoea and orthopnoea. He presented to the Emergency Department of the Osorno Base Hospital (OBH), with severe respiratory distress, intolerance to supine position, and abdominal pain. He was admitted to the Paediatric Intensive Care Unit (PICU), tachycardic, hypertensive, polypneic, oxygen saturation 96% with FiO2 35%, rosy, hydrated and well perfused, with flat jugular veins, small bilateral supraclavicular lymphadenopathies. The thorax was without retraction of soft tissue, maintained in a genupectoral position, with decreased pulmonary murmurs in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The soft abdomen was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. 
Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. 
He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. 
The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. 
Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. 
He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. 
The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. 
Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. 
He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. 
The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. 
Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. 
He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. 
The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. 
Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. 
He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and the cardiac auscultation had muffled tones, without breath sounds. 
The abdominal soft tissue was not easily depressible and sensitive in both hypochondria, with doubtful visceral enlargements and no injuries. The chest radiograph showed a superior mediastinal mass and atelectasis of the right middle lobe associated with ipsilateral pleural effusion. Contrast-enhanced chest X-ray was not performed due to contraindication of anaesthesia, as stated in the summary of transfer from OBH. He was transferred in a serious condition to the PICU HBV, with a Mediastinal Compression Syndrome, with clinical suspicion of non-Hodgkin lymphoma. He was evaluated by the paediatric haemato-oncology, paediatric surgery, paediatric intensive care, imaging, radiotherapy and paediatric oncology teams, with a normal pulmonary murmur in both bases, and\\n\\nA nephrological evaluation was performed, which confirmed renal failure secondary to tumor lysis syndrome, without dialysis urgency and tendency to hypertension, with creatinine 1.54 mg/dL, phosphemia 11 mg/dL, without hypernatremia. It continued with hyperhydration, diuretic (furosemide) and antihypertensive (amlodipine). From the respiratory point of view, it presented oxygen requirement, with FIO2 35% by mask of Venturi, suspending this supply on the third day of admission. It evolved with episodes of psychomotor agitation, associated to the diagnosis in process, which was treated according to the institutional protocol of psychomotor agitation, with psychological and psychiatric support, with satisfactory evolution. On the third day of admission and treatment a CT scan of the thorax, abdomen and pelvis was performed with contrast, observing an increase in the size of the thymus, of homogeneous aspect, probably in the context of a lymphoproliferative process and findings suggestive of pulmonary thromboembolism. 
The angioCT of the thorax showed thrombosis of the jugular vein, extensive bilateral pleural effusion associated to atelectatic phenomena in both bases, with signs of medical bilateral nephrosis. Anticoagulation with enoxaparin (1 mg/kg dose, every 12 hours) was indicated for twenty days. Then the angioCT of control showed resolution of the thrombosis.On the fourth day of admission and treatment, a diagnostic and extension study was performed, which included, among others, a complete biochemical profile including lipid profile, granulopoietic hyperplasia of the bone marrow (myelogram), flow cytometry (bone marrow) in which no cells with a predominant clonal or neoplastic immunophenotype of haemological lineage were observed, flow cytometry in peripheral blood negative for neoplastic cells, cytological of pleural fluid negative for neoplastic cells, flow cytometry of pleural fluid without evidence of haemological neoplasia. It was presented to the paediatric oncological committee, highlighting that it was not possible to take a biopsy of the tumour given that the mediastinal mass disappeared with the cytoreductive treatment, assuming the diagnosis of lymphoblastic lymphoma by the clinical picture and the response to treatment, according to the PINDA 0516 protocol. This protocol contemplates in Induction IA eight doses of Lasp E. coli of 10,000 IU/m2. Having received seven doses of L-asp and with a cumulative dose of ninety thousand international units plus glucocorticoid (prednisone), presented a picture of decline, vomiting, abdominal pain and mild dehydration. There was suspicion of pancreatitis, which was ruled out by normal amylase/lipase values and normal hepatic tests. At that time it had plasma electrolyte profile with hyponatraemia of 126 mOsm/kg and urinary osmolality of 510 mOsm/kg, both normal values. With hyponatraemia and hypertriglyceridaemia, there was suspicion of RAM of pseudohyponatraemia secondary to hypertriglyceridaemia associated to L-asp. 
It was evaluated by Gastroenterology and Endocrinology, indicating a diet low in refined sugars and rich in fiber, fibrates (ciprofibrato 100 mg oral daily) and omega 3 (4 g oral daily), until triglyceride values of 300 mg/dL were achieved. Two weeks later the triglycerides had a value of 79 mg/dL. Ciprofibrato and omega3 were suspended, indicating prophylactic use associated to corticoid and L-asp treatment. A total of twelve doses of L-asp were completed with a cumulative dose of one hundred and eighty four thousand international units corresponding to the induction protocol. The suspicion of RAM was subjected to causality evaluation, with the modified Karch and Lasagna algorithm by WHO5, which resulted in “Definitive” RAM for the association of L-asp and Prednisone\\n'"
|
| 31 |
+
]
|
| 32 |
+
},
|
| 33 |
+
"execution_count": 6,
|
| 34 |
+
"metadata": {},
|
| 35 |
+
"output_type": "execute_result"
|
| 36 |
+
}
|
| 37 |
+
],
|
| 38 |
+
"source": [
|
| 39 |
+
"txt"
|
| 40 |
+
]
|
| 41 |
+
}
|
| 42 |
+
],
|
| 43 |
+
"metadata": {
|
| 44 |
+
"kernelspec": {
|
| 45 |
+
"display_name": "unsloth",
|
| 46 |
+
"language": "python",
|
| 47 |
+
"name": "python3"
|
| 48 |
+
},
|
| 49 |
+
"language_info": {
|
| 50 |
+
"codemirror_mode": {
|
| 51 |
+
"name": "ipython",
|
| 52 |
+
"version": 3
|
| 53 |
+
},
|
| 54 |
+
"file_extension": ".py",
|
| 55 |
+
"mimetype": "text/x-python",
|
| 56 |
+
"name": "python",
|
| 57 |
+
"nbconvert_exporter": "python",
|
| 58 |
+
"pygments_lexer": "ipython3",
|
| 59 |
+
"version": "3.11.11"
|
| 60 |
+
}
|
| 61 |
+
},
|
| 62 |
+
"nbformat": 4,
|
| 63 |
+
"nbformat_minor": 5
|
| 64 |
+
}
|
code/text_classifier/dspy.ipynb
ADDED
|
@@ -0,0 +1,224 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"cells": [
|
| 3 |
+
{
|
| 4 |
+
"cell_type": "code",
|
| 5 |
+
"execution_count": null,
|
| 6 |
+
"id": "8a9d70f0",
|
| 7 |
+
"metadata": {},
|
| 8 |
+
"outputs": [],
|
| 9 |
+
"source": [
|
| 10 |
+
"import dspy\n",
|
| 11 |
+
"import json\n",
|
| 12 |
+
"from typing import Literal\n",
|
| 13 |
+
"from dspy.teleprompt import BootstrapFewShotWithRandomSearch\n",
|
| 14 |
+
"from dspy.evaluate import Evaluate\n",
|
| 15 |
+
"\n",
|
| 16 |
+
"# --- 1. LLM Configuration ---\n",
|
| 17 |
+
"api_file = \"/home/mshahidul/api_new.json\"\n",
|
| 18 |
+
"with open(api_file, \"r\") as f:\n",
|
| 19 |
+
" api_keys = json.load(f)\n",
|
| 20 |
+
"openai_api_key = api_keys[\"openai\"]\n",
|
| 21 |
+
"\n",
|
| 22 |
+
"# Student: Local vLLM (Deployment Model)\n",
|
| 23 |
+
"vllm_model = dspy.LM(\n",
|
| 24 |
+
" model='Qwen/Qwen3-30B-A3B-Instruct-2507',\n",
|
| 25 |
+
" api_base=\"http://172.16.34.29:8030/v1\",\n",
|
| 26 |
+
" api_key=\"EMPTY\",\n",
|
| 27 |
+
" temperature=0.0\n",
|
| 28 |
+
")\n",
|
| 29 |
+
"\n",
|
| 30 |
+
"# Teacher: OpenAI (High-quality rationale generation)\n",
|
| 31 |
+
"# Note: Ensure 'gpt-5' is the correct model name in your environment (usually 'gpt-4-turbo' or 'gpt-4o')\n",
|
| 32 |
+
"openai_model_teacher = dspy.LM(model='gpt-5', api_key=openai_api_key)\n",
|
| 33 |
+
"openai_model_student = dspy.LM(model='gpt-5-mini', api_key=openai_api_key)\n",
|
| 34 |
+
"\n",
|
| 35 |
+
"# Default LM for DSPy runtime\n",
|
| 36 |
+
"# Use the local vLLM for fast iteration; switch to openai_model_student if needed.\n",
|
| 37 |
+
"# dspy.configure(lm=vllm_model)\n",
|
| 38 |
+
"dspy.configure(lm=openai_model_student)"
|
| 39 |
+
]
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"cell_type": "code",
|
| 43 |
+
"execution_count": null,
|
| 44 |
+
"id": "0f350ef4",
|
| 45 |
+
"metadata": {},
|
| 46 |
+
"outputs": [],
|
| 47 |
+
"source": [
|
| 48 |
+
"class HealthLiteracySignature(dspy.Signature):\n",
|
| 49 |
+
" \"\"\"\n",
|
| 50 |
+
" Classify the health literacy level of a generated text \n",
|
| 51 |
+
" based on the original full source text.\n",
|
| 52 |
+
" \"\"\"\n",
|
| 53 |
+
" full_text = dspy.InputField(desc=\"The original clinical or source medical text.\")\n",
|
| 54 |
+
" generated_text = dspy.InputField(desc=\"The rewritten medical text to classify for health literacy based on the original source text.\")\n",
|
| 55 |
+
" \n",
|
| 56 |
+
" # Using Literal ensures the output is constrained to your three categories\n",
|
| 57 |
+
" literacy_label = dspy.OutputField(desc=\"One of: low_health_literacy, intermediate_health_literacy, proficient_health_literacy\")"
|
| 58 |
+
]
|
| 59 |
+
},
|
| 60 |
+
{
|
| 61 |
+
"cell_type": "code",
|
| 62 |
+
"execution_count": null,
|
| 63 |
+
"id": "e369f8e8",
|
| 64 |
+
"metadata": {},
|
| 65 |
+
"outputs": [],
|
| 66 |
+
"source": [
|
| 67 |
+
"class HealthLiteracyClassifier(dspy.Module):\n",
|
| 68 |
+
" def __init__(self):\n",
|
| 69 |
+
" super().__init__()\n",
|
| 70 |
+
" # Use ChainOfThought for better reasoning on medical jargon\n",
|
| 71 |
+
" self.classifier = dspy.ChainOfThought(HealthLiteracySignature)\n",
|
| 72 |
+
"\n",
|
| 73 |
+
" def forward(self, full_text, generated_text):\n",
|
| 74 |
+
" return self.classifier(full_text=full_text, generated_text=generated_text)"
|
| 75 |
+
]
|
| 76 |
+
},
|
| 77 |
+
{
|
| 78 |
+
"cell_type": "code",
|
| 79 |
+
"execution_count": null,
|
| 80 |
+
"id": "055542d5",
|
| 81 |
+
"metadata": {},
|
| 82 |
+
"outputs": [],
|
| 83 |
+
"source": [
|
| 84 |
+
"def prepare_data(raw_data):\n",
|
| 85 |
+
" dataset = []\n",
|
| 86 |
+
" for item in raw_data:\n",
|
| 87 |
+
" example = dspy.Example(\n",
|
| 88 |
+
" full_text=item['fulltext'],\n",
|
| 89 |
+
" generated_text=item['diff_label_texts'],\n",
|
| 90 |
+
" literacy_label=item['label'] # Matches the Signature field\n",
|
| 91 |
+
" ).with_inputs('full_text', 'generated_text')\n",
|
| 92 |
+
" dataset.append(example)\n",
|
| 93 |
+
" return dataset[:100], dataset[100:]"
|
| 94 |
+
]
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"cell_type": "code",
|
| 98 |
+
"execution_count": null,
|
| 99 |
+
"id": "e570be47",
|
| 100 |
+
"metadata": {},
|
| 101 |
+
"outputs": [],
|
| 102 |
+
"source": [
|
| 103 |
+
"import json\n",
|
| 104 |
+
"path = \"/home/mshahidul/readctrl/code/text_classifier/verified_combined_0-80.json\"\n",
|
| 105 |
+
"raw_data = json.load(open(path))\n",
|
| 106 |
+
"trainset, testset = prepare_data(raw_data)"
|
| 107 |
+
]
|
| 108 |
+
},
|
| 109 |
+
{
|
| 110 |
+
"cell_type": "code",
|
| 111 |
+
"execution_count": null,
|
| 112 |
+
"id": "39e90da8",
|
| 113 |
+
"metadata": {},
|
| 114 |
+
"outputs": [],
|
| 115 |
+
"source": [
|
| 116 |
+
"def health_literacy_metric(gold, pred, trace=None):\n",
|
| 117 |
+
" # Use 'literacy_label' because that is what's in your Signature\n",
|
| 118 |
+
" if not pred or not hasattr(pred, 'literacy_label'):\n",
|
| 119 |
+
" return False\n",
|
| 120 |
+
" \n",
|
| 121 |
+
" # Standardize both for comparison\n",
|
| 122 |
+
" gold_label = str(gold.literacy_label).strip().lower()\n",
|
| 123 |
+
" pred_label = str(pred.literacy_label).strip().lower()\n",
|
| 124 |
+
" \n",
|
| 125 |
+
" return gold_label == pred_label\n",
|
| 126 |
+
"\n",
|
| 127 |
+
"optimizer = BootstrapFewShotWithRandomSearch(\n",
|
| 128 |
+
" metric=health_literacy_metric,\n",
|
| 129 |
+
" max_bootstrapped_demos=3,\n",
|
| 130 |
+
" num_candidate_programs=8, \n",
|
| 131 |
+
" teacher_settings=dict(lm=openai_model_teacher)\n",
|
| 132 |
+
")\n",
|
| 133 |
+
"\n",
|
| 134 |
+
"# 3. Compile! This creates the \"optimized prompt\"\n",
|
| 135 |
+
"compiled_classifier = optimizer.compile(HealthLiteracyClassifier(), trainset=trainset)\n",
|
| 136 |
+
"\n",
|
| 137 |
+
"evaluator = Evaluate(devset=testset, metric=health_literacy_metric, num_threads=1, display_progress=True)\n",
|
| 138 |
+
"accuracy_score = evaluator(compiled_classifier)\n",
|
| 139 |
+
"compiled_classifier.save(\"health_literacy_model.json\")"
|
| 140 |
+
]
|
| 141 |
+
},
|
| 142 |
+
{
|
| 143 |
+
"cell_type": "markdown",
|
| 144 |
+
"id": "425291ff",
|
| 145 |
+
"metadata": {},
|
| 146 |
+
"source": [
|
| 147 |
+
"## "
|
| 148 |
+
]
|
| 149 |
+
},
|
| 150 |
+
{
|
| 151 |
+
"cell_type": "code",
|
| 152 |
+
"execution_count": 9,
|
| 153 |
+
"id": "f8ae33e8",
|
| 154 |
+
"metadata": {},
|
| 155 |
+
"outputs": [
|
| 156 |
+
{
|
| 157 |
+
"name": "stdout",
|
| 158 |
+
"output_type": "stream",
|
| 159 |
+
"text": [
|
| 160 |
+
"vllm-gpt-oss-20b_teacher-gpt5_v1\n",
|
| 161 |
+
"{'accuracy_score': 78.57, 'num_results': 84}\n",
|
| 162 |
+
"vllm-gemma-3-12b-it_teacher-gpt5_v1\n",
|
| 163 |
+
"{'accuracy_score': 79.76, 'num_results': 84}\n",
|
| 164 |
+
"vllm-Qwen2.5-7B-Instruct_teacher-gpt5_v1\n",
|
| 165 |
+
"{'accuracy_score': 59.52, 'num_results': 84}\n",
|
| 166 |
+
"student-gpt5-mini_teacher-gpt5_(fulltxt+gen_sum)\n",
|
| 167 |
+
"{'score': 88.1, 'results': 84}\n",
|
| 168 |
+
"vllm-Meta-Llama-3.1-8B-Instruct_teacher-gpt5_v1\n",
|
| 169 |
+
"{'accuracy_score': 78.57, 'num_results': 84}\n",
|
| 170 |
+
"vllm-phi-4_teacher-gpt5_v1\n",
|
| 171 |
+
"{'accuracy_score': 73.81, 'num_results': 84}\n",
|
| 172 |
+
"vllm-qwen3-8b_teacher-gpt5_v1\n",
|
| 173 |
+
"{'accuracy_score': 73.81, 'num_results': 84}\n",
|
| 174 |
+
"student-gpt5-mini_teacher-gpt5_v1\n",
|
| 175 |
+
"{'accuracy_score': 78.57, 'num_results': 84}\n"
|
| 176 |
+
]
|
| 177 |
+
}
|
| 178 |
+
],
|
| 179 |
+
"source": [
|
| 180 |
+
"# /home/mshahidul/readctrl/code/text_classifier/dspy_model\n",
|
| 181 |
+
"import os,json\n",
|
| 182 |
+
"folders = os.listdir(\"/home/mshahidul/readctrl/code/text_classifier/dspy_model\")\n",
|
| 183 |
+
"for folder in folders:\n",
|
| 184 |
+
" if os.path.isdir(f\"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder}\"):\n",
|
| 185 |
+
" files = os.listdir(f\"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder}\")\n",
|
| 186 |
+
" for file in files:\n",
|
| 187 |
+
" if file.endswith(\"accuracy.json\"):\n",
|
| 188 |
+
" path=(f\"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder}/{file}\")\n",
|
| 189 |
+
" print(path.split(\"/\")[-2])\n",
|
| 190 |
+
" data = json.load(open(f\"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder}/{file}\"))\n",
|
| 191 |
+
" print(data)\n"
|
| 192 |
+
]
|
| 193 |
+
},
|
| 194 |
+
{
|
| 195 |
+
"cell_type": "code",
|
| 196 |
+
"execution_count": null,
|
| 197 |
+
"id": "4c236110",
|
| 198 |
+
"metadata": {},
|
| 199 |
+
"outputs": [],
|
| 200 |
+
"source": []
|
| 201 |
+
}
|
| 202 |
+
],
|
| 203 |
+
"metadata": {
|
| 204 |
+
"kernelspec": {
|
| 205 |
+
"display_name": "unsloth",
|
| 206 |
+
"language": "python",
|
| 207 |
+
"name": "python3"
|
| 208 |
+
},
|
| 209 |
+
"language_info": {
|
| 210 |
+
"codemirror_mode": {
|
| 211 |
+
"name": "ipython",
|
| 212 |
+
"version": 3
|
| 213 |
+
},
|
| 214 |
+
"file_extension": ".py",
|
| 215 |
+
"mimetype": "text/x-python",
|
| 216 |
+
"name": "python",
|
| 217 |
+
"nbconvert_exporter": "python",
|
| 218 |
+
"pygments_lexer": "ipython3",
|
| 219 |
+
"version": "3.11.11"
|
| 220 |
+
}
|
| 221 |
+
},
|
| 222 |
+
"nbformat": 4,
|
| 223 |
+
"nbformat_minor": 5
|
| 224 |
+
}
|
code/text_classifier/qwen3_(4b)_instruct.py
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
|
| 6 |
+
os.environ["CUDA_VISIBLE_DEVICES"] = "2"
|
| 7 |
+
from datasets import load_dataset
|
| 8 |
+
from unsloth import FastLanguageModel
|
| 9 |
+
from trl import SFTConfig, SFTTrainer
|
| 10 |
+
|
| 11 |
+
from unsloth.chat_templates import get_chat_template, train_on_responses_only
|
| 12 |
+
|
| 13 |
+
MODEL_NAME = "unsloth/Qwen3-8B"
|
| 14 |
+
DATA_PATH = "verified_combined_0-80.json"
|
| 15 |
+
TEST_DATA_PATH = "verified_combined_0-80_test.json"
|
| 16 |
+
MAX_SEQ_LENGTH = 4096
|
| 17 |
+
FP16_SAVE_DIR = "/home/mshahidul/readctrl_model/full_model/classifier_model"
|
| 18 |
+
TEST_SPLIT_RATIO = 0.1
|
| 19 |
+
SPLIT_SEED = 3407
|
| 20 |
+
|
| 21 |
+
SYSTEM_PROMPT = (
|
| 22 |
+
"You are an expert medical editor and Health Literacy specialist. "
|
| 23 |
+
"Classify the health literacy level of the provided text."
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
USER_PROMPT = """Classify the health literacy level of the rewritten text.
|
| 27 |
+
|
| 28 |
+
Labels:
|
| 29 |
+
- low_health_literacy: very simple, living-room language, minimal jargon.
|
| 30 |
+
- intermediate_health_literacy: standard public-friendly language, limited jargon.
|
| 31 |
+
- proficient_health_literacy: technical, clinical, or academic language.
|
| 32 |
+
|
| 33 |
+
Input:
|
| 34 |
+
Full Source Text:
|
| 35 |
+
<<<FULLTEXT>>>
|
| 36 |
+
|
| 37 |
+
Rewritten Text:
|
| 38 |
+
<<<DIFF_LABEL_TEXTS>>>
|
| 39 |
+
|
| 40 |
+
Output: Return only one label string from the list above."""
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def build_messages(fulltext: str, diff_label_texts: str, label: str):
|
| 44 |
+
user_content = USER_PROMPT.replace("<<<FULLTEXT>>>", fulltext).replace(
|
| 45 |
+
"<<<DIFF_LABEL_TEXTS>>>", diff_label_texts
|
| 46 |
+
)
|
| 47 |
+
return [
|
| 48 |
+
{"role": "system", "content": SYSTEM_PROMPT},
|
| 49 |
+
{"role": "user", "content": user_content},
|
| 50 |
+
{"role": "assistant", "content": label},
|
| 51 |
+
]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def main():
    """Fine-tune a chat LM with LoRA on the literacy-label dataset and save a merged FP16 model.

    Pipeline: load base model -> attach LoRA adapters -> split dataset ->
    render chat-formatted training text -> train on assistant turns only ->
    merge adapters and save. Relies on module-level constants (MODEL_NAME,
    DATA_PATH, etc.) defined earlier in this file.
    """
    # Load the base model; no quantization, no full fine-tuning (LoRA only).
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=MODEL_NAME,
        max_seq_length=MAX_SEQ_LENGTH,
        load_in_4bit=False,
        load_in_8bit=False,
        full_finetuning=False,
    )

    # Attach LoRA adapters to all attention and MLP projections.
    model = FastLanguageModel.get_peft_model(
        model,
        r=32,
        target_modules=[
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
        ],
        lora_alpha=32,
        lora_dropout=0,
        bias="none",
        use_gradient_checkpointing="unsloth",
        random_state=3407,
        use_rslora=False,
        loftq_config=None,
    )

    # Chat template must match the instruction/response markers used below.
    tokenizer = get_chat_template(tokenizer, chat_template="qwen3-instruct")
    dataset = load_dataset("json", data_files=DATA_PATH, split="train")
    split = dataset.train_test_split(test_size=TEST_SPLIT_RATIO, seed=SPLIT_SEED)
    train_dataset = split["train"]
    test_dataset = split["test"]
    # Persist the held-out split so evaluation scripts use the same samples.
    test_dataset.to_json(TEST_DATA_PATH)

    def formatting_prompts_func(examples):
        # Batched map: render each (fulltext, rewritten text, label) triple
        # into one chat-template string under the "text" column.
        texts = []
        for fulltext, diff_label_texts, label in zip(
            examples["fulltext"],
            examples["diff_label_texts"],
            examples["label"],
        ):
            messages = build_messages(fulltext, diff_label_texts, label)
            text = tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=False
            )
            texts.append(text)
        return {"text": texts}

    train_dataset = train_dataset.map(formatting_prompts_func, batched=True)

    trainer = SFTTrainer(
        model=model,
        processing_class=tokenizer,
        train_dataset=train_dataset,
        eval_dataset=None,
        args=SFTConfig(
            dataset_text_field="text",
            # Effective batch size = 64 * 16 accumulation steps.
            per_device_train_batch_size=64,
            gradient_accumulation_steps=16,
            warmup_steps=5,
            # max_steps=60,
            num_train_epochs=1,
            learning_rate=2e-4,
            logging_steps=1,
            optim="adamw_8bit",
            weight_decay=0.001,
            lr_scheduler_type="linear",
            seed=3407,
            report_to="none",
        ),
    )

    # Mask loss on everything except the assistant turn; markers must match
    # the qwen3-instruct chat template applied above.
    trainer = train_on_responses_only(
        trainer,
        instruction_part="<|im_start|>user\n",
        response_part="<|im_start|>assistant\n",
    )

    trainer.train()

    # Merge LoRA weights into the base model and save at 16-bit precision.
    os.makedirs(FP16_SAVE_DIR, exist_ok=True)
    model.save_pretrained_merged(
        FP16_SAVE_DIR,
        tokenizer,
        save_method="merged_16bit",
    )
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
# Entry point: only train when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
code/text_classifier/test_saved_dspy_vllm_gen_text_only.py
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import traceback
|
| 5 |
+
import urllib.error
|
| 6 |
+
import urllib.request
|
| 7 |
+
|
| 8 |
+
import dspy
|
| 9 |
+
from dspy.evaluate import Evaluate
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
# OpenAI-compatible endpoint served by the local vLLM instance.
DEFAULT_API_BASE = "http://172.16.34.22:8040/v1"
# Compiled DSPy program (optimized prompt + demos) to evaluate.
DEFAULT_MODEL_PATH = (
    "/home/mshahidul/readctrl/code/text_classifier/dspy_model/vllm-Meta-Llama-3.1-8B-Instruct_teacher-gpt5_v1/model.json"
)
# Cleaned held-out evaluation set.
DEFAULT_TEST_PATH = "/home/mshahidul/readctrl/code/text_classifier/data/verified_combined_0-80_clean200.json"
# Destination for the accuracy-summary JSON written by main().
DEFAULT_OUTPUT_PATH = (
    "/home/mshahidul/readctrl/code/text_classifier/accuracy/"
    "vllm-llama-3.1-8b-awq-int4_teacher-gpt5_v1_clean200_eval.json"
)
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class HealthLiteracySignature(dspy.Signature):
    # NOTE(review): deliberately no class docstring — DSPy presumably folds a
    # Signature docstring and the field `desc` strings into the LLM prompt, so
    # any text change here would change model behavior; confirm before editing.
    # Input: the rewritten passage to classify.
    generated_text = dspy.InputField(
        desc="A version of the source text rewritten for a specific audience."
    )
    # Output: one of the three canonical literacy labels.
    literacy_label = dspy.OutputField(
        desc=(
            "Classification: low_health_literacy (simple words, no jargon), "
            "intermediate_health_literacy (moderate technicality), or "
            "proficient_health_literacy (highly technical/original level)."
        )
    )
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class HealthLiteracyClassifier(dspy.Module):
    """Chain-of-thought classifier over HealthLiteracySignature."""

    def __init__(self):
        super().__init__()
        # ChainOfThought makes the LM reason before emitting literacy_label.
        self.classifier = dspy.ChainOfThought(HealthLiteracySignature)

    def forward(self, generated_text):
        # Returns the dspy prediction; callers read `.literacy_label` from it.
        return self.classifier(generated_text=generated_text)
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def parse_args():
    """Build and parse the CLI options for this evaluation script."""
    p = argparse.ArgumentParser(
        description="Load a saved DSPy model and evaluate on test set."
    )
    # Paths default to the project layout; the API base may also come from env.
    p.add_argument("--model-path", default=DEFAULT_MODEL_PATH)
    p.add_argument("--test-path", default=DEFAULT_TEST_PATH)
    api_base_default = os.environ.get("VLLM_API_BASE", DEFAULT_API_BASE)
    p.add_argument("--api-base", default=api_base_default)
    p.add_argument("--num-threads", type=int, default=1)
    p.add_argument("--output-path", default=DEFAULT_OUTPUT_PATH)
    p.add_argument(
        "--provide-traceback",
        action="store_true",
        help="Print full traceback if runtime error happens.",
    )
    return p.parse_args()
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def check_api_base(api_base):
    """Fail fast if the OpenAI-compatible endpoint is not reachable and healthy.

    Issues a GET to ``<api_base>/models`` with a short timeout.

    Raises:
        RuntimeError: the server answered but with an HTTP error status
            (reachable, yet unhealthy).
        ConnectionError: the endpoint could not be reached at all
            (DNS failure, connection refused, timeout).
    """
    models_url = api_base.rstrip("/") + "/models"
    req = urllib.request.Request(models_url, method="GET")
    try:
        with urllib.request.urlopen(req, timeout=5) as resp:
            # Defensive: urlopen normally raises HTTPError for >=400, but a
            # custom opener that doesn't raise would still be caught here.
            if resp.status >= 400:
                raise RuntimeError(
                    f"Endpoint reachable but unhealthy: {models_url} (status={resp.status})"
                )
    except urllib.error.HTTPError as exc:
        # BUG FIX: HTTPError subclasses URLError, so previously an HTTP 4xx/5xx
        # (server up but erroring) was misreported as "Cannot reach endpoint".
        raise RuntimeError(
            f"Endpoint reachable but unhealthy: {models_url} (status={exc.code})"
        ) from exc
    except urllib.error.URLError as exc:
        raise ConnectionError(
            "Cannot reach OpenAI-compatible endpoint. "
            f"api_base={api_base}. "
            "Start your vLLM server or pass correct --api-base."
        ) from exc
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def load_testset(path):
    """Load evaluation examples from a ``.json`` or ``.jsonl`` file.

    Each record must carry the rewritten text (``generated_text`` or the
    legacy ``diff_label_texts`` key) and a gold label (``literacy_label`` or
    legacy ``label``). Records missing either value are skipped in BOTH
    branches — previously the JSONL branch raised KeyError on missing keys
    and accepted empty values, unlike the JSON branch.

    Returns:
        list[dspy.Example] with ``generated_text`` marked as the input field.
    """

    def _to_example(record):
        # Fall back to the legacy key only when the modern key is absent.
        text = record.get("generated_text", record.get("diff_label_texts"))
        label = record.get("literacy_label", record.get("label"))
        if not text or not label:
            return None
        return dspy.Example(
            generated_text=text,
            literacy_label=label,
        ).with_inputs("generated_text")

    if path.endswith(".jsonl"):
        with open(path, "r") as f:
            records = [json.loads(line) for line in f if line.strip()]
    else:
        with open(path, "r") as f:
            records = json.load(f)

    examples = [ex for ex in map(_to_example, records) if ex is not None]
    return examples
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def health_literacy_metric(gold, pred, trace=None):
    """Lenient accuracy: True iff the gold label occurs (case-insensitively)
    anywhere in the predicted label text. Tolerates wordy LLM outputs."""
    usable = bool(pred) and hasattr(pred, "literacy_label")
    if not usable:
        return False
    wanted = str(gold.literacy_label).strip().lower()
    got = str(pred.literacy_label).strip().lower()
    return wanted in got
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def load_compiled_classifier(path):
    """Load a compiled DSPy program from *path*, trying two loader APIs.

    Newer DSPy releases expose a top-level ``dspy.load``; if that attribute is
    missing or the call fails, fall back to instantiating the module class and
    restoring state via its ``.load(...)`` method.

    Raises:
        RuntimeError: if both loading strategies fail.
    """
    if hasattr(dspy, "load"):
        try:
            return dspy.load(path)
        except Exception as exc:
            # Fall through to the per-module loader rather than aborting.
            print(
                f"[warning] dspy.load failed ({type(exc).__name__}); "
                "trying module.load(...)"
            )

    classifier = HealthLiteracyClassifier()
    try:
        classifier.load(path)
    except Exception as exc:
        raise RuntimeError(f"Failed to load compiled model from {path}") from exc
    return classifier
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def main():
    """Evaluate a saved DSPy classifier against a local vLLM endpoint.

    Validates paths, checks the endpoint, configures the LM, runs
    dspy.evaluate.Evaluate over the test set, and writes an accuracy summary
    JSON to --output-path. Any failure is printed (optionally with traceback)
    and re-raised so the process exits non-zero.
    """
    args = parse_args()

    # Fail early with precise messages before touching the network.
    if not os.path.exists(args.model_path):
        raise FileNotFoundError(f"Model file not found: {args.model_path}")
    if not os.path.exists(args.test_path):
        raise FileNotFoundError(f"Test file not found: {args.test_path}")

    try:
        check_api_base(args.api_base)

        # "openai/dspy" routes through the OpenAI-compatible API; the served
        # model name is resolved server-side. temperature=0 for determinism.
        lm = dspy.LM(
            model="openai/dspy",
            api_base=args.api_base,
            api_key="EMPTY",
            temperature=0.0,
        )
        dspy.configure(lm=lm)

        testset = load_testset(args.test_path)
        compiled_classifier = load_compiled_classifier(args.model_path)

        evaluator = Evaluate(
            devset=testset,
            metric=health_literacy_metric,
            num_threads=args.num_threads,
            display_progress=True,
        )
        evaluation_result = evaluator(compiled_classifier)
        # Newer dspy returns an object exposing .score; older versions return
        # a bare number — handle both shapes.
        accuracy_score = (
            float(evaluation_result.score)
            if hasattr(evaluation_result, "score")
            else float(evaluation_result)
        )

        output_data = {
            "model_path": args.model_path,
            "test_path": args.test_path,
            "accuracy_score": accuracy_score,
            "num_results": len(getattr(evaluation_result, "results", []) or []),
        }

        os.makedirs(os.path.dirname(args.output_path), exist_ok=True)
        with open(args.output_path, "w") as f:
            json.dump(output_data, f, indent=2)

        print(evaluation_result)
        print(json.dumps(output_data, indent=2))
    except Exception as exc:
        # Summarize, optionally show the traceback, and still fail loudly.
        print(f"[error] {type(exc).__name__}: {exc}")
        if args.provide_traceback:
            traceback.print_exc()
        raise
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
# Entry point: only run the evaluation when executed directly.
if __name__ == "__main__":
    main()
|
code/text_classifier/text_classifier_dspy.py
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import dspy
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import random
|
| 5 |
+
from typing import Literal
|
| 6 |
+
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
|
| 7 |
+
from dspy.evaluate import Evaluate
|
| 8 |
+
|
| 9 |
+
# --- 1. LLM Configuration ---
# Read the OpenAI API key from a local credentials file (not checked in).
api_file = "/home/mshahidul/api_new.json"
with open(api_file, "r") as f:
    api_keys = json.load(f)
openai_api_key = api_keys["openai"]

# Student: Local vLLM (Deployment Model)
# temperature=0.0 keeps classification outputs deterministic.
vllm_model = dspy.LM(
    model='Qwen/Qwen3-30B-A3B-Instruct-2507',
    api_base="http://172.16.34.29:8030/v1",
    api_key="EMPTY",
    temperature=0.0
)

# Teacher: OpenAI (High-quality rationale generation)
# Note: Ensure 'gpt-5' is the correct model name in your environment (usually 'gpt-4-turbo' or 'gpt-4o')
openai_model_teacher = dspy.LM(model='gpt-5', api_key=openai_api_key)
openai_model_student = dspy.LM(model='gpt-5-mini', api_key=openai_api_key)

# Default LM for DSPy runtime
# Use the local vLLM for fast iteration; switch to openai_model_student if needed.
# dspy.configure(lm=vllm_model)
dspy.configure(lm=openai_model_student)
|
| 32 |
+
|
| 33 |
+
class HealthLiteracySignature(dspy.Signature):
    """
    Analyze the linguistic complexity, use of medical jargon, and sentence
    structure of 'generated_text' relative to 'full_text' to determine
    the health literacy level.
    """
    # NOTE(review): the docstring above and the field `desc` strings feed the
    # LLM prompt at runtime — treat them as behavior, not documentation, and
    # keep them in sync with the models compiled against this signature.
    full_text = dspy.InputField(desc="Original clinical or medical source text containing jargon and technical details.")
    generated_text = dspy.InputField(
        desc="A version of the source text rewritten for a specific audience."
    )

    literacy_label = dspy.OutputField(
        desc="Classification: low_health_literacy (simple words, no jargon), intermediate_health_literacy (moderate technicality), or proficient_health_literacy (highly technical/original level)."
    )
|
| 47 |
+
|
| 48 |
+
class HealthLiteracyClassifier(dspy.Module):
    """Chain-of-thought classifier comparing a rewrite against its source text."""

    def __init__(self):
        super().__init__()
        # Use ChainOfThought for better reasoning on medical jargon
        self.classifier = dspy.ChainOfThought(HealthLiteracySignature)

    def forward(self, full_text, generated_text):
        # Returns the dspy prediction; callers read `.literacy_label` from it.
        return self.classifier(full_text=full_text, generated_text=generated_text)
|
| 56 |
+
|
| 57 |
+
def prepare_data(raw_data, seed=42, train_ratio=0.6):
    """Build class-balanced train/test splits of dspy Examples.

    Buckets records by 'label', downsamples every bucket to the size of the
    smallest one (so classes are balanced), then splits each bucket by
    *train_ratio*. Records with unknown or missing labels are silently
    dropped. Both returned lists are shuffled deterministically via *seed*.

    Args:
        raw_data: iterable of dicts with 'fulltext', 'diff_label_texts', 'label'.
        seed: RNG seed for reproducible shuffling/splitting.
        train_ratio: fraction of each label bucket assigned to training.

    Returns:
        (trainset, testset) lists of dspy.Example.

    Raises:
        ValueError: if any label has zero examples (balancing impossible).
    """
    labels = [
        "low_health_literacy",
        "intermediate_health_literacy",
        "proficient_health_literacy",
    ]
    rng = random.Random(seed)
    buckets = {label: [] for label in labels}
    for item in raw_data:
        label = item.get("label")
        if label not in buckets:
            # Skip records outside the closed label set.
            continue
        example = dspy.Example(
            full_text=item["fulltext"],
            generated_text=item["diff_label_texts"],
            literacy_label=label,  # Matches the Signature field
        ).with_inputs("full_text", "generated_text")
        buckets[label].append(example)

    min_count = min(len(buckets[label]) for label in labels)
    if min_count == 0:
        raise ValueError("One or more labels has no examples; cannot balance.")

    per_label_total = min_count
    per_label_train = int(round(per_label_total * train_ratio))
    # Clamp so every label contributes at least one train example and
    # (when possible) at least one test example.
    per_label_train = max(1, min(per_label_train, per_label_total - 1))

    trainset = []
    testset = []
    for label in labels:
        rng.shuffle(buckets[label])
        selected = buckets[label][:per_label_total]
        trainset.extend(selected[:per_label_train])
        testset.extend(selected[per_label_train:per_label_total])

    # Shuffle so label order doesn't leak into batch order downstream.
    rng.shuffle(trainset)
    rng.shuffle(testset)
    return trainset, testset
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# NOTE(review): duplicate `import json` — json is already imported at the top
# of this file; kept here byte-identical.
import json
# Load the verified labeled dataset and build the balanced splits.
path = "/home/mshahidul/readctrl/code/text_classifier/verified_combined_0-80.json"
raw_data = json.load(open(path))
trainset, testset = prepare_data(raw_data)
|
| 101 |
+
|
| 102 |
+
def _example_to_dict(example):
|
| 103 |
+
return {
|
| 104 |
+
"full_text": example.full_text,
|
| 105 |
+
"generated_text": example.generated_text,
|
| 106 |
+
"literacy_label": example.literacy_label,
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
def save_jsonl(path, examples):
    """Write one JSON object per line (UTF-8 text preserved) for *examples*."""
    lines = [
        json.dumps(_example_to_dict(ex), ensure_ascii=False) + "\n"
        for ex in examples
    ]
    with open(path, "w") as f:
        f.writelines(lines)
|
| 113 |
+
|
| 114 |
+
# Persist the exact splits used in this run so results are reproducible.
train_path = "/home/mshahidul/readctrl/code/text_classifier/train.jsonl"
test_path = "/home/mshahidul/readctrl/code/text_classifier/test.jsonl"
save_jsonl(train_path, trainset)
save_jsonl(test_path, testset)
|
| 118 |
+
|
| 119 |
+
def health_literacy_metric(gold, pred, trace=None):
    """Lenient accuracy: True iff the gold label occurs (case-insensitively)
    inside the predicted label text — tolerates wordy LLM outputs."""
    usable = bool(pred) and hasattr(pred, "literacy_label")
    if not usable:
        return False

    wanted = str(gold.literacy_label).strip().lower()
    got = str(pred.literacy_label).strip().lower()

    # Simple inclusion check helps if the LLM gets wordy
    return wanted in got
|
| 128 |
+
|
| 129 |
+
# Few-shot optimizer: bootstraps chain-of-thought demos with the stronger
# teacher model and random-searches over candidate programs using the
# lenient metric defined above.
optimizer = BootstrapFewShotWithRandomSearch(
    metric=health_literacy_metric,
    max_bootstrapped_demos=3,
    num_candidate_programs=8,
    teacher_settings=dict(lm=openai_model_teacher)
)

# 3. Compile! This creates the "optimized prompt"
compiled_classifier = optimizer.compile(HealthLiteracyClassifier(), trainset=trainset)

# Score the compiled program on the held-out test split.
evaluator = Evaluate(devset=testset, metric=health_literacy_metric, num_threads=1, display_progress=True)
evaluation_result = evaluator(compiled_classifier)
# Handle both dspy result shapes: an object exposing .score, or a bare number.
accuracy_score = (
    float(evaluation_result.score)
    if hasattr(evaluation_result, "score")
    else float(evaluation_result)
)
|
| 146 |
+
|
| 147 |
+
def _extract_usage(record):
|
| 148 |
+
if isinstance(record, dict):
|
| 149 |
+
usage = record.get("usage")
|
| 150 |
+
if usage:
|
| 151 |
+
return usage
|
| 152 |
+
response = record.get("response")
|
| 153 |
+
if isinstance(response, dict) and response.get("usage"):
|
| 154 |
+
return response["usage"]
|
| 155 |
+
return None
|
| 156 |
+
|
| 157 |
+
def calc_cost_usd(lm, price_in_per_1m, price_out_per_1m, price_cached_in_per_1m=None):
    """Sum token usage across an LM's call history and price it in USD.

    Prices are USD per one million tokens. Cached-input tokens are only
    billed when *price_cached_in_per_1m* is provided. Falls back across the
    key-name variants different providers use for each counter.
    """
    prompt_total = 0
    completion_total = 0
    cached_total = 0
    for record in getattr(lm, "history", []) or []:
        usage = _extract_usage(record)
        if not usage:
            continue
        prompt_total += int(usage.get("prompt_tokens", usage.get("input_tokens", 0)) or 0)
        completion_total += int(usage.get("completion_tokens", usage.get("output_tokens", 0)) or 0)
        cached_total += int(usage.get("cached_tokens", usage.get("prompt_tokens_cached", 0)) or 0)

    total_cost = (prompt_total / 1_000_000) * price_in_per_1m
    total_cost += (completion_total / 1_000_000) * price_out_per_1m
    if price_cached_in_per_1m is not None:
        total_cost += (cached_total / 1_000_000) * price_cached_in_per_1m

    return {
        "prompt_tokens": prompt_total,
        "completion_tokens": completion_total,
        "cached_tokens": cached_total,
        "cost_usd": total_cost,
    }
|
| 178 |
+
|
| 179 |
+
# Fill these with current OpenAI pricing (USD per 1M tokens).
GPT5_PRICE_INPUT_PER_1M = 1.25
GPT5_PRICE_OUTPUT_PER_1M = 10.0
GPT5_MINI_PRICE_INPUT_PER_1M = 0.25
GPT5_MINI_PRICE_OUTPUT_PER_1M = 2.0

# Price the teacher (demo bootstrapping) and student (compile + eval)
# histories separately so their contributions are visible.
teacher_cost = calc_cost_usd(
    openai_model_teacher,
    GPT5_PRICE_INPUT_PER_1M,
    GPT5_PRICE_OUTPUT_PER_1M,
)
student_cost = calc_cost_usd(
    openai_model_student,
    GPT5_MINI_PRICE_INPUT_PER_1M,
    GPT5_MINI_PRICE_OUTPUT_PER_1M,
)

cost_report = {
    "gpt-5": teacher_cost,
    "gpt-5-mini": student_cost,
}
# Persist the compiled program plus its accuracy and cost side by side.
folder_name="student-gpt5-mini_teacher-gpt5_v1"
os.makedirs(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}", exist_ok=True)
compiled_classifier.save(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/model.json")

print(evaluation_result)
print(json.dumps(cost_report, indent=2))
with open(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/accuracy.json", "w") as f:
    json.dump(
        {
            "accuracy_score": accuracy_score,
            "num_results": len(getattr(evaluation_result, "results", []) or []),
        },
        f,
        indent=2,
    )
with open(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/cost.json", "w") as f:
    json.dump(cost_report, f, indent=2)
|
code/text_classifier/text_classifier_dspy_load_and_infer_full.py
ADDED
|
@@ -0,0 +1,353 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
from collections import Counter
|
| 5 |
+
from typing import Dict, List, Tuple
|
| 6 |
+
|
| 7 |
+
import dspy
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Credentials file holding the OpenAI API key under the "openai" entry.
API_FILE = "/home/mshahidul/api_new.json"
# Compiled DSPy program plus the dataset and output paths for this run.
DEFAULT_MODEL_PATH = "/home/mshahidul/readctrl/code/text_classifier/dspy_model/student-gpt5-mini_teacher-gpt5_v1/model.json"
DEFAULT_DATASET_PATH = "/home/mshahidul/readctrl/code/text_classifier/verified_combined_0-80.json"
DEFAULT_OUTPUT_PATH = "/home/mshahidul/readctrl/code/text_classifier/dspy_model/student-gpt5-mini_teacher-gpt5_v1/full_dataset_accuracy.json"
DEFAULT_PREDICTIONS_PATH = "/home/mshahidul/readctrl/code/text_classifier/dspy_model/student-gpt5-mini_teacher-gpt5_v1/full_dataset_predictions.json"
DEFAULT_CLEAN_DATASET_PATH = "/home/mshahidul/readctrl/code/text_classifier/verified_combined_0-80_clean200.json"
DEFAULT_REMOVED_PATH = "/home/mshahidul/readctrl/code/text_classifier/verified_combined_0-80_removed21.json"
# Closed label set; anything else counts as invalid/unparseable.
VALID_LABELS = {
    "low_health_literacy",
    "intermediate_health_literacy",
    "proficient_health_literacy",
}
# Ordinal positions used to score how far off a misclassification is.
LABEL_ORDER = {
    "low_health_literacy": 0,
    "intermediate_health_literacy": 1,
    "proficient_health_literacy": 2,
}
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
class HealthLiteracySignature(dspy.Signature):
    """
    Analyze the linguistic complexity, use of medical jargon, and sentence
    structure of 'generated_text' to determine the health literacy level.
    """

    # NOTE(review): the docstring and `desc` strings feed the LLM prompt —
    # changing them changes behavior, and they must match the signature the
    # saved model at DEFAULT_MODEL_PATH was compiled with. Confirm before edit.
    generated_text = dspy.InputField(
        desc="A version of the source text rewritten for a specific audience."
    )
    literacy_label = dspy.OutputField(
        desc=(
            "Classification: low_health_literacy (simple words, no jargon), "
            "intermediate_health_literacy (moderate technicality), or "
            "proficient_health_literacy (highly technical/original level)."
        )
    )
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class HealthLiteracyClassifier(dspy.Module):
    """Chain-of-thought classifier over HealthLiteracySignature."""

    def __init__(self):
        super().__init__()
        # ChainOfThought makes the LM reason before emitting literacy_label.
        self.classifier = dspy.ChainOfThought(HealthLiteracySignature)

    def forward(self, generated_text):
        # Returns the dspy prediction; callers read `.literacy_label` from it.
        return self.classifier(generated_text=generated_text)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def load_openai_key(api_file: str) -> str:
    """Read the OpenAI API key from a JSON credentials file.

    Raises:
        KeyError: when the file carries no "openai" entry.
    """
    with open(api_file, "r") as f:
        credentials = json.load(f)
    if "openai" not in credentials:
        raise KeyError(f"'openai' key is missing in {api_file}")
    return credentials["openai"]
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def normalize_label(text: str) -> str:
    """Lower-case and trim a label; falsy inputs (None, "", 0) become ""."""
    value = text if text else ""
    return str(value).strip().lower()
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def is_correct(gold_label: str, predicted_label: str) -> bool:
    """Lenient match: the normalized gold label appears in the prediction text."""
    return normalize_label(gold_label) in normalize_label(predicted_label)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def extract_predicted_label(predicted_text: str) -> str:
    """Map raw prediction text to exactly one canonical label, else "".

    Ambiguous outputs (zero or multiple valid labels present) are treated as
    unparseable and yield "".
    """
    pred = normalize_label(predicted_text)
    matches = [candidate for candidate in VALID_LABELS if candidate in pred]
    return matches[0] if len(matches) == 1 else ""
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def misclassification_severity(gold_label: str, predicted_label: str) -> int:
    """Ordinal distance between gold and predicted labels (0 = same bucket).

    Labels missing from LABEL_ORDER (unknown/unparseable) score 3 — worse
    than any confusion between real labels.
    """
    try:
        return abs(LABEL_ORDER[gold_label] - LABEL_ORDER[predicted_label])
    except KeyError:
        # Unknown/unparseable predictions are treated as worst.
        return 3
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def load_full_examples(dataset_path: str):
    """Load every usable labeled record from *dataset_path* into plain dicts.

    Keeps only records whose label is in VALID_LABELS and whose
    'diff_label_texts' is non-empty; preserves the original list index so
    later passes can refer back to positions in the source dataset.

    Raises:
        ValueError: when no usable example is found.
    """
    with open(dataset_path, "r") as f:
        raw_data = json.load(f)

    examples = []
    for idx, item in enumerate(raw_data):
        label = item.get("label")
        text = item.get("diff_label_texts")
        if label in VALID_LABELS and text:
            examples.append(
                {
                    "index": idx,  # position in the raw dataset
                    "generated_text": text,
                    "gold_label": label,
                    "doc_id": item.get("doc_id"),
                    "raw_item": item,  # kept so removed items can be re-exported
                }
            )
    if not examples:
        raise ValueError("No valid labeled examples found in dataset.")
    return examples
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def choose_indices_to_remove(
    predictions: List[Dict], remove_count: int
) -> Tuple[List[Dict], List[int]]:
    """Pick *remove_count* predictions to drop, roughly balanced per gold label.

    Preference order: misclassified items first (worst rank first); then, for
    labels short of quota, their next-worst remaining items; finally a global
    worst-first fill. Returns (removed_items_by_rank, sorted_removed_indices).
    """
    def _rank_key(p: Dict):
        # Sort "worst first": misclassified before correct, higher ordinal
        # severity first, unparseable predictions first, then wordier raw
        # output; dataset index breaks remaining ties deterministically.
        return (
            0 if not p["exact_correct"] else 1,
            -p["severity"],
            0 if not p["predicted_label"] else 1,
            -len(normalize_label(p["raw_prediction_text"])),
            p["index"],
        )

    label_sequence = sorted(VALID_LABELS, key=lambda x: LABEL_ORDER[x])
    per_label_all = {label: [] for label in label_sequence}
    per_label_mis = {label: [] for label in label_sequence}
    for p in predictions:
        label = p["gold_label"]
        if label in per_label_all:
            per_label_all[label].append(p)
            if not p["exact_correct"]:
                per_label_mis[label].append(p)

    for label in label_sequence:
        per_label_all[label].sort(key=_rank_key)
        per_label_mis[label].sort(key=_rank_key)

    # Balanced quota (approximately equal removals per label).
    num_labels = len(label_sequence)
    base_quota = remove_count // num_labels
    remainder = remove_count % num_labels
    quotas = {label: base_quota for label in label_sequence}

    # Assign remainder to labels with more misclassified candidates first.
    remainder_order = sorted(
        label_sequence,
        key=lambda label: (-len(per_label_mis[label]), LABEL_ORDER[label]),
    )
    for label in remainder_order[:remainder]:
        quotas[label] += 1

    removed = []
    removed_indices_set = set()

    # First pass: satisfy each label quota with misclassified items.
    for label in label_sequence:
        take = min(quotas[label], len(per_label_mis[label]))
        for item in per_label_mis[label][:take]:
            removed.append(item)
            removed_indices_set.add(item["index"])

    # Second pass: if some quotas could not be met, fill within those labels
    # using next-worst remaining items (can include correctly classified).
    for label in label_sequence:
        needed = quotas[label] - sum(1 for x in removed if x["gold_label"] == label)
        if needed <= 0:
            continue
        candidates = [
            x for x in per_label_all[label] if x["index"] not in removed_indices_set
        ]
        for item in candidates[:needed]:
            removed.append(item)
            removed_indices_set.add(item["index"])

    # Final pass: if still short (edge cases), fill globally by worst rank.
    if len(removed) < remove_count:
        remaining_global = sorted(
            (p for p in predictions if p["index"] not in removed_indices_set),
            key=_rank_key,
        )
        need = remove_count - len(removed)
        for item in remaining_global[:need]:
            removed.append(item)
            removed_indices_set.add(item["index"])

    # Keep deterministic order in output by rank.
    removed = sorted(removed, key=_rank_key)[:remove_count]
    removed_indices = sorted(p["index"] for p in removed)
    return removed, removed_indices
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def run_inference(
    model_path: str,
    dataset_path: str,
    output_path: str,
    predictions_path: str,
    clean_dataset_path: str,
    removed_path: str,
    target_clean_size: int,
):
    """Classify every example, score accuracy, and split the dataset.

    Runs the compiled classifier over the full dataset, computes lenient and
    exact accuracy (overall and per gold label), then removes the worst
    ``total - target_clean_size`` examples (selection delegated to
    ``choose_indices_to_remove``) to produce a "clean" dataset and a
    "removed" dataset. Writes four JSON artifacts: the summary report,
    per-example predictions, the clean dataset, and the removed examples.

    Raises:
        ValueError: when ``target_clean_size`` is not strictly between 0 and
            the dataset size.
    """
    api_key = load_openai_key(API_FILE)
    # The student LM answers every classification request.
    dspy.configure(lm=dspy.LM(model="gpt-5-mini", api_key=api_key))

    classifier = HealthLiteracyClassifier()
    classifier.load(model_path)

    examples = load_full_examples(dataset_path)
    total = len(examples)
    if not (0 < target_clean_size < total):
        raise ValueError(
            f"target_clean_size must be between 1 and {total - 1}, got {target_clean_size}"
        )

    remove_count = total - target_clean_size
    lenient_hits = 0
    label_totals = Counter()
    label_correct = Counter()
    predictions = []

    progress = tqdm(examples, desc="Classifying full dataset", unit="sample")
    for position, example in enumerate(progress, start=1):
        prediction = classifier(generated_text=example["generated_text"])
        raw_text = getattr(prediction, "literacy_label", "")
        predicted = extract_predicted_label(raw_text)
        gold = example["gold_label"]
        exact_hit = predicted == gold
        lenient_hit = is_correct(gold, raw_text)
        # Severity is only meaningful for genuine misclassifications.
        severity = 0 if exact_hit else misclassification_severity(gold, predicted)

        label_totals[gold] += 1
        if lenient_hit:
            lenient_hits += 1
            label_correct[gold] += 1

        predictions.append(
            {
                "index": example["index"],
                "doc_id": example["doc_id"],
                "gold_label": gold,
                "predicted_label": predicted,
                "raw_prediction_text": raw_text,
                "lenient_correct": lenient_hit,
                "exact_correct": exact_hit,
                "severity": severity,
                "generated_text": example["generated_text"],
            }
        )

        if position % 10 == 0 or position == total:
            tqdm.write(f"Processed {position}/{total}")

    accuracy = lenient_hits / total if total else 0.0
    exact_accuracy = (
        sum(1 for p in predictions if p["exact_correct"]) / total if total else 0.0
    )
    per_label_accuracy = {
        label: (label_correct[label] / label_totals[label]) if label_totals[label] else 0.0
        for label in sorted(VALID_LABELS)
    }

    removed_examples, removed_indices = choose_indices_to_remove(predictions, remove_count)
    removed_index_set = set(removed_indices)
    clean_dataset = [
        ex["raw_item"] for ex in examples if ex["index"] not in removed_index_set
    ]
    removed_dataset = [
        ex["raw_item"] for ex in examples if ex["index"] in removed_index_set
    ]

    report = {
        "model_path": model_path,
        "dataset_path": dataset_path,
        "num_examples": total,
        "num_correct": lenient_hits,
        "lenient_accuracy": accuracy,
        "exact_accuracy": exact_accuracy,
        "per_label_accuracy": per_label_accuracy,
        "target_clean_size": target_clean_size,
        "removed_count": remove_count,
        "clean_dataset_size": len(clean_dataset),
        "removed_dataset_size": len(removed_dataset),
        "removed_misclassified_count": sum(
            1 for p in removed_examples if not p["exact_correct"]
        ),
        "removed_per_label": dict(
            Counter(p["gold_label"] for p in removed_examples)
        ),
    }

    # Ensure every destination directory exists before writing any artifact.
    for destination in (output_path, predictions_path, clean_dataset_path, removed_path):
        parent = os.path.dirname(destination)
        if parent:
            os.makedirs(parent, exist_ok=True)

    with open(output_path, "w") as f:
        json.dump(report, f, indent=2)
    with open(predictions_path, "w") as f:
        json.dump(predictions, f, indent=2)
    with open(clean_dataset_path, "w") as f:
        json.dump(clean_dataset, f, indent=2, ensure_ascii=False)
    with open(removed_path, "w") as f:
        json.dump(removed_dataset, f, indent=2, ensure_ascii=False)

    print(json.dumps(report, indent=2))
    print(f"Saved predictions to: {predictions_path}")
    print(f"Saved clean dataset to: {clean_dataset_path}")
    print(f"Saved removed examples to: {removed_path}")
    print(f"Saved report to: {output_path}")
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
def main():
    """Parse CLI flags and kick off a full-dataset evaluation run."""
    parser = argparse.ArgumentParser(
        description="Load a compiled DSPy classifier and evaluate on full dataset."
    )
    # Path-style options all follow the same flag/default pattern.
    path_flags = (
        ("--model-path", DEFAULT_MODEL_PATH),
        ("--dataset-path", DEFAULT_DATASET_PATH),
        ("--output-path", DEFAULT_OUTPUT_PATH),
        ("--predictions-path", DEFAULT_PREDICTIONS_PATH),
        ("--clean-dataset-path", DEFAULT_CLEAN_DATASET_PATH),
        ("--removed-path", DEFAULT_REMOVED_PATH),
    )
    for flag, default in path_flags:
        parser.add_argument(flag, default=default)
    parser.add_argument("--target-clean-size", type=int, default=200)

    args = parser.parse_args()
    run_inference(
        model_path=args.model_path,
        dataset_path=args.dataset_path,
        output_path=args.output_path,
        predictions_path=args.predictions_path,
        clean_dataset_path=args.clean_dataset_path,
        removed_path=args.removed_path,
        target_clean_size=args.target_clean_size,
    )
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
# Script entry point: run the CLI evaluation when executed directly.
if __name__ == "__main__":
    main()
|
code/text_classifier/text_classifier_dspy_only_gen_text.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import dspy
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import random
|
| 5 |
+
from typing import Literal
|
| 6 |
+
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
|
| 7 |
+
from dspy.evaluate import Evaluate
|
| 8 |
+
|
| 9 |
+
# --- 1. LLM Configuration ---
|
| 10 |
+
api_file = "/home/mshahidul/api_new.json"
|
| 11 |
+
with open(api_file, "r") as f:
|
| 12 |
+
api_keys = json.load(f)
|
| 13 |
+
openai_api_key = api_keys["openai"]
|
| 14 |
+
|
| 15 |
+
# Student: Local vLLM (Deployment Model)
|
| 16 |
+
vllm_model = dspy.LM(
|
| 17 |
+
model='Qwen/Qwen3-30B-A3B-Instruct-2507',
|
| 18 |
+
api_base="http://172.16.34.29:8030/v1",
|
| 19 |
+
api_key="EMPTY",
|
| 20 |
+
temperature=0.0
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
# Teacher: OpenAI (High-quality rationale generation)
|
| 24 |
+
# Note: Ensure 'gpt-5' is the correct model name in your environment (usually 'gpt-4-turbo' or 'gpt-4o')
|
| 25 |
+
openai_model_teacher = dspy.LM(model='gpt-5', api_key=openai_api_key)
|
| 26 |
+
openai_model_student = dspy.LM(model='gpt-5-mini', api_key=openai_api_key)
|
| 27 |
+
|
| 28 |
+
# Default LM for DSPy runtime
|
| 29 |
+
# Use the local vLLM for fast iteration; switch to openai_model_student if needed.
|
| 30 |
+
# dspy.configure(lm=vllm_model)
|
| 31 |
+
dspy.configure(lm=openai_model_student)
|
| 32 |
+
|
| 33 |
+
class HealthLiteracySignature(dspy.Signature):
    # NOTE(review): in DSPy this docstring doubles as the instruction text
    # sent to the LM, so it is kept verbatim — editing it changes the prompt.
    """
    Analyze the linguistic complexity, use of medical jargon, and sentence
    structure of 'generated_text' to determine the health literacy level.
    """
    # Input: the rewritten passage to be judged (no source text is provided
    # in this variant of the signature).
    generated_text = dspy.InputField(
        desc="A version of the source text rewritten for a specific audience."
    )

    # Output: expected to be one of the three literacy classes named in `desc`.
    literacy_label = dspy.OutputField(
        desc="Classification: low_health_literacy (simple words, no jargon), intermediate_health_literacy (moderate technicality), or proficient_health_literacy (highly technical/original level)."
    )
|
| 45 |
+
|
| 46 |
+
class HealthLiteracyClassifier(dspy.Module):
    """DSPy module that predicts a health-literacy label for a rewritten text."""

    def __init__(self):
        super().__init__()
        # Use ChainOfThought for better reasoning on medical jargon
        self.classifier = dspy.ChainOfThought(HealthLiteracySignature)

    def forward(self, generated_text):
        # Returns the underlying Prediction; callers read `.literacy_label`.
        return self.classifier(generated_text=generated_text)
|
| 54 |
+
|
| 55 |
+
def prepare_data(raw_data, seed=42, train_ratio=0.6):
    """Balance raw items across the three literacy labels and split train/test.

    Items whose "label" is not one of the three known classes are dropped.
    Every label contributes the same number of examples (the minority-class
    count), split per label by *train_ratio* (with at least one example on
    each side), and both splits are shuffled with the seeded RNG.
    """
    labels = [
        "low_health_literacy",
        "intermediate_health_literacy",
        "proficient_health_literacy",
    ]
    rng = random.Random(seed)
    buckets = {name: [] for name in labels}

    for record in raw_data:
        record_label = record.get("label")
        if record_label not in buckets:
            continue  # drop items with unknown or missing labels
        buckets[record_label].append(
            dspy.Example(
                generated_text=record["diff_label_texts"],
                literacy_label=record_label,  # matches the Signature output field
            ).with_inputs("generated_text")
        )

    smallest = min(len(buckets[name]) for name in labels)
    if smallest == 0:
        raise ValueError("One or more labels has no examples; cannot balance.")

    # Cap every label at the minority-class count to keep classes balanced.
    per_label_total = smallest
    per_label_train = int(round(per_label_total * train_ratio))
    # Guarantee at least one training and one test example per label.
    per_label_train = max(1, min(per_label_train, per_label_total - 1))

    trainset, testset = [], []
    for name in labels:
        rng.shuffle(buckets[name])
        chosen = buckets[name][:per_label_total]
        trainset += chosen[:per_label_train]
        testset += chosen[per_label_train:per_label_total]

    rng.shuffle(trainset)
    rng.shuffle(testset)
    return trainset, testset
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
import json
|
| 95 |
+
path = "/home/mshahidul/readctrl/code/text_classifier/verified_combined_0-80.json"
|
| 96 |
+
raw_data = json.load(open(path))
|
| 97 |
+
trainset, testset = prepare_data(raw_data)
|
| 98 |
+
|
| 99 |
+
def _example_to_dict(example):
|
| 100 |
+
return {
|
| 101 |
+
"generated_text": example.generated_text,
|
| 102 |
+
"literacy_label": example.literacy_label,
|
| 103 |
+
}
|
| 104 |
+
|
| 105 |
+
def save_jsonl(path, examples):
    """Write *examples* to *path* as JSON Lines (non-ASCII preserved)."""
    serialized = [
        json.dumps(_example_to_dict(ex), ensure_ascii=False) + "\n"
        for ex in examples
    ]
    with open(path, "w") as f:
        f.writelines(serialized)
|
| 109 |
+
|
| 110 |
+
train_path = "/home/mshahidul/readctrl/code/text_classifier/train.jsonl"
|
| 111 |
+
test_path = "/home/mshahidul/readctrl/code/text_classifier/test.jsonl"
|
| 112 |
+
save_jsonl(train_path, trainset)
|
| 113 |
+
save_jsonl(test_path, testset)
|
| 114 |
+
|
| 115 |
+
def health_literacy_metric(gold, pred, trace=None):
    """Lenient DSPy metric: True when the gold label appears in the prediction.

    Both labels are stripped and lowercased, and substring containment is
    used so verbose model outputs (e.g. "the text is low_health_literacy")
    still score as correct. Missing predictions score False.
    """
    predicted = getattr(pred, "literacy_label", None) if pred else None
    if predicted is None:
        return False
    expected = str(gold.literacy_label).strip().lower()
    # Containment, not equality: tolerate wordy answers.
    return expected in str(predicted).strip().lower()
|
| 124 |
+
|
| 125 |
+
optimizer = BootstrapFewShotWithRandomSearch(
|
| 126 |
+
metric=health_literacy_metric,
|
| 127 |
+
max_bootstrapped_demos=3,
|
| 128 |
+
num_candidate_programs=8,
|
| 129 |
+
teacher_settings=dict(lm=openai_model_teacher)
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
# 3. Compile! This creates the "optimized prompt"
|
| 133 |
+
compiled_classifier = optimizer.compile(HealthLiteracyClassifier(), trainset=trainset)
|
| 134 |
+
|
| 135 |
+
evaluator = Evaluate(devset=testset, metric=health_literacy_metric, num_threads=1, display_progress=True)
|
| 136 |
+
evaluation_result = evaluator(compiled_classifier)
|
| 137 |
+
accuracy_score = (
|
| 138 |
+
float(evaluation_result.score)
|
| 139 |
+
if hasattr(evaluation_result, "score")
|
| 140 |
+
else float(evaluation_result)
|
| 141 |
+
)
|
| 142 |
+
|
| 143 |
+
def _extract_usage(record):
|
| 144 |
+
if isinstance(record, dict):
|
| 145 |
+
usage = record.get("usage")
|
| 146 |
+
if usage:
|
| 147 |
+
return usage
|
| 148 |
+
response = record.get("response")
|
| 149 |
+
if isinstance(response, dict) and response.get("usage"):
|
| 150 |
+
return response["usage"]
|
| 151 |
+
return None
|
| 152 |
+
|
| 153 |
+
def calc_cost_usd(lm, price_in_per_1m, price_out_per_1m, price_cached_in_per_1m=None):
|
| 154 |
+
prompt_tokens = 0
|
| 155 |
+
completion_tokens = 0
|
| 156 |
+
cached_tokens = 0
|
| 157 |
+
for record in getattr(lm, "history", []) or []:
|
| 158 |
+
usage = _extract_usage(record)
|
| 159 |
+
if not usage:
|
| 160 |
+
continue
|
| 161 |
+
prompt_tokens += int(usage.get("prompt_tokens", usage.get("input_tokens", 0)) or 0)
|
| 162 |
+
completion_tokens += int(usage.get("completion_tokens", usage.get("output_tokens", 0)) or 0)
|
| 163 |
+
cached_tokens += int(usage.get("cached_tokens", usage.get("prompt_tokens_cached", 0)) or 0)
|
| 164 |
+
cost = (prompt_tokens / 1_000_000) * price_in_per_1m
|
| 165 |
+
cost += (completion_tokens / 1_000_000) * price_out_per_1m
|
| 166 |
+
if price_cached_in_per_1m is not None:
|
| 167 |
+
cost += (cached_tokens / 1_000_000) * price_cached_in_per_1m
|
| 168 |
+
return {
|
| 169 |
+
"prompt_tokens": prompt_tokens,
|
| 170 |
+
"completion_tokens": completion_tokens,
|
| 171 |
+
"cached_tokens": cached_tokens,
|
| 172 |
+
"cost_usd": cost,
|
| 173 |
+
}
|
| 174 |
+
|
| 175 |
+
# Fill these with current OpenAI pricing (USD per 1M tokens).
|
| 176 |
+
GPT5_PRICE_INPUT_PER_1M = 1.25
|
| 177 |
+
GPT5_PRICE_OUTPUT_PER_1M = 10.0
|
| 178 |
+
GPT5_MINI_PRICE_INPUT_PER_1M = 0.25
|
| 179 |
+
GPT5_MINI_PRICE_OUTPUT_PER_1M = 2.0
|
| 180 |
+
|
| 181 |
+
teacher_cost = calc_cost_usd(
|
| 182 |
+
openai_model_teacher,
|
| 183 |
+
GPT5_PRICE_INPUT_PER_1M,
|
| 184 |
+
GPT5_PRICE_OUTPUT_PER_1M,
|
| 185 |
+
)
|
| 186 |
+
student_cost = calc_cost_usd(
|
| 187 |
+
openai_model_student,
|
| 188 |
+
GPT5_MINI_PRICE_INPUT_PER_1M,
|
| 189 |
+
GPT5_MINI_PRICE_OUTPUT_PER_1M,
|
| 190 |
+
)
|
| 191 |
+
|
| 192 |
+
cost_report = {
|
| 193 |
+
"gpt-5": teacher_cost,
|
| 194 |
+
"gpt-5-mini": student_cost,
|
| 195 |
+
}
|
| 196 |
+
folder_name="student-gpt5-mini_teacher-gpt5_v1"
|
| 197 |
+
os.makedirs(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}", exist_ok=True)
|
| 198 |
+
compiled_classifier.save(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/model.json")
|
| 199 |
+
|
| 200 |
+
print(evaluation_result)
|
| 201 |
+
print(json.dumps(cost_report, indent=2))
|
| 202 |
+
with open(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/accuracy.json", "w") as f:
|
| 203 |
+
json.dump(
|
| 204 |
+
{
|
| 205 |
+
"accuracy_score": accuracy_score,
|
| 206 |
+
"num_results": len(getattr(evaluation_result, "results", []) or []),
|
| 207 |
+
},
|
| 208 |
+
f,
|
| 209 |
+
indent=2,
|
| 210 |
+
)
|
| 211 |
+
with open(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/cost.json", "w") as f:
|
| 212 |
+
json.dump(cost_report, f, indent=2)
|
code/text_classifier/text_classifier_dspy_vllm.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import dspy
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import random
|
| 5 |
+
from typing import Literal
|
| 6 |
+
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
|
| 7 |
+
from dspy.evaluate import Evaluate
|
| 8 |
+
|
| 9 |
+
# --- 1. LLM Configuration ---
|
| 10 |
+
api_file = "/home/mshahidul/api_new.json"
|
| 11 |
+
with open(api_file, "r") as f:
|
| 12 |
+
api_keys = json.load(f)
|
| 13 |
+
openai_api_key = api_keys["openai"]
|
| 14 |
+
|
| 15 |
+
# Student: Local vLLM (Deployment Model)
|
| 16 |
+
vllm_model = dspy.LM(
|
| 17 |
+
model="openai/dspy",
|
| 18 |
+
api_base="http://172.16.34.29:8030/v1",
|
| 19 |
+
api_key="EMPTY",
|
| 20 |
+
temperature=0.0
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
# Teacher: OpenAI (High-quality rationale generation)
|
| 24 |
+
# Note: Ensure 'gpt-5' is the correct model name in your environment (usually 'gpt-4-turbo' or 'gpt-4o')
|
| 25 |
+
openai_model_teacher = dspy.LM(model="gpt-5", api_key=openai_api_key)
|
| 26 |
+
|
| 27 |
+
# Default LM for DSPy runtime
|
| 28 |
+
# Use the local vLLM for fast iteration.
|
| 29 |
+
dspy.configure(lm=vllm_model)
|
| 30 |
+
|
| 31 |
+
class HealthLiteracySignature(dspy.Signature):
    # NOTE(review): in DSPy this docstring doubles as the instruction text
    # sent to the LM, so it is kept verbatim — editing it changes the prompt.
    """
    Analyze the linguistic complexity, use of medical jargon, and sentence
    structure of 'generated_text' relative to 'full_text' to determine
    the health literacy level.
    """
    # Input: the original technical source, used as the comparison baseline.
    full_text = dspy.InputField(desc="Original clinical or medical source text containing jargon and technical details.")
    # Input: the rewritten passage to be judged.
    generated_text = dspy.InputField(
        desc="A version of the source text rewritten for a specific audience."
    )

    # Output: expected to be one of the three literacy classes named in `desc`.
    literacy_label = dspy.OutputField(
        desc="Classification: low_health_literacy (simple words, no jargon), intermediate_health_literacy (moderate technicality), or proficient_health_literacy (highly technical/original level)."
    )
|
| 45 |
+
|
| 46 |
+
class HealthLiteracyClassifier(dspy.Module):
    """DSPy module that labels a rewrite's literacy level against its source."""

    def __init__(self):
        super().__init__()
        # Use ChainOfThought for better reasoning on medical jargon
        self.classifier = dspy.ChainOfThought(HealthLiteracySignature)

    def forward(self, full_text, generated_text):
        # Returns the underlying Prediction; callers read `.literacy_label`.
        return self.classifier(full_text=full_text, generated_text=generated_text)
|
| 54 |
+
|
| 55 |
+
def prepare_data(raw_data, seed=42, train_ratio=0.6):
    """Build a label-balanced train/test split of dspy Examples.

    Each raw item must carry "fulltext", "diff_label_texts", and "label";
    items with an unknown label are skipped. Every label contributes the
    same number of examples (the minority-class count), split per label by
    *train_ratio*, then both splits are shuffled with the seeded RNG.
    """
    labels = [
        "low_health_literacy",
        "intermediate_health_literacy",
        "proficient_health_literacy",
    ]
    rng = random.Random(seed)
    buckets = {label: [] for label in labels}
    for item in raw_data:
        label = item.get("label")
        if label not in buckets:
            # Unknown or missing label: drop the item.
            continue
        example = dspy.Example(
            full_text=item["fulltext"],
            generated_text=item["diff_label_texts"],
            literacy_label=label,  # Matches the Signature field
        ).with_inputs("full_text", "generated_text")
        buckets[label].append(example)

    min_count = min(len(buckets[label]) for label in labels)
    if min_count == 0:
        raise ValueError("One or more labels has no examples; cannot balance.")

    # Balance classes: cap every label at the minority-class count.
    per_label_total = min_count
    per_label_train = int(round(per_label_total * train_ratio))
    # Guarantee at least one training and one test example per label.
    per_label_train = max(1, min(per_label_train, per_label_total - 1))

    trainset = []
    testset = []
    for label in labels:
        rng.shuffle(buckets[label])
        selected = buckets[label][:per_label_total]
        trainset.extend(selected[:per_label_train])
        testset.extend(selected[per_label_train:per_label_total])

    rng.shuffle(trainset)
    rng.shuffle(testset)
    return trainset, testset
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
import json
|
| 96 |
+
path = "/home/mshahidul/readctrl/code/text_classifier/verified_combined_0-80.json"
|
| 97 |
+
raw_data = json.load(open(path))
|
| 98 |
+
trainset, testset = prepare_data(raw_data)
|
| 99 |
+
|
| 100 |
+
def _example_to_dict(example):
|
| 101 |
+
return {
|
| 102 |
+
"full_text": example.full_text,
|
| 103 |
+
"generated_text": example.generated_text,
|
| 104 |
+
"literacy_label": example.literacy_label,
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
def save_jsonl(path, examples):
|
| 108 |
+
with open(path, "w") as f:
|
| 109 |
+
for ex in examples:
|
| 110 |
+
f.write(json.dumps(_example_to_dict(ex), ensure_ascii=False) + "\n")
|
| 111 |
+
|
| 112 |
+
train_path = "/home/mshahidul/readctrl/code/text_classifier/train.jsonl"
|
| 113 |
+
test_path = "/home/mshahidul/readctrl/code/text_classifier/test.jsonl"
|
| 114 |
+
save_jsonl(train_path, trainset)
|
| 115 |
+
save_jsonl(test_path, testset)
|
| 116 |
+
|
| 117 |
+
def health_literacy_metric(gold, pred, trace=None):
    """Lenient correctness check: gold label contained in the prediction.

    Tolerates verbose model output by testing substring containment after
    lowercasing and stripping both sides. Missing predictions score False.
    """
    has_label = bool(pred) and hasattr(pred, 'literacy_label')
    if not has_label:
        return False

    expected = str(gold.literacy_label).strip().lower()
    observed = str(pred.literacy_label).strip().lower()

    # Containment, not equality: wordy answers still count.
    return expected in observed
|
| 126 |
+
|
| 127 |
+
optimizer = BootstrapFewShotWithRandomSearch(
|
| 128 |
+
metric=health_literacy_metric,
|
| 129 |
+
max_bootstrapped_demos=3,
|
| 130 |
+
num_candidate_programs=8,
|
| 131 |
+
teacher_settings=dict(lm=openai_model_teacher)
|
| 132 |
+
)
|
| 133 |
+
|
| 134 |
+
# 3. Compile! This creates the "optimized prompt"
|
| 135 |
+
compiled_classifier = optimizer.compile(HealthLiteracyClassifier(), trainset=trainset)
|
| 136 |
+
|
| 137 |
+
evaluator = Evaluate(devset=testset, metric=health_literacy_metric, num_threads=1, display_progress=True)
|
| 138 |
+
evaluation_result = evaluator(compiled_classifier)
|
| 139 |
+
accuracy_score = (
|
| 140 |
+
float(evaluation_result.score)
|
| 141 |
+
if hasattr(evaluation_result, "score")
|
| 142 |
+
else float(evaluation_result)
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
def _extract_usage(record):
    """Return the token-usage dict from an LM history record, or None.

    Checks a top-level "usage" key first, then a nested response["usage"].
    Non-dict records yield None.
    """
    if isinstance(record, dict):
        usage = record.get("usage")
        if usage:
            return usage
        response = record.get("response")
        if isinstance(response, dict) and response.get("usage"):
            return response["usage"]
    return None

def calc_cost_usd(lm, price_in_per_1m, price_out_per_1m, price_cached_in_per_1m=None):
    """Sum token usage across an LM's history and price it in USD.

    Prices are per 1M tokens; cached-input tokens are only billed when
    *price_cached_in_per_1m* is given. Returns token totals plus "cost_usd".
    """
    prompt_tokens = 0
    completion_tokens = 0
    cached_tokens = 0
    for record in getattr(lm, "history", []) or []:
        usage = _extract_usage(record)
        if not usage:
            continue
        # Accept both OpenAI-style and generic key names; treat None as 0.
        prompt_tokens += int(usage.get("prompt_tokens", usage.get("input_tokens", 0)) or 0)
        completion_tokens += int(usage.get("completion_tokens", usage.get("output_tokens", 0)) or 0)
        cached_tokens += int(usage.get("cached_tokens", usage.get("prompt_tokens_cached", 0)) or 0)
    cost = (prompt_tokens / 1_000_000) * price_in_per_1m
    cost += (completion_tokens / 1_000_000) * price_out_per_1m
    if price_cached_in_per_1m is not None:
        cost += (cached_tokens / 1_000_000) * price_cached_in_per_1m
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "cached_tokens": cached_tokens,
        "cost_usd": cost,
    }
|
| 176 |
+
|
| 177 |
+
# Fill these with current OpenAI pricing (USD per 1M tokens).
|
| 178 |
+
GPT5_PRICE_INPUT_PER_1M = 1.25
|
| 179 |
+
GPT5_PRICE_OUTPUT_PER_1M = 10.0
|
| 180 |
+
|
| 181 |
+
teacher_cost = calc_cost_usd(
|
| 182 |
+
openai_model_teacher,
|
| 183 |
+
GPT5_PRICE_INPUT_PER_1M,
|
| 184 |
+
GPT5_PRICE_OUTPUT_PER_1M,
|
| 185 |
+
)
|
| 186 |
+
|
| 187 |
+
cost_report = {
|
| 188 |
+
"gpt-5": teacher_cost,
|
| 189 |
+
}
|
| 190 |
+
folder_name = "vllm-qwen3-8b_teacher-gpt5_v1"
|
| 191 |
+
os.makedirs(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}", exist_ok=True)
|
| 192 |
+
compiled_classifier.save(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/model.json")
|
| 193 |
+
|
| 194 |
+
print(evaluation_result)
|
| 195 |
+
|
| 196 |
+
with open(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/accuracy.json", "w") as f:
|
| 197 |
+
json.dump(
|
| 198 |
+
{
|
| 199 |
+
"accuracy_score": accuracy_score,
|
| 200 |
+
"num_results": len(getattr(evaluation_result, "results", []) or []),
|
| 201 |
+
},
|
| 202 |
+
f,
|
| 203 |
+
indent=2,
|
| 204 |
+
)
|
| 205 |
+
print(json.dumps(cost_report, indent=2))
|
| 206 |
+
with open(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/cost.json", "w") as f:
|
| 207 |
+
json.dump(cost_report, f, indent=2)
|
code/text_classifier/text_classifier_dspy_vllm_gen_text_only.py
ADDED
|
@@ -0,0 +1,203 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import dspy
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import random
|
| 5 |
+
from typing import Literal
|
| 6 |
+
from dspy.teleprompt import BootstrapFewShotWithRandomSearch
|
| 7 |
+
from dspy.evaluate import Evaluate
|
| 8 |
+
|
| 9 |
+
# --- 1. LLM Configuration ---
|
| 10 |
+
api_file = "/home/mshahidul/api_new.json"
|
| 11 |
+
with open(api_file, "r") as f:
|
| 12 |
+
api_keys = json.load(f)
|
| 13 |
+
openai_api_key = api_keys["openai"]
|
| 14 |
+
|
| 15 |
+
# Student: Local vLLM (Deployment Model)
|
| 16 |
+
vllm_model = dspy.LM(
|
| 17 |
+
model="openai/dspy",
|
| 18 |
+
api_base="http://172.16.34.21:8040/v1",
|
| 19 |
+
api_key="EMPTY",
|
| 20 |
+
temperature=0.0
|
| 21 |
+
)
|
| 22 |
+
folder_name = "vllm-llama-3.1-8b-awq-int4_teacher-gpt5_v1"
|
| 23 |
+
# Teacher: OpenAI (High-quality rationale generation)
|
| 24 |
+
# Note: Ensure 'gpt-5' is the correct model name in your environment (usually 'gpt-4-turbo' or 'gpt-4o')
|
| 25 |
+
openai_model_teacher = dspy.LM(model="gpt-5", api_key=openai_api_key)
|
| 26 |
+
|
| 27 |
+
# Default LM for DSPy runtime
|
| 28 |
+
# Use the local vLLM for fast iteration.
|
| 29 |
+
dspy.configure(lm=vllm_model)
|
| 30 |
+
|
| 31 |
+
class HealthLiteracySignature(dspy.Signature):
|
| 32 |
+
"""
|
| 33 |
+
Analyze the linguistic complexity, use of medical jargon, and sentence
|
| 34 |
+
structure of 'generated_text' to determine the health literacy level.
|
| 35 |
+
"""
|
| 36 |
+
generated_text = dspy.InputField(
|
| 37 |
+
desc="A version of the source text rewritten for a specific audience."
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
literacy_label = dspy.OutputField(
|
| 41 |
+
desc="Classification: low_health_literacy (simple words, no jargon), intermediate_health_literacy (moderate technicality), or proficient_health_literacy (highly technical/original level)."
|
| 42 |
+
)
|
| 43 |
+
|
| 44 |
+
class HealthLiteracyClassifier(dspy.Module):
|
| 45 |
+
def __init__(self):
|
| 46 |
+
super().__init__()
|
| 47 |
+
# Use ChainOfThought for better reasoning on medical jargon
|
| 48 |
+
self.classifier = dspy.ChainOfThought(HealthLiteracySignature)
|
| 49 |
+
|
| 50 |
+
def forward(self, generated_text):
|
| 51 |
+
return self.classifier(generated_text=generated_text)
|
| 52 |
+
|
| 53 |
+
def prepare_data(raw_data, seed=42, train_ratio=0.6):
|
| 54 |
+
labels = [
|
| 55 |
+
"low_health_literacy",
|
| 56 |
+
"intermediate_health_literacy",
|
| 57 |
+
"proficient_health_literacy",
|
| 58 |
+
]
|
| 59 |
+
rng = random.Random(seed)
|
| 60 |
+
buckets = {label: [] for label in labels}
|
| 61 |
+
for item in raw_data:
|
| 62 |
+
label = item.get("label")
|
| 63 |
+
if label not in buckets:
|
| 64 |
+
continue
|
| 65 |
+
example = dspy.Example(
|
| 66 |
+
generated_text=item["diff_label_texts"],
|
| 67 |
+
literacy_label=label, # Matches the Signature field
|
| 68 |
+
).with_inputs("generated_text")
|
| 69 |
+
buckets[label].append(example)
|
| 70 |
+
|
| 71 |
+
min_count = min(len(buckets[label]) for label in labels)
|
| 72 |
+
if min_count == 0:
|
| 73 |
+
raise ValueError("One or more labels has no examples; cannot balance.")
|
| 74 |
+
|
| 75 |
+
per_label_total = min_count
|
| 76 |
+
per_label_train = int(round(per_label_total * train_ratio))
|
| 77 |
+
per_label_train = max(1, min(per_label_train, per_label_total - 1))
|
| 78 |
+
|
| 79 |
+
trainset = []
|
| 80 |
+
testset = []
|
| 81 |
+
for label in labels:
|
| 82 |
+
rng.shuffle(buckets[label])
|
| 83 |
+
selected = buckets[label][:per_label_total]
|
| 84 |
+
trainset.extend(selected[:per_label_train])
|
| 85 |
+
testset.extend(selected[per_label_train:per_label_total])
|
| 86 |
+
|
| 87 |
+
rng.shuffle(trainset)
|
| 88 |
+
rng.shuffle(testset)
|
| 89 |
+
return trainset, testset
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
import json

# Load the verified classification data and build balanced train/test splits.
path = "/home/mshahidul/readctrl/code/text_classifier/verified_combined_0-80.json"
# Fix: the original `json.load(open(path))` leaked the file handle; use a
# context manager so it is closed deterministically.
with open(path, "r", encoding="utf-8") as f:
    raw_data = json.load(f)
trainset, testset = prepare_data(raw_data)
|
| 96 |
+
|
| 97 |
+
def _example_to_dict(example):
|
| 98 |
+
return {
|
| 99 |
+
"generated_text": example.generated_text,
|
| 100 |
+
"literacy_label": example.literacy_label,
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
def save_jsonl(path, examples):
    """Write `examples` to `path` as one JSON object per line (JSONL)."""
    serialized = (
        json.dumps(_example_to_dict(ex), ensure_ascii=False) for ex in examples
    )
    with open(path, "w") as sink:
        for line in serialized:
            sink.write(line + "\n")
|
| 107 |
+
|
| 108 |
+
# Persist both splits so downstream evaluation scripts reuse exactly the
# same train/test partition produced above.
train_path = "/home/mshahidul/readctrl/code/text_classifier/train.jsonl"
test_path = "/home/mshahidul/readctrl/code/text_classifier/test.jsonl"
save_jsonl(train_path, trainset)
save_jsonl(test_path, testset)
|
| 112 |
+
|
| 113 |
+
def health_literacy_metric(gold, pred, trace=None):
    """Return True when the gold label occurs inside the predicted label.

    Comparison is case-insensitive and whitespace-trimmed; substring
    matching tolerates verbose LLM outputs that wrap the label in prose.
    Falsy predictions or ones without a `literacy_label` score False.
    """
    if not pred or not hasattr(pred, 'literacy_label'):
        return False
    expected = str(gold.literacy_label).strip().lower()
    produced = str(pred.literacy_label).strip().lower()
    return expected in produced
|
| 122 |
+
|
| 123 |
+
# Optimize the classifier's prompt via bootstrapped few-shot demos with
# random search over candidate programs. The teacher LM
# (`openai_model_teacher`, configured earlier in this file — not visible
# here) generates the candidate demonstrations.
optimizer = BootstrapFewShotWithRandomSearch(
    metric=health_literacy_metric,
    max_bootstrapped_demos=3,
    num_candidate_programs=8,
    teacher_settings=dict(lm=openai_model_teacher)
)

# 3. Compile! This creates the "optimized prompt"
compiled_classifier = optimizer.compile(HealthLiteracyClassifier(), trainset=trainset)

# Evaluate on the held-out split; num_threads=1 keeps LM calls sequential.
evaluator = Evaluate(devset=testset, metric=health_literacy_metric, num_threads=1, display_progress=True)
evaluation_result = evaluator(compiled_classifier)
# dspy's Evaluate returns either a bare float or an object exposing
# `.score`, depending on version — support both.
accuracy_score = (
    float(evaluation_result.score)
    if hasattr(evaluation_result, "score")
    else float(evaluation_result)
)
|
| 140 |
+
|
| 141 |
+
def _extract_usage(record):
|
| 142 |
+
if isinstance(record, dict):
|
| 143 |
+
usage = record.get("usage")
|
| 144 |
+
if usage:
|
| 145 |
+
return usage
|
| 146 |
+
response = record.get("response")
|
| 147 |
+
if isinstance(response, dict) and response.get("usage"):
|
| 148 |
+
return response["usage"]
|
| 149 |
+
return None
|
| 150 |
+
|
| 151 |
+
def calc_cost_usd(lm, price_in_per_1m, price_out_per_1m, price_cached_in_per_1m=None):
    """Sum token usage across an LM's call history and price it in USD.

    Prices are USD per one million tokens. Cached input tokens are billed
    only when `price_cached_in_per_1m` is given. Returns the token totals
    plus the computed `cost_usd`.
    """
    prompt_total = 0
    completion_total = 0
    cached_total = 0
    for entry in getattr(lm, "history", []) or []:
        usage = _extract_usage(entry)
        if not usage:
            continue
        # Providers name the counters differently; try both spellings.
        prompt_total += int(usage.get("prompt_tokens", usage.get("input_tokens", 0)) or 0)
        completion_total += int(usage.get("completion_tokens", usage.get("output_tokens", 0)) or 0)
        cached_total += int(usage.get("cached_tokens", usage.get("prompt_tokens_cached", 0)) or 0)

    cost = (prompt_total / 1_000_000) * price_in_per_1m
    cost += (completion_total / 1_000_000) * price_out_per_1m
    if price_cached_in_per_1m is not None:
        cost += (cached_total / 1_000_000) * price_cached_in_per_1m

    return {
        "prompt_tokens": prompt_total,
        "completion_tokens": completion_total,
        "cached_tokens": cached_total,
        "cost_usd": cost,
    }
|
| 172 |
+
|
| 173 |
+
# Fill these with current OpenAI pricing (USD per 1M tokens).
GPT5_PRICE_INPUT_PER_1M = 1.25
GPT5_PRICE_OUTPUT_PER_1M = 10.0

# Price the teacher LM's accumulated call history.
teacher_cost = calc_cost_usd(
    openai_model_teacher,
    GPT5_PRICE_INPUT_PER_1M,
    GPT5_PRICE_OUTPUT_PER_1M,
)

cost_report = {
    "gpt-5": teacher_cost,
}

# Save the compiled program plus accuracy/cost artifacts in a per-run
# folder. NOTE(review): `folder_name` is defined earlier in this file
# (not visible in this chunk) — confirm it is set before this point.
os.makedirs(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}", exist_ok=True)
compiled_classifier.save(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/model.json")

print(evaluation_result)

# Record the headline accuracy and how many per-example results exist.
with open(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/accuracy.json", "w") as f:
    json.dump(
        {
            "accuracy_score": accuracy_score,
            "num_results": len(getattr(evaluation_result, "results", []) or []),
        },
        f,
        indent=2,
    )
print(json.dumps(cost_report, indent=2))
with open(f"/home/mshahidul/readctrl/code/text_classifier/dspy_model/{folder_name}/cost.json", "w") as f:
    json.dump(cost_report, f, indent=2)
|
code/text_classifier/text_classifier_dspy_vllm_test_cpp.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
import os

import dspy
from dspy.evaluate import Evaluate


# Base URL of the locally served OpenAI-compatible endpoint; overridable
# via the LLM_CPP_API_BASE environment variable.
LLM_CPP_API_BASE = os.environ.get("LLM_CPP_API_BASE", "http://172.16.34.21:8034/v1")
# Compiled DSPy program (optimized prompt + demos) saved by the training script.
MODEL_PATH = (
    "/home/mshahidul/readctrl/code/text_classifier/dspy_model/vllm-Meta-Llama-3.1-8B-Instruct_teacher-gpt5_v1/model.json"
)
# Held-out examples written by the data-preparation step.
TEST_PATH = "/home/mshahidul/readctrl/code/text_classifier/test.jsonl"


# Deterministic decoding (temperature 0.0) against the local server.
# "EMPTY" is a placeholder key — local OpenAI-compatible servers typically
# do not validate it.
llama_cpp_lm = dspy.LM(
    model="openai/dspy",
    api_base=LLM_CPP_API_BASE,
    api_key="EMPTY",
    temperature=0.0,
)
dspy.configure(lm=llama_cpp_lm)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
class HealthLiteracySignature(dspy.Signature):
    # NOTE: dspy uses the class docstring and the fields' `desc` strings as
    # prompt instructions at runtime — editing them changes model behavior
    # and must match the signature the model was compiled with.
    """
    Analyze the linguistic complexity, use of medical jargon, and sentence
    structure of 'generated_text' to determine the health literacy level.
    """

    # Input: the rewritten text whose reading level is being judged.
    generated_text = dspy.InputField(
        desc="A version of the source text rewritten for a specific audience."
    )
    # Output: one of the three literacy labels enumerated in `desc`.
    literacy_label = dspy.OutputField(
        desc=(
            "Classification: low_health_literacy (simple words, no jargon), "
            "intermediate_health_literacy (moderate technicality), or "
            "proficient_health_literacy (highly technical/original level)."
        )
    )
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class HealthLiteracyClassifier(dspy.Module):
    """DSPy module predicting a health-literacy label for a text.

    Must mirror the class used at training time so the saved program state
    (keyed on the `classifier` attribute) loads cleanly.
    """

    def __init__(self):
        super().__init__()
        # ChainOfThought: reason first, then emit the label.
        self.classifier = dspy.ChainOfThought(HealthLiteracySignature)

    def forward(self, generated_text):
        """Classify one text; returns the dspy prediction object."""
        result = self.classifier(generated_text=generated_text)
        return result
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def load_testset(path):
    """Read test examples from a JSONL file into dspy Examples.

    Blank lines are skipped. Each record must provide `generated_text`
    (the model input) and `literacy_label` (the gold answer).
    """
    examples = []
    with open(path, "r") as handle:
        for raw in handle:
            if not raw.strip():
                continue
            record = json.loads(raw)
            examples.append(
                dspy.Example(
                    generated_text=record["generated_text"],
                    literacy_label=record["literacy_label"],
                ).with_inputs("generated_text")
            )
    return examples
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def health_literacy_metric(gold, pred, trace=None):
    """True when the gold label occurs inside the predicted label.

    Case-insensitive, whitespace-trimmed substring match; tolerates
    verbose LM outputs. Falsy predictions or ones lacking the
    `literacy_label` attribute score False.
    """
    if not pred or not hasattr(pred, "literacy_label"):
        return False
    expected = str(gold.literacy_label).strip().lower()
    produced = str(pred.literacy_label).strip().lower()
    return expected in produced
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def load_compiled_classifier(path):
    """Load a compiled DSPy program, preferring the top-level `dspy.load`.

    Newer dspy releases expose `dspy.load`; if it is missing or fails,
    fall back to instantiating the module and loading its saved state.
    Raises RuntimeError when the fallback also fails.
    """
    if hasattr(dspy, "load"):
        try:
            return dspy.load(path)
        except Exception:
            pass  # fall through to state-dict style loading
    classifier = HealthLiteracyClassifier()
    try:
        classifier.load(path)
    except Exception as exc:
        raise RuntimeError(f"Failed to load compiled model from {path}") from exc
    return classifier
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def main():
    """Evaluate the saved compiled classifier against the JSONL test set."""
    # Fail fast with clear messages before touching the LM server.
    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(f"Model file not found: {MODEL_PATH}")
    if not os.path.exists(TEST_PATH):
        raise FileNotFoundError(f"Test file not found: {TEST_PATH}")

    testset = load_testset(TEST_PATH)
    compiled_classifier = load_compiled_classifier(MODEL_PATH)

    # Single-threaded so requests to the local LM stay sequential.
    evaluator = Evaluate(
        devset=testset,
        metric=health_literacy_metric,
        num_threads=1,
        display_progress=True,
    )
    evaluation_result = evaluator(compiled_classifier)
    # dspy's Evaluate returns a bare float or an object with `.score`,
    # depending on version — support both.
    accuracy_score = (
        float(evaluation_result.score)
        if hasattr(evaluation_result, "score")
        else float(evaluation_result)
    )
    print(evaluation_result)
    print(f"accuracy_score: {accuracy_score}")
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
# Script entry point: evaluate the saved DSPy classifier on the test set.
if __name__ == "__main__":
    main()
|
code/translation_quality_check/calc_comet_bertscore_from_jsonl.py
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Compute BERTScore and COMET from saved translations.jsonl output.
|
| 4 |
+
|
| 5 |
+
Expected JSONL fields per row:
|
| 6 |
+
- target_language_file
|
| 7 |
+
- direction (e.g., en_to_es)
|
| 8 |
+
- source_text
|
| 9 |
+
- reference_text
|
| 10 |
+
- hypothesis_text
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import argparse
|
| 16 |
+
import csv
|
| 17 |
+
import json
|
| 18 |
+
import os
|
| 19 |
+
from collections import defaultdict
|
| 20 |
+
from pathlib import Path
|
| 21 |
+
from typing import Dict, List, Optional, Tuple
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def parse_args() -> argparse.Namespace:
    """Define and parse CLI options for the COMET/BERTScore scoring run."""
    parser = argparse.ArgumentParser(
        description="Calculate COMET and BERTScore from translations.jsonl"
    )
    # Input produced by the translation run (one JSON object per line).
    parser.add_argument(
        "--input-jsonl",
        default="/home/mshahidul/readctrl/code/translation_quality_check/run_20260214_201430/translations.jsonl",
        help="Path to translations.jsonl",
    )
    # Empty defaults mean "derive the path from the input file's location".
    parser.add_argument(
        "--output-json",
        default="",
        help="Output JSON path (default: beside input as score_comet_bertscore.json)",
    )
    parser.add_argument(
        "--output-csv",
        default="",
        help="Output CSV path (default: beside input as score_comet_bertscore.csv)",
    )
    parser.add_argument(
        "--summary-csv",
        default="",
        help="Optional summary.csv to update with bertscore_f1 and comet",
    )
    parser.add_argument(
        "--skip-bertscore",
        action="store_true",
        help="Skip BERTScore",
    )
    parser.add_argument(
        "--skip-comet",
        action="store_true",
        help="Skip COMET",
    )
    parser.add_argument(
        "--comet-model",
        default="Unbabel/wmt22-comet-da",
        help="COMET model name for download_model",
    )
    parser.add_argument(
        "--batch-size",
        type=int,
        default=8,
        help="Batch size for COMET prediction",
    )
    return parser.parse_args()
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def load_jsonl(path: Path) -> List[dict]:
    """Parse a JSONL file into a list of dicts, skipping blank lines.

    Raises ValueError (carrying the 1-based line number) on malformed JSON.
    """
    records: List[dict] = []
    with path.open("r", encoding="utf-8") as handle:
        for line_no, raw in enumerate(handle, start=1):
            stripped = raw.strip()
            if not stripped:
                continue
            try:
                records.append(json.loads(stripped))
            except json.JSONDecodeError as exc:
                raise ValueError(f"Invalid JSON at line {line_no} in {path}: {exc}") from exc
    return records
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def direction_target_lang(direction: str) -> str:
    """Extract the target language code from a 'src_to_tgt' direction string.

    Returns "en" when the string does not split into exactly two parts
    around "_to_".
    """
    pieces = direction.split("_to_")
    return pieces[1].strip().lower() if len(pieces) == 2 else "en"
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def compute_bertscore(
    hyps: List[str], refs: List[str], target_lang: str
) -> Optional[float]:
    """Corpus-mean BERTScore F1 rounded to 6 dp, or None if unavailable.

    Returns None (after printing a warning) when the optional bert-score
    package cannot be imported.
    """
    try:
        from bert_score import score as bert_score_fn  # type: ignore
    except Exception as exc:
        print(
            "[WARN] Could not import bert_score. "
            "Install with: pip install bert-score\n"
            f" Details: {exc}"
        )
        return None
    # BERTScore supports short language codes like en/es/fr/pt.
    scores = bert_score_fn(hyps, refs, lang=target_lang, verbose=False)
    f1 = scores[2]
    return round(float(f1.mean().item()), 6)
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
def compute_comet(
    srcs: List[str],
    hyps: List[str],
    refs: List[str],
    model_name: str,
    batch_size: int,
) -> Optional[float]:
    """Corpus COMET system score rounded to 6 dp, or None if unavailable.

    Returns None (after printing a warning) when the optional unbabel-comet
    package cannot be imported. A GPU is used only when CUDA devices are
    explicitly exposed via CUDA_VISIBLE_DEVICES.
    """
    try:
        from comet import download_model, load_from_checkpoint  # type: ignore
    except Exception as exc:
        print(
            "[WARN] Could not import comet. "
            "Install with: pip install unbabel-comet\n"
            f" Details: {exc}"
        )
        return None

    checkpoint = load_from_checkpoint(download_model(model_name))
    triples = [{"src": s, "mt": h, "ref": r} for s, h, r in zip(srcs, hyps, refs)]
    prediction = checkpoint.predict(
        triples,
        batch_size=batch_size,
        gpus=1 if os.environ.get("CUDA_VISIBLE_DEVICES") else 0,
    )
    return round(float(prediction.system_score), 6)
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def write_json(path: Path, payload: dict) -> None:
    """Serialize `payload` to `path` as pretty-printed UTF-8 JSON."""
    text = json.dumps(payload, ensure_ascii=False, indent=2)
    with path.open("w", encoding="utf-8") as sink:
        sink.write(text)
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def write_csv(path: Path, rows: List[dict]) -> None:
    """Write per-direction score rows to CSV with a fixed column order."""
    fieldnames = [
        "language_file",
        "direction",
        "n_samples",
        "bertscore_f1",
        "comet",
    ]
    with path.open("w", encoding="utf-8", newline="") as sink:
        writer = csv.DictWriter(sink, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def maybe_update_summary_csv(summary_path: Path, metrics_rows: List[dict]) -> Path:
    """Merge bertscore/comet columns into a copy of an existing summary CSV.

    Rows are matched on (language_file, direction). The merged file is
    written next to the input as `<stem>_with_comet_bertscore.csv` and its
    path returned; the original summary file is left untouched.
    """
    by_key: Dict[Tuple[str, str], dict] = {}
    for metric_row in metrics_rows:
        by_key[(metric_row["language_file"], metric_row["direction"])] = metric_row

    with summary_path.open("r", encoding="utf-8") as handle:
        reader = csv.DictReader(handle)
        original_rows = list(reader)
        columns = list(reader.fieldnames or [])

    # Append the metric columns only if the summary lacks them.
    for extra in ("bertscore_f1", "comet"):
        if extra not in columns:
            columns.append(extra)

    merged: List[dict] = []
    for row in original_rows:
        match = by_key.get((row.get("language_file", ""), row.get("direction", "")))
        if match:
            row["bertscore_f1"] = match.get("bertscore_f1", "")
            row["comet"] = match.get("comet", "")
        merged.append(row)

    out_path = summary_path.with_name(f"{summary_path.stem}_with_comet_bertscore.csv")
    with out_path.open("w", encoding="utf-8", newline="") as sink:
        writer = csv.DictWriter(sink, fieldnames=columns)
        writer.writeheader()
        writer.writerows(merged)
    return out_path
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def main() -> None:
    """Score saved translations grouped by (language file, direction).

    Writes a JSON payload and a CSV of per-group scores next to the input
    (or at the paths given), and optionally merges the scores into an
    existing summary.csv.
    """
    args = parse_args()
    input_path = Path(args.input_jsonl)
    if not input_path.exists():
        raise FileNotFoundError(f"Input not found: {input_path}")

    # Default outputs live beside the input JSONL.
    out_json = (
        Path(args.output_json)
        if args.output_json
        else input_path.with_name("score_comet_bertscore.json")
    )
    out_csv = (
        Path(args.output_csv)
        if args.output_csv
        else input_path.with_name("score_comet_bertscore.csv")
    )

    rows = load_jsonl(input_path)
    if not args.skip_bertscore:
        print("[info] BERTScore enabled")
    if not args.skip_comet:
        print("[info] COMET enabled")
    # Bucket rows by (target language file, translation direction); rows
    # missing either grouping key are dropped.
    groups: Dict[Tuple[str, str], List[dict]] = defaultdict(list)
    for r in rows:
        lang_file = str(r.get("target_language_file", "")).strip()
        direction = str(r.get("direction", "")).strip()
        if not lang_file or not direction:
            continue
        groups[(lang_file, direction)].append(r)

    score_rows: List[dict] = []
    payload = {
        "input_jsonl": str(input_path),
        "scores": {},
    }

    # Score each group; sorted() keeps the output order deterministic.
    for (lang_file, direction), group_rows in sorted(groups.items()):
        srcs = [str(x.get("source_text", "")) for x in group_rows]
        refs = [str(x.get("reference_text", "")) for x in group_rows]
        hyps = [str(x.get("hypothesis_text", "")) for x in group_rows]

        tgt_lang = direction_target_lang(direction)
        bert = None if args.skip_bertscore else compute_bertscore(hyps, refs, tgt_lang)
        comet = None
        if not args.skip_comet:
            comet = compute_comet(
                srcs=srcs,
                hyps=hyps,
                refs=refs,
                model_name=args.comet_model,
                batch_size=args.batch_size,
            )

        # In the CSV, an empty string marks a skipped/unavailable metric;
        # the JSON payload keeps None instead.
        row = {
            "language_file": lang_file,
            "direction": direction,
            "n_samples": len(group_rows),
            "bertscore_f1": bert if bert is not None else "",
            "comet": comet if comet is not None else "",
        }
        score_rows.append(row)
        payload["scores"].setdefault(lang_file, {})[direction] = {
            "n_samples": len(group_rows),
            "bertscore_f1": bert,
            "comet": comet,
        }
        print(
            f"[done] {lang_file} {direction}: "
            f"bertscore_f1={row['bertscore_f1']} comet={row['comet']}"
        )

    write_json(out_json, payload)
    write_csv(out_csv, score_rows)
    print(f"\nSaved JSON: {out_json}")
    print(f"Saved CSV: {out_csv}")

    # Optionally produce a merged copy of an existing summary.csv.
    if args.summary_csv:
        summary_path = Path(args.summary_csv)
        if not summary_path.exists():
            raise FileNotFoundError(f"summary.csv not found: {summary_path}")
        merged_path = maybe_update_summary_csv(summary_path, score_rows)
        print(f"Saved merged summary: {merged_path}")
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
# Script entry point: compute COMET/BERTScore from saved translations.
if __name__ == "__main__":
    main()
|
code/translation_quality_check/eval_gpt52_translation.py
ADDED
|
@@ -0,0 +1,438 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Evaluate GPT-5.2 translation quality on MultiClinSum files.
|
| 4 |
+
|
| 5 |
+
What this script does:
|
| 6 |
+
1) Loads EN/ES/FR/PT json files (expects fields like id/fulltext/summary)
|
| 7 |
+
2) Aligns EN with each non-EN language by shared numeric case id
|
| 8 |
+
3) Samples N aligned instances per language pair
|
| 9 |
+
4) Runs bidirectional translation with GPT-5.2:
|
| 10 |
+
- EN -> X
|
| 11 |
+
- X -> EN
|
| 12 |
+
5) Reports common MT metrics used in top venues:
|
| 13 |
+
- BLEU (sacreBLEU)
|
| 14 |
+
- chrF++ (sacreBLEU chrF)
|
| 15 |
+
- COMET (if installed)
|
| 16 |
+
- BERTScore F1 (if installed)
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
|
| 21 |
+
import argparse
|
| 22 |
+
import csv
|
| 23 |
+
import json
|
| 24 |
+
import os
|
| 25 |
+
import random
|
| 26 |
+
import re
|
| 27 |
+
import sys
|
| 28 |
+
import time
|
| 29 |
+
from dataclasses import dataclass
|
| 30 |
+
from datetime import datetime
|
| 31 |
+
from pathlib import Path
|
| 32 |
+
from typing import Dict, List, Optional
|
| 33 |
+
|
| 34 |
+
from openai import OpenAI
|
| 35 |
+
import sacrebleu
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
ID_NUM_RE = re.compile(r"_(\d+)\.txt$")
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
@dataclass
class Example:
    """A single clinical-case text, keyed for cross-language alignment."""

    case_id: str  # normalized numeric id shared across language files
    text: str  # the case text (requested field, or summary/fulltext fallback)
    raw_id: str  # original id string, e.g. "..._123.txt"
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def parse_args() -> argparse.Namespace:
    """CLI options for the GPT-5.2 translation-quality evaluation run."""
    parser = argparse.ArgumentParser(description="GPT-5.2 translation evaluation")
    # One MultiClinSum gold-standard json per language.
    parser.add_argument(
        "--en-file",
        default="/home/mshahidul/readctrl/data/testing_data_gs/multiclinsum_gs_train_en.json",
        help="Path to English json file",
    )
    parser.add_argument(
        "--es-file",
        default="/home/mshahidul/readctrl/data/testing_data_gs/multiclinsum_gs_train_es.json",
        help="Path to Spanish json file",
    )
    parser.add_argument(
        "--fr-file",
        default="/home/mshahidul/readctrl/data/testing_data_gs/multiclinsum_gs_train_fr.json",
        help="Path to French json file",
    )
    parser.add_argument(
        "--pt-file",
        default="/home/mshahidul/readctrl/data/testing_data_gs/multiclinsum_gs_train_pt.json",
        help="Path to Portuguese json file",
    )
    parser.add_argument(
        "--num-samples",
        type=int,
        default=20,
        help="Samples per language pair",
    )
    parser.add_argument("--seed", type=int, default=42, help="Random seed")
    parser.add_argument(
        "--model",
        default="gpt-5.2",
        help="OpenAI model name",
    )
    # Truncation keeps per-call cost and latency bounded.
    parser.add_argument(
        "--max-chars",
        type=int,
        default=2500,
        help="Character cap per sample to control cost/latency",
    )
    parser.add_argument(
        "--api-file",
        default="/home/mshahidul/api_new.json",
        help="JSON file containing API keys (expects key 'openai')",
    )
    parser.add_argument(
        "--output-dir",
        default="/home/mshahidul/readctrl/code/translation_quality_check",
        help="Directory to save outputs",
    )
    # The heavyweight metrics are optional and can be disabled.
    parser.add_argument(
        "--skip-comet",
        action="store_true",
        help="Skip COMET even if installed",
    )
    parser.add_argument(
        "--skip-bertscore",
        action="store_true",
        help="Skip BERTScore even if installed",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=0.0,
        help="Decoding temperature",
    )
    parser.add_argument(
        "--save-every",
        type=int,
        default=10,
        help="Checkpoint save interval (in translated instances)",
    )
    return parser.parse_args()
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def load_json(path: str) -> List[dict]:
    """Read and return an entire JSON file (expected: a list of records)."""
    with open(path, "r", encoding="utf-8") as handle:
        return json.load(handle)
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def normalize_case_id(raw_id: str) -> str:
    """Return the trailing numeric id from names like 'case_123.txt'.

    Falls back to the unmodified input when no '_<digits>.txt' suffix is
    present.
    """
    match = re.search(r"_(\d+)\.txt$", raw_id)
    return match.group(1) if match else raw_id
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def dataset_to_examples(rows: List[dict], field: str) -> Dict[str, Example]:
    """Index non-empty texts by normalized case id.

    Prefers the requested `field`; falls back to "summary" then
    "fulltext". Rows with no usable text are dropped; a later row with
    the same case id overwrites an earlier one.
    """
    examples: Dict[str, Example] = {}
    for row in rows:
        raw_id = str(row.get("id", ""))
        body = row.get(field)
        if body is None:
            body = row.get("summary") or row.get("fulltext") or ""
        body = str(body).strip()
        if not body:
            continue
        case_id = normalize_case_id(raw_id)
        examples[case_id] = Example(case_id=case_id, text=body, raw_id=raw_id)
    return examples
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def truncate_text(text: str, max_chars: int) -> str:
    """Cap `text` at `max_chars` characters, appending ' ...' when cut.

    A non-positive `max_chars` disables truncation entirely.
    """
    if max_chars <= 0 or len(text) <= max_chars:
        return text
    return text[:max_chars].rstrip() + " ..."
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def translate_one(
    client: OpenAI,
    model: str,
    text: str,
    src_lang_name: str,
    tgt_lang_name: str,
    temperature: float,
) -> str:
    """Translate `text` via the OpenAI Responses API; return stripped text.

    NOTE(review): `temperature` is accepted but never forwarded to
    `client.responses.create` — confirm whether it should be passed through
    (some reasoning models reject a temperature parameter).
    """
    # System prompt pins the model to literal, safety-first medical translation.
    system = (
        "You are a professional medical translator for clinical text. "
        "Your top priority is fidelity and patient-safety: do not hallucinate, "
        "do not add, remove, infer, or normalize medical content that is not explicitly present. "
        "Preserve the original meaning, uncertainty, negation, severity, temporality, "
        "numbers, units, dosages, lab values, abbreviations, named entities, and terminology. "
        "If a term is ambiguous, keep the closest literal translation rather than guessing. "
        "Keep formatting and sentence boundaries as close as possible to the source. "
        "Return only the translated text, with no explanation."
    )
    user = (
        f"Translate the following medical text from {src_lang_name} to {tgt_lang_name}.\n"
        "Strict rules: no extra information, no paraphrased additions, no clinical assumptions.\n\n"
        f"{text}"
    )
    response = client.responses.create(
        model=model,
        input=[
            {"role": "system", "content": system},
            {"role": "user", "content": user},
        ],
    )
    # Responses API aggregates the model output into `output_text`.
    return response.output_text.strip()
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def compute_bleu_chrf(hypotheses: List[str], references: List[str]) -> Dict[str, float]:
    """Corpus BLEU and chrF via sacreBLEU (single reference set), 4 dp."""
    bleu = sacrebleu.corpus_bleu(hypotheses, [references]).score
    # NOTE(review): `corpus_chrf` defaults to plain chrF, but the result is
    # reported under "chrf++" — confirm whether word_order=2 was intended.
    chrf = sacrebleu.corpus_chrf(hypotheses, [references]).score
    return {"bleu": round(bleu, 4), "chrf++": round(chrf, 4)}
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def maybe_compute_bertscore(
    hypotheses: List[str],
    references: List[str],
    target_lang: str,
) -> Optional[float]:
    """Mean BERTScore F1 rounded to 6 dp, or None when bert-score is absent."""
    try:
        from bert_score import score as bert_score_fn  # type: ignore
    except Exception:
        # Optional dependency: silently skip the metric if not installed.
        return None
    scores = bert_score_fn(hypotheses, references, lang=target_lang, verbose=False)
    f1 = scores[2]
    return round(float(f1.mean().item()), 6)
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def maybe_compute_comet(
    sources: List[str],
    hypotheses: List[str],
    references: List[str],
) -> Optional[float]:
    """Return the COMET system score, or None when the comet package is absent.

    Downloads/loads Unbabel/wmt22-comet-da on first use; GPU is enabled only
    when CUDA_VISIBLE_DEVICES is set in the environment.
    """
    try:
        from comet import download_model, load_from_checkpoint  # type: ignore
    except Exception:
        return None

    checkpoint = download_model("Unbabel/wmt22-comet-da")
    comet_model = load_from_checkpoint(checkpoint)

    triples = zip(sources, hypotheses, references)
    data = [{"src": src, "mt": hyp, "ref": ref} for src, hyp, ref in triples]

    gpu_count = 1 if os.environ.get("CUDA_VISIBLE_DEVICES") else 0
    result = comet_model.predict(data, batch_size=8, gpus=gpu_count)
    return round(float(result.system_score), 6)
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def ensure_dir(path: str) -> None:
    """Create *path* (including any missing parents); no-op if it exists."""
    target = Path(path)
    target.mkdir(parents=True, exist_ok=True)
|
| 228 |
+
|
| 229 |
+
|
| 230 |
+
def persist_outputs(
    json_path: Path,
    details_path: Path,
    csv_path: Path,
    all_results: dict,
    detailed_rows: List[dict],
    summary_rows: List[dict],
) -> None:
    """Write aggregate scores (JSON), per-sample rows (JSONL) and summary (CSV).

    Used as a checkpoint during the run, so every file is rewritten from
    scratch on each call.
    """
    with open(json_path, "w", encoding="utf-8") as handle:
        json.dump(all_results, handle, ensure_ascii=False, indent=2)

    with open(details_path, "w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(row, ensure_ascii=False) + "\n" for row in detailed_rows
        )

    fieldnames = [
        "language_file",
        "direction",
        "n_samples",
        "bleu",
        "chrf++",
        "bertscore_f1",
        "comet",
        "elapsed_sec",
    ]
    with open(csv_path, "w", encoding="utf-8", newline="") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        for row in summary_rows:
            writer.writerow(row)
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def resolve_openai_api_key(api_file: str) -> str:
    """Read the OpenAI API key from a JSON credentials file (key: "openai").

    Keep same loading pattern used in diff_label_text_creation_bangla.py.
    Raises KeyError if the file has no "openai" entry.
    """
    with open(api_file, "r", encoding="utf-8") as handle:
        credentials = json.load(handle)
    return str(credentials["openai"])
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def main() -> None:
    """Run the EN<->{es,fr,pt} translation evaluation end-to-end.

    For each target language: sample aligned case IDs, translate in both
    directions via the OpenAI API, score with BLEU/chrF++ (plus optional
    BERTScore and COMET), and checkpoint results to a timestamped run dir.
    """
    args = parse_args()
    api_key = resolve_openai_api_key(args.api_file)

    # Dedicated RNG instance so sampling is reproducible for a given --seed.
    rng = random.Random(args.seed)
    client = OpenAI(api_key=api_key)

    en_rows = load_json(args.en_file)
    lang_files = {"es": args.es_file, "fr": args.fr_file, "pt": args.pt_file}

    # All datasets are aligned on the "fulltext" field keyed by case ID.
    field = "fulltext"
    en_map = dataset_to_examples(en_rows, field)
    lang_maps = {
        lang: dataset_to_examples(load_json(path), field)
        for lang, path in lang_files.items()
    }

    lang_name = {"en": "English", "es": "Spanish", "fr": "French", "pt": "Portuguese"}
    bert_lang = {"en": "en", "es": "es", "fr": "fr", "pt": "pt"}

    # One timestamped directory per run keeps outputs from clobbering each other.
    # NOTE(review): datetime.utcnow() is naive and deprecated in 3.12+;
    # consider datetime.now(timezone.utc).
    timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
    run_dir = Path(args.output_dir) / f"run_{timestamp}"
    ensure_dir(str(run_dir))

    all_results = {
        "run_time_utc": datetime.utcnow().isoformat(),
        "settings": {
            "model": args.model,
            "field": field,
            "num_samples": args.num_samples,
            "max_chars": args.max_chars,
            "seed": args.seed,
            "files": {
                "en": args.en_file,
                "es": args.es_file,
                "fr": args.fr_file,
                "pt": args.pt_file,
            },
        },
        "scores": {},
    }

    detailed_rows: List[dict] = []
    summary_rows: List[dict] = []
    all_results["partial_scores"] = {}

    json_path = run_dir / "scores.json"
    details_path = run_dir / "translations.jsonl"
    csv_path = run_dir / "summary.csv"

    for tgt_lang, tgt_map in lang_maps.items():
        # Only evaluate case IDs present in both the English and target sets.
        common_ids = sorted(set(en_map.keys()) & set(tgt_map.keys()))
        if not common_ids:
            print(f"[WARN] No aligned IDs between en and {tgt_lang}. Skipping.")
            continue
        k = min(args.num_samples, len(common_ids))
        sampled_ids = rng.sample(common_ids, k=k)

        pair_results = {}
        print(f"[INFO] Evaluating EN <-> {tgt_lang.upper()} with {k} samples")

        # Evaluate both directions over the same sampled IDs.
        directions = [("en", tgt_lang), (tgt_lang, "en")]
        for src_lang, out_lang in directions:
            sources: List[str] = []
            refs: List[str] = []
            hyps: List[str] = []

            start = time.time()
            for idx, case_id in enumerate(sampled_ids, start=1):
                # Pick source/reference examples according to the direction.
                src_ex = en_map[case_id] if src_lang == "en" else tgt_map[case_id]
                ref_ex = tgt_map[case_id] if out_lang == tgt_lang else en_map[case_id]

                # Truncate both sides identically so metrics stay comparable.
                src_text = truncate_text(src_ex.text, args.max_chars)
                ref_text = truncate_text(ref_ex.text, args.max_chars)

                hyp = translate_one(
                    client=client,
                    model=args.model,
                    text=src_text,
                    src_lang_name=lang_name[src_lang],
                    tgt_lang_name=lang_name[out_lang],
                    temperature=args.temperature,
                )

                sources.append(src_text)
                refs.append(ref_text)
                hyps.append(hyp)

                detailed_rows.append(
                    {
                        "target_language_file": tgt_lang,
                        "direction": f"{src_lang}_to_{out_lang}",
                        "case_id": case_id,
                        "src_raw_id": src_ex.raw_id,
                        "ref_raw_id": ref_ex.raw_id,
                        "source_text": src_text,
                        "reference_text": ref_text,
                        "hypothesis_text": hyp,
                    }
                )
                print(
                    f" [{src_lang}->{out_lang}] {idx}/{k} done "
                    f"(case_id={case_id})"
                )

                # Periodic checkpoint: record partial BLEU/chrF++ and flush
                # everything to disk so an interrupted run loses little work.
                if args.save_every > 0 and (idx % args.save_every == 0):
                    partial_key = f"{tgt_lang}:{src_lang}_to_{out_lang}"
                    all_results["partial_scores"][partial_key] = {
                        "completed": idx,
                        "total": k,
                        **compute_bleu_chrf(hyps, refs),
                    }
                    persist_outputs(
                        json_path=json_path,
                        details_path=details_path,
                        csv_path=csv_path,
                        all_results=all_results,
                        detailed_rows=detailed_rows,
                        summary_rows=summary_rows,
                    )
                    print(
                        f" [checkpoint] saved at {idx}/{k} "
                        f"for {src_lang}->{out_lang}"
                    )

            # Final metrics for this direction. BERTScore/COMET are optional
            # and may return None when their packages are not installed.
            metric_dict = compute_bleu_chrf(hyps, refs)
            if not args.skip_bertscore:
                bs = maybe_compute_bertscore(hyps, refs, bert_lang[out_lang])
                metric_dict["bertscore_f1"] = bs if bs is not None else None
            if not args.skip_comet:
                comet = maybe_compute_comet(sources, hyps, refs)
                metric_dict["comet"] = comet if comet is not None else None

            metric_dict["n_samples"] = k
            metric_dict["elapsed_sec"] = round(time.time() - start, 2)
            key = f"{src_lang}_to_{out_lang}"
            pair_results[key] = metric_dict

            summary_rows.append(
                {
                    "language_file": tgt_lang,
                    "direction": key,
                    **metric_dict,
                }
            )

        all_results["scores"][tgt_lang] = pair_results

    # Final write of all outputs after the full sweep.
    persist_outputs(
        json_path=json_path,
        details_path=details_path,
        csv_path=csv_path,
        all_results=all_results,
        detailed_rows=detailed_rows,
        summary_rows=summary_rows,
    )

    print("\n=== Translation Evaluation Complete ===")
    print(f"Run directory: {run_dir}")
    print(f"Scores JSON: {json_path}")
    print(f"Summary CSV: {csv_path}")
    print(f"Details JSONL: {details_path}")
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Exit with 130 (128 + SIGINT), the conventional status for
        # console scripts interrupted with Ctrl-C.
        print("\nInterrupted by user.")
        sys.exit(130)
|
code/validation/data_gen_subclaims_support_valid_ch_gpt5.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openai import OpenAI
import json, os

# Load the GPT-5 validation prompt template; its <<<DOCUMENT>>> and
# <<<SUBCLAIMS>>> placeholders are substituted per record further below.
with open("/home/mshahidul/readctrl/prompts/subclaim_result_generate_gpt5.txt", "r") as prompt_handle:
    prompt_template = prompt_handle.read()


# Credentials live in a local JSON file; the "openai" entry holds the API key.
api_file = "/home/mshahidul/api_new.json"
with open(api_file, "r") as key_handle:
    api_keys = json.load(key_handle)
openai_api_key = api_keys["openai"]

client = OpenAI(api_key=openai_api_key)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def openai_return(prompt, model="gpt-5"):
    """Send *prompt* to the chat API and return the parsed JSON reply.

    Markdown code fences are stripped before parsing; if the reply still is
    not valid JSON, the cleaned raw text is returned instead of raising.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    response = client.chat.completions.create(model=model, messages=messages)

    raw = response.choices[0].message.content.strip()
    cleaned = raw.replace("```json", "").replace("```", "").strip()
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        print("⚠️ JSON parse failed — storing raw text.")
        return cleaned
    return parsed
|
| 32 |
+
|
| 33 |
+
# Source records: each has a fulltext plus easy/intermediate/hard subclaim lists.
with open("/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_full_data.json", "r") as data_handle:
    data = json.load(data_handle)

save_path = "/home/mshahidul/readctrl/data/model_validity_check/subclaims_support_validity_check_gt_gpt5(1-5).json"

# Resume from a previous partial run if the output file already exists.
# NOTE(review): resuming re-queries the same (document, label) pairs and
# appends after the old results, so duplicates can accumulate — confirm intended.
res = []
if os.path.exists(save_path):
    with open(save_path, "r") as existing:
        res = json.load(existing)

import tqdm

for i in tqdm.tqdm(range(5)):  # first 5 documents only
    for label in ["easy", "intermediate", "hard"]:
        subclaims_json = json.dumps(data[i][f'{label}_subclaims'], indent=2, ensure_ascii=False)
        new_prompt = (
            prompt_template
            .replace("<<<DOCUMENT>>>", data[i]['fulltext'])
            .replace("<<<SUBCLAIMS>>>", subclaims_json)
        )
        # import ipdb; ipdb.set_trace()
        sample = openai_return(new_prompt, model="gpt-5")
        res.append(sample)

        # Checkpoint every second accumulated sample.
        if len(res) % 2 == 0:
            with open(save_path, "w") as out:
                json.dump(res, out, indent=2, ensure_ascii=False)
            print(f"Saved {len(res)} samples so far.")

# Final write so the tail samples are persisted even on an odd count.
with open(save_path, "w") as out:
    json.dump(res, out, indent=2, ensure_ascii=False)
|
| 56 |
+
|
code/validation/subclaims_extr_valid_check_gpt5.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openai import OpenAI
import json, os

# Load the subclaim-extraction validation prompt template; its <<<TEXT>>> and
# <<<SUBCLAIMS>>> placeholders are substituted per record further below.
with open("/home/mshahidul/readctrl/prompts/subclaims_extraction_vali.txt", "r") as prompt_handle:
    prompt_template = prompt_handle.read()


# Credentials live in a local JSON file; the "openai" entry holds the API key.
api_file = "/home/mshahidul/api_new.json"
with open(api_file, "r") as key_handle:
    api_keys = json.load(key_handle)
openai_api_key = api_keys["openai"]

client = OpenAI(api_key=openai_api_key)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def openai_return(prompt, model="gpt-5"):
    """Send *prompt* to the chat API and return the parsed JSON reply.

    Markdown code fences are stripped before parsing; if the reply still is
    not valid JSON, the cleaned raw text is returned instead of raising.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    response = client.chat.completions.create(model=model, messages=messages)

    raw = response.choices[0].message.content.strip()
    cleaned = raw.replace("```json", "").replace("```", "").strip()
    try:
        parsed = json.loads(cleaned)
    except json.JSONDecodeError:
        print("⚠️ JSON parse failed — storing raw text.")
        return cleaned
    return parsed
|
| 32 |
+
|
| 33 |
+
# Source records: each has per-difficulty rewritten texts and subclaim lists.
with open("/home/mshahidul/readctrl/data/extracting_subclaim/extracted_subclaims_full_data.json", "r") as data_handle:
    data = json.load(data_handle)

save_path = "/home/mshahidul/readctrl/data/model_validity_check/subclaims_validity_check_v1.json"

# Resume from a previous partial run if the output file already exists.
# NOTE(review): resuming re-queries the same (document, label) pairs and
# appends after the old results, so duplicates can accumulate — confirm intended.
res = []
if os.path.exists(save_path):
    with open(save_path, "r") as existing:
        res = json.load(existing)

import tqdm

for i in tqdm.tqdm(range(5)):  # first 5 documents only
    for label in ["easy", "intermediate", "hard"]:
        subclaims_json = json.dumps(data[i][f"{label}_subclaims"], indent=2, ensure_ascii=False)
        new_prompt = (
            prompt_template
            .replace("<<<TEXT>>>", data[i][f"{label}_text"])
            .replace("<<<SUBCLAIMS>>>", subclaims_json)
        )
        # import ipdb; ipdb.set_trace()
        sample = openai_return(new_prompt, model="gpt-5")
        res.append(sample)

        # Checkpoint every second accumulated sample.
        if len(res) % 2 == 0:
            with open(save_path, "w") as out:
                json.dump(res, out, indent=2, ensure_ascii=False)
            print(f"Saved {len(res)} samples so far.")

# Final write so the tail samples are persisted even on an odd count.
with open(save_path, "w") as out:
    json.dump(res, out, indent=2, ensure_ascii=False)
|
| 56 |
+
|