Update src/model.py
Browse files- src/model.py +1112 -274
src/model.py
CHANGED
|
@@ -1,310 +1,1148 @@
|
|
| 1 |
-
import
|
| 2 |
import uuid
|
| 3 |
-
from datetime import datetime
|
| 4 |
import json
|
| 5 |
-
import os
|
| 6 |
import re
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
background-color: #1e1e24;
|
| 55 |
-
}
|
| 56 |
|
| 57 |
-
|
| 58 |
-
.
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
| 65 |
|
| 66 |
-
|
| 67 |
-
.
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
margin-bottom: 1rem;
|
| 71 |
-
display: flex;
|
| 72 |
-
flex-direction: column;
|
| 73 |
-
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1);
|
| 74 |
-
transition: all 0.2s ease-in-out;
|
| 75 |
-
}
|
| 76 |
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
border-radius: 12px;
|
| 94 |
-
padding: 8px 16px;
|
| 95 |
-
background: linear-gradient(135deg, #7A5FFF, #00D1B2);
|
| 96 |
-
color: white;
|
| 97 |
-
border: none;
|
| 98 |
-
transition: all 0.2s ease;
|
| 99 |
-
}
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
box-shadow: 0 4px 12px rgba(122, 95, 255, 0.4);
|
| 104 |
-
}
|
| 105 |
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
}
|
| 114 |
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
|
| 121 |
-
|
| 122 |
-
.
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
.block-container {
|
| 126 |
-
padding-top: 1rem;
|
| 127 |
-
}
|
| 128 |
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
color: #00D1B2;
|
| 132 |
-
text-decoration: none;
|
| 133 |
-
}
|
| 134 |
-
a:hover {
|
| 135 |
-
text-decoration: underline;
|
| 136 |
-
}
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
font-style: italic;
|
| 141 |
-
color: rgba(255, 107, 107, 0.8);
|
| 142 |
-
}
|
| 143 |
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
border-radius: 12px;
|
| 152 |
-
}
|
| 153 |
-
.toggle-label {
|
| 154 |
-
margin-right: 10px;
|
| 155 |
-
font-weight: 500;
|
| 156 |
-
color: #f0f0f0;
|
| 157 |
}
|
| 158 |
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
}
|
| 186 |
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
text-align: center;
|
| 192 |
-
padding: 10px;
|
| 193 |
-
position: fixed;
|
| 194 |
-
bottom: 0;
|
| 195 |
-
width: 100%;
|
| 196 |
-
z-index: 99;
|
| 197 |
}
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
if
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
if 'use_rag' not in st.session_state:
|
| 207 |
-
st.session_state.use_rag = True
|
| 208 |
-
|
| 209 |
-
# Helper function to check if explanation has meaningful content
|
| 210 |
-
def has_meaningful_content(text):
|
| 211 |
-
if not text:
|
| 212 |
-
return False
|
| 213 |
-
|
| 214 |
-
# Check if the text is just equal signs or other separators
|
| 215 |
-
stripped_text = text.strip()
|
| 216 |
-
if re.match(r'^[=\-_*]+$', stripped_text.replace('\n', '')):
|
| 217 |
-
return False
|
| 218 |
-
|
| 219 |
-
# Check if the text only contains "## REASONING" with no actual content
|
| 220 |
-
if "## REASONING" in stripped_text and len(stripped_text) < 20:
|
| 221 |
-
return False
|
| 222 |
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
-
#
|
| 226 |
-
|
| 227 |
-
if
|
| 228 |
-
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
else:
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
#
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 256 |
else:
|
| 257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 258 |
|
| 259 |
-
#
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
-
#
|
| 268 |
-
|
|
|
|
| 269 |
|
| 270 |
-
#
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
-
#
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
import uuid
|
|
|
|
| 3 |
import json
|
|
|
|
| 4 |
import re
|
| 5 |
+
import xml.etree.ElementTree as ET
|
| 6 |
+
from bs4 import BeautifulSoup
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
import os
|
| 9 |
+
import openai
|
| 10 |
+
import urllib.parse
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
+
# Load environment variables
|
| 14 |
+
load_dotenv()
|
| 15 |
+
|
| 16 |
+
# Initialize OpenAI API key
def get_openai_api_key():
    """Return the OpenAI API key read from the environment.

    Returns:
        str: The value of the ``OPENAI_API_KEY`` environment variable.

    Raises:
        ValueError: If ``OPENAI_API_KEY`` is unset (or empty).
    """
    key = os.environ.get("OPENAI_API_KEY")
    if key:
        return key
    # Fail fast with an actionable message rather than letting an API call
    # fail later with an opaque auth error.
    raise ValueError("OPENAI_API_KEY environment variable is not set")
|
| 23 |
+
|
| 24 |
+
# Set OpenAI API key
|
| 25 |
+
openai.api_key = get_openai_api_key()
|
| 26 |
+
|
| 27 |
+
# System prompts.
# SYSTEM_PROMPT drives the first turn of a consultation: it instructs the
# model to ask clarifying questions before diagnosing and to cite sources
# with [source_id] markers, which extract_and_link_sources later resolves
# into markdown links.
SYSTEM_PROMPT = """You are an advanced clinical AI assistant designed to aid healthcare professionals.
Follow these guidelines in all responses:

1. **Clarify First**: Before providing any diagnosis or plan, if the user's query is underspecified, ALWAYS ask relevant clarifying questions to gather necessary patient information. This includes, but is not limited to, symptoms, duration, severity, medical history, age, lifestyle factors (diet, exercise), and current medications.
2. Professional tone: Maintain a clear, respectful, and professional tone appropriate for medical consultation.
3. Evidence-based practice: Base all responses on current medical evidence and guidelines.
4. Transparency: Clearly distinguish between established medical facts, clinical guidance, and areas of uncertainty.
5. Structured analysis: Present information in a clear, organized manner following clinical reasoning patterns.
6. Citation: Always cite specific sources for medical claims when available using the [source_id] format.
7. Limitations: Acknowledge the limits of AI medical advice and recommend in-person consultation when appropriate.
8. Comprehensive approach: Consider differential diagnoses and relevant contextual factors.
9. Patient-centered: Focus on clinically relevant information while maintaining respect for the patient.

For each consultation:
1. Ask clarifying questions if needed (as per guideline 1).
2. Provide differential diagnosis with likelihood assessment.
3. Suggest appropriate next steps (testing, treatment, referral).
4. Include reasoning for your conclusions.
5. Cite medical literature or guidelines supporting your assessment using [source_id].

IMPORTANT: Your primary duty is to support clinical decision-making, not replace clinical judgment.
"""
| 50 |
+
|
| 51 |
+
FOLLOW_UP_PROMPT = """Continue this medical consultation based on the previous discussion.
|
| 52 |
+
Consider the information already gathered and the tentative diagnosis/plan.
|
| 53 |
+
|
| 54 |
+
When responding to the follow-up:
|
| 55 |
+
1. Reference relevant details from the prior conversation.
|
| 56 |
+
2. Address the specific follow-up question with evidence-based information.
|
| 57 |
+
3. If new information suggests a need for clarification, ask relevant questions.
|
| 58 |
+
4. Update recommendations if appropriate.
|
| 59 |
+
5. Maintain the same structured approach with transparent reasoning.
|
| 60 |
+
6. Cite additional medical literature or guidelines when relevant using [source_id].
|
| 61 |
+
|
| 62 |
+
Remember that this is an ongoing consultation where continuity of care is important.
|
| 63 |
+
"""
|
| 64 |
+
|
| 65 |
+
# Function to extract source IDs and replace them with actual links
def extract_and_link_sources(text, evidence_snippets):
    """Replace ``[source_id]`` citation placeholders with markdown links.

    Args:
        text: Model output containing ``[some_id]`` citation markers.
        evidence_snippets: List of dicts with at least ``id``, ``title``,
            ``url`` and ``citation`` keys (as produced by the fetch_* helpers).

    Returns:
        Tuple ``(linked_text, source_map)`` where *linked_text* has each
        recognised ``[id]`` replaced by ``[title](url)`` and *source_map*
        maps each matched placeholder id to its source data dict.
    """
    # IDs may contain word chars, colons, dots, hyphens and '+'
    source_pattern = r'\[([\w\d:_\-\.+]+)\]'
    matches = re.findall(source_pattern, text)

    source_map = {}  # Map to store source_id -> source data

    def _entry(snippet):
        # Build the normalized source record stored in source_map.
        return {
            "id": snippet["id"],
            "title": snippet["title"].strip(),
            "url": snippet["url"],
            "citation": snippet["citation"],
        }

    # First, try direct ID matches (most reliable)
    for source_id_match in matches:
        for snippet in evidence_snippets:
            if source_id_match == snippet["id"]:
                source_map[source_id_match] = _entry(snippet)
                break

    # Next, try fuzzy matching for cases where the exact ID isn't matched:
    # accept a snippet whose first hyphen-separated id part (e.g. the
    # journal name) matches the placeholder's first part.
    for source_id_match in matches:
        if source_id_match not in source_map and source_id_match != "source_id":
            for snippet in evidence_snippets:
                snippet_id_parts = snippet["id"].split("-")
                source_id_parts = source_id_match.split("-")
                if (snippet_id_parts and source_id_parts and
                        snippet_id_parts[0] == source_id_parts[0]):
                    source_map[source_id_match] = _entry(snippet)
                    break

    # Handle the generic [source_id] placeholder: point it at the first
    # snippet, provided it has both a url and a title.
    if "source_id" in matches and evidence_snippets and "source_id" not in source_map:
        snippet = evidence_snippets[0]
        if snippet.get("url") and snippet.get("title"):
            source_map["source_id"] = _entry(snippet)

    # Replace placeholders with markdown links. Plain str.replace is used
    # instead of re.sub: the placeholder is a literal string, and titles or
    # URLs may contain backslashes/'\\1'-like sequences that re.sub would
    # misinterpret as regex escapes (potentially raising re.error).
    linked_text = text
    for source_id_key, source_data in source_map.items():
        replacement = f"[{source_data['title']}]({source_data['url']})"
        linked_text = linked_text.replace(f"[{source_id_key}]", replacement)

    # Final fallback for any [source_id] not mapped at all
    linked_text = linked_text.replace("[source_id]", "[Medical Reference]")

    return linked_text, source_map
|
| 135 |
+
|
| 136 |
+
# Implement PubMed API integration for medical evidence retrieval
def fetch_from_pubmed_api(query, max_results=3, api_key=None):
    """Fetch medical evidence from PubMed via the NCBI E-utilities API.

    Args:
        query: Free-text user query; conversational noise (greetings,
            first-person framing) is stripped before searching.
        max_results: Maximum number of articles to return.
        api_key: Optional NCBI API key (raises E-utilities rate limits).

    Returns:
        List of snippet dicts with keys ``id``, ``title``, ``text``,
        ``citation``, ``url``, ``source_type`` and ``is_open_access``.
        Returns an empty list on any network or parse failure.
    """
    results = []

    # Clean up the query for better results: drop greetings and
    # first-person framing so the clinical terms dominate.
    cleaned_query = re.sub(r'^(hi|hello|hey|greetings|good morning|good afternoon|good evening)[,\.]?\s+', '', query.lower())
    cleaned_query = re.sub(r"(i'?m|i am)\s+a\s+\d+[-\s]year[-\s]old", '', cleaned_query)
    cleaned_query = re.sub(r'(my name is|i am|i have been|i\'ve been|i was|i have|i\'ve had|i feel|i\'m feeling|i experienced)', '', cleaned_query)

    # Try to extract key medical symptoms
    symptom_patterns = [
        r'(muscle weakness)', r'(fatigue)', r'(rash)', r'(pain)', r'(swelling)',
        r'(difficulty breathing|shortness of breath)', r'(fever)', r'(headache)',
        r'(nausea|vomiting)', r'(dizziness)', r'(numbness)', r'(tingling)'
    ]

    medical_terms = []
    for pattern in symptom_patterns:
        matches = re.findall(pattern, query.lower())
        if matches:
            medical_terms.extend(matches)

    # If we found medical terms, prioritize them in the search
    if medical_terms:
        search_query = " AND ".join(medical_terms)
        # Add the complete cleaned query as a less weighted part
        if cleaned_query:
            search_query = f"({search_query}) OR ({cleaned_query})"
    else:
        # If no medical terms found, use the cleaned query
        search_query = cleaned_query

    # Base URL for PubMed E-utilities
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    # Search parameters. NOTE: the term is passed raw — requests URL-encodes
    # params itself, so pre-quoting with urllib.parse.quote would
    # double-encode the query (e.g. spaces become %2520) and break searches.
    search_params = {
        "db": "pubmed",
        "term": search_query,
        "retmax": max_results,
        "retmode": "json",
        "sort": "relevance"
    }

    # Add API key if provided (increases rate limits)
    if api_key:
        search_params["api_key"] = api_key

    try:
        # First get article IDs (timeout so a stalled NCBI call can't hang us)
        search_response = requests.get(f"{base_url}esearch.fcgi", params=search_params, timeout=10)

        if search_response.status_code != 200:
            return []

        search_data = search_response.json()

        if "esearchresult" in search_data and "idlist" in search_data["esearchresult"]:
            ids = search_data["esearchresult"]["idlist"]

            if ids:
                # Fetch full article records as XML
                fetch_params = {
                    "db": "pubmed",
                    "id": ",".join(ids),
                    "retmode": "xml"
                }
                if api_key:
                    fetch_params["api_key"] = api_key

                fetch_response = requests.get(f"{base_url}efetch.fcgi", params=fetch_params, timeout=10)

                if fetch_response.status_code != 200:
                    return []

                try:
                    # Parse XML response
                    root = ET.fromstring(fetch_response.text)

                    for article in root.findall(".//PubmedArticle"):
                        try:
                            pmid = article.findtext(".//PMID")
                            title = article.findtext(".//ArticleTitle") or "No title available"

                            # Extract abstract (may span several AbstractText sections)
                            abstract_elements = article.findall(".//AbstractText")
                            abstract = " ".join([(elem.text or "") for elem in abstract_elements])

                            # Extract authors
                            authors = []
                            for author in article.findall(".//Author"):
                                last_name = author.findtext(".//LastName") or ""
                                initials = author.findtext(".//Initials") or ""
                                if last_name and initials:
                                    authors.append(f"{last_name} {initials}")

                            # AMA-style author list: first three, then "et al."
                            author_str = ", ".join(authors[:3])
                            if len(authors) > 3:
                                author_str += " et al."

                            # Extract journal and date
                            journal = article.findtext(".//Journal/Title") or "Journal not specified"
                            year = article.findtext(".//PubDate/Year") or "N/A"

                            # Create citation
                            citation = f"{author_str}. ({year}). {title}. {journal}. PMID: {pmid}"

                            # Create direct access URL
                            url = f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/"

                            # Check if free full text is available via PMC
                            pmc_id = article.findtext(".//ArticleId[@IdType='pmc']")
                            has_free_text = bool(pmc_id) or article.findtext(".//PublicationStatus") == "epublish"

                            # If PMC ID is available, use that URL instead as it provides full text
                            if pmc_id:
                                url = f"https://www.ncbi.nlm.nih.gov/pmc/articles/{pmc_id}/"

                            results.append({
                                "id": f"pubmed:{pmid}",
                                "title": title,
                                "text": abstract[:800] + "..." if len(abstract) > 800 else abstract,
                                "citation": citation,
                                "url": url,
                                "source_type": "PubMed" + (" (Free Full Text)" if has_free_text else ""),
                                "is_open_access": has_free_text
                            })
                        except Exception:
                            # Best effort: skip any single malformed article record
                            continue
                except ET.ParseError:
                    return []

        return results
    except Exception:
        # Best effort: evidence retrieval failure must not break the app
        return []
|
| 275 |
+
|
| 276 |
+
def fetch_from_pmc_api(query, max_results=2, api_key=None):
    """Fetch free full-text articles from PubMed Central (PMC).

    Args:
        query: Free-text user query; conversational noise is stripped.
        max_results: Maximum number of articles to return.
        api_key: Optional NCBI API key (raises E-utilities rate limits).

    Returns:
        List of snippet dicts (``id``/``title``/``text``/``citation``/
        ``url``/``source_type``/``is_open_access``); empty list on any
        network or parse failure.
    """
    results = []

    # Clean up the query for better results
    cleaned_query = re.sub(r'^(hi|hello|hey|greetings|good morning|good afternoon|good evening)[,\.]?\s+', '', query.lower())
    cleaned_query = re.sub(r"(i'?m|i am)\s+a\s+\d+[-\s]year[-\s]old", '', cleaned_query)
    cleaned_query = re.sub(r'(my name is|i am|i have been|i\'ve been|i was|i have|i\'ve had|i feel|i\'m feeling|i experienced)', '', cleaned_query)

    # Base URL for E-utilities
    base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"

    # Search parameters - specifically targeting PMC for open access
    # articles. NOTE: the term is passed raw — requests URL-encodes params
    # itself, so pre-quoting with urllib.parse.quote would double-encode
    # the query and break the [filter] clause.
    search_params = {
        "db": "pmc",
        "term": cleaned_query + " AND free full text[filter]",
        "retmax": max_results,
        "retmode": "json",
        "sort": "relevance"
    }

    # Add API key if provided
    if api_key:
        search_params["api_key"] = api_key

    try:
        # First get article IDs (timeout so a stalled call can't hang us)
        search_response = requests.get(f"{base_url}esearch.fcgi", params=search_params, timeout=10)

        if search_response.status_code != 200:
            return []

        search_data = search_response.json()

        if "esearchresult" in search_data and "idlist" in search_data["esearchresult"]:
            ids = search_data["esearchresult"]["idlist"]

            if ids:
                # Fetch article details
                fetch_params = {
                    "db": "pmc",
                    "id": ",".join(ids),
                    "retmode": "xml"
                }
                if api_key:
                    fetch_params["api_key"] = api_key

                fetch_response = requests.get(f"{base_url}efetch.fcgi", params=fetch_params, timeout=10)

                if fetch_response.status_code != 200:
                    return []

                try:
                    # Parse XML response for PMC articles
                    root = ET.fromstring(fetch_response.text)

                    for article in root.findall(".//article"):
                        try:
                            # Get PMC ID
                            article_id_elements = article.findall(".//article-id")
                            pmc_id = None
                            for id_elem in article_id_elements:
                                if id_elem.get("pub-id-type") == "pmc":
                                    pmc_id = id_elem.text

                            if not pmc_id:
                                continue

                            # Get article title (itertext flattens inline markup)
                            title_elem = article.find(".//article-title")
                            title = "".join(title_elem.itertext()) if title_elem is not None else "No title available"

                            # Extract abstract
                            abstract_elem = article.find(".//abstract")
                            abstract = ""
                            if abstract_elem is not None:
                                for p in abstract_elem.findall(".//p"):
                                    abstract += " ".join(p.itertext()) + " "

                            # If no abstract, fall back to the first body paragraphs
                            if not abstract:
                                body = article.find(".//body")
                                if body is not None:
                                    paragraphs = body.findall(".//p")
                                    abstract = " ".join([" ".join(p.itertext()) for p in paragraphs[:3]])

                            # Extract journal and date information
                            journal_elem = article.find(".//journal-title")
                            journal = "".join(journal_elem.itertext()) if journal_elem is not None else "PMC Journal"

                            year_elem = article.find(".//pub-date/year")
                            year = year_elem.text if year_elem is not None else "N/A"

                            # Extract authors
                            authors = []
                            for contrib in article.findall(".//contrib[@contrib-type='author']"):
                                surname = contrib.find(".//surname")
                                given_names = contrib.find(".//given-names")
                                if surname is not None and given_names is not None:
                                    authors.append(f"{surname.text} {given_names.text[0] if given_names.text else ''}")

                            author_str = ", ".join(authors[:3])
                            if len(authors) > 3:
                                author_str += " et al."

                            # Create citation
                            citation = f"{author_str}. ({year}). {title}. {journal}. PMC{pmc_id}"

                            # Create URL for direct access to full text
                            url = f"https://www.ncbi.nlm.nih.gov/pmc/articles/PMC{pmc_id}/"

                            results.append({
                                "id": f"pmc:{pmc_id}",
                                "title": title,
                                "text": abstract[:800] + "..." if len(abstract) > 800 else abstract,
                                "citation": citation,
                                "url": url,
                                "source_type": "PubMed Central (Open Access)",
                                "is_open_access": True
                            })
                        except Exception:
                            # Best effort: skip any single malformed article
                            continue
                except ET.ParseError:
                    return []

        return results
    except Exception:
        # Best effort: evidence retrieval failure must not break the app
        return []
|
| 407 |
+
|
| 408 |
+
def fetch_from_who_api(query, max_results=1):
    """Scrape WHO publication search results for guideline snippets.

    WHO has no public search API, so this scrapes the publications search
    page. Returns a list of snippet dicts in the same shape as the other
    fetch_* helpers, or an empty list on any failure.

    Args:
        query: Free-text search query; spaces become '+' in the URL.
        max_results: Maximum number of results to return.
    """
    try:
        # WHO search URL (as they don't have a public API, we use web scraping)
        search_url = f"https://www.who.int/publications/search-results?indexTerms={query.replace(' ', '+')}"
        response = requests.get(search_url)

        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            results = []

            # Extract article information.
            # NOTE(review): these CSS selectors assume the current WHO
            # search-results markup — verify periodically, as a site
            # redesign silently yields zero results.
            articles = soup.select('.search-results article')[:max_results]

            for article in articles:
                title_elem = article.select_one('h3')
                title = title_elem.text.strip() if title_elem else "WHO Guideline"

                desc_elem = article.select_one('.search-description')
                description = desc_elem.text.strip() if desc_elem else ""

                # Result links are site-relative, so prefix the WHO origin
                link_elem = article.select_one('a')
                link = "https://www.who.int" + link_elem['href'] if link_elem and 'href' in link_elem.attrs else ""

                # presumably the publication date; whatever .search-meta holds
                date_elem = article.select_one('.search-meta')
                date = date_elem.text.strip() if date_elem else "Date not specified"

                # Generate a unique ID based on the URL (random fallback if no link)
                who_id = link.split('/')[-1] if link else f"who-{uuid.uuid4().hex[:8]}"

                results.append({
                    "id": f"who:{who_id}",
                    "title": title,
                    # Truncate long descriptions to keep snippets compact
                    "text": description[:800] + "..." if len(description) > 800 else description,
                    "citation": f"World Health Organization. ({date}). {title}.",
                    "url": link,
                    "source_type": "WHO Guidelines",
                    "is_open_access": True  # WHO guidelines are freely accessible
                })

            return results
        return []
    except Exception:
        # Best effort: scraping failure must not break the app
        return []
|
| 452 |
+
|
| 453 |
+
def fetch_from_core_api(query, max_results=2, api_key=None):
    """Fetch open access research papers from CORE API.

    Cleans conversational noise out of the query, boosts it with any
    recognised symptom terms, then searches CORE and returns the
    highest-scoring articles (scored for full-text availability and
    medical relevance).

    Args:
        query: Raw user message / search string.
        max_results: Maximum number of articles to return.
        api_key: Optional CORE API key sent as a Bearer token.

    Returns:
        List of source dicts; empty list on any failure.
    """
    results = []

    # Clean up the query for better results: strip greetings and
    # first-person framing that would pollute the keyword search.
    cleaned_query = re.sub(r'^(hi|hello|hey|greetings|good morning|good afternoon|good evening)[,\.]?\s+', '', query.lower())
    cleaned_query = re.sub(r"(i'?m|i am)\s+a\s+\d+[-\s]year[-\s]old", '', cleaned_query)
    cleaned_query = re.sub(r'(my name is|i am|i have been|i\'ve been|i was|i have|i\'ve had|i feel|i\'m feeling|i experienced)', '', cleaned_query)

    # Extract medical terms for better search.
    symptom_patterns = [
        r'(muscle weakness)', r'(fatigue)', r'(rash)', r'(pain)', r'(swelling)',
        r'(difficulty breathing|shortness of breath)', r'(fever)', r'(headache)',
        r'(nausea|vomiting)', r'(dizziness)', r'(numbness)', r'(tingling)'
    ]

    medical_terms = []
    for pattern in symptom_patterns:
        medical_terms.extend(re.findall(pattern, query.lower()))

    # If we found medical terms, enhance the query with them.
    if medical_terms:
        search_query = cleaned_query + " " + " ".join(medical_terms)
    else:
        search_query = cleaned_query

    # Base URL for CORE API.
    base_url = "https://core.ac.uk/api/v3/search/works"

    # Search parameters: over-fetch (2x) so we can filter down to the best hits.
    search_params = {
        "q": search_query,
        "limit": max_results * 2,
        "offset": 0,
        "fields": ["title", "abstract", "authors", "year", "downloadUrl", "sourceFulltextUrl", "doi", "fullText"]
    }

    # Only send an Authorization header when a key is configured.
    # (Previously Authorization was set to None without a key; requests drops
    # None-valued headers, but building it conditionally is explicit and
    # portable to other HTTP clients.)
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

    try:
        # Timeout prevents a stalled CORE endpoint from blocking the chat loop.
        response = requests.post(base_url, json=search_params, headers=headers, timeout=15)

        if response.status_code != 200:
            return []

        data = response.json()

        if "results" in data:
            scored_articles = []

            # First pass: score every candidate article for relevance
            # (higher is better).
            for article in data["results"]:
                try:
                    score = 0

                    # Has downloadUrl or sourceFulltextUrl (direct access).
                    if article.get("downloadUrl") or article.get("sourceFulltextUrl"):
                        score += 3

                    # Has full text embedded in the response.
                    if article.get("fullText"):
                        score += 2

                    # Has a substantive abstract.
                    abstract = article.get("abstract")
                    if abstract and len(abstract) > 100:
                        score += 1

                    # Medical relevance - symptom terms in title/abstract.
                    haystack = (article.get("title", "") + article.get("abstract", "")).lower()
                    for term in medical_terms:
                        if term in haystack:
                            score += 2

                    # Store with score for later filtering.
                    scored_articles.append((score, article))
                except Exception:
                    # Malformed article records are simply skipped.
                    continue

            # Sort by score (highest first) and take the top results.
            scored_articles.sort(reverse=True, key=lambda x: x[0])
            top_articles = [article for _, article in scored_articles[:max_results]]

            # Second pass: build result records for the winners.
            for article in top_articles:
                try:
                    title = article.get("title", "No title available")
                    abstract = article.get("abstract", "")

                    # Prefer full text if available, otherwise use the abstract.
                    full_text = article.get("fullText", "")
                    if full_text:
                        text_content = f"[FULL TEXT AVAILABLE] {full_text[:1500]}..."
                    else:
                        text_content = abstract

                    authors = article.get("authors", [])
                    year = article.get("year", "N/A")

                    # Format authors: first three, then "et al.".
                    author_str = ", ".join([f"{author.get('name', '')}" for author in authors[:3]])
                    if len(authors) > 3:
                        author_str += " et al."

                    # Get the best available URL - prioritize direct download links.
                    url = ""
                    download_available = False
                    if article.get("downloadUrl"):
                        url = article.get("downloadUrl")
                        download_available = True
                    elif article.get("sourceFulltextUrl"):
                        url = article.get("sourceFulltextUrl")
                        download_available = True
                    elif article.get("doi"):
                        url = f"https://doi.org/{article.get('doi')}"

                    # Create citation.
                    citation = f"{author_str}. ({year}). {title}."
                    if article.get("doi"):
                        citation += f" DOI: {article['doi']}"

                    # Generate a unique ID.
                    core_id = article.get("id", str(uuid.uuid4()))

                    # Source type advertises how much content is available.
                    source_type = "CORE Open Access"
                    if download_available:
                        source_type += " (Full Text Available)"
                    elif full_text:
                        source_type += " (Full Text Excerpt Included)"
                    else:
                        source_type += " (Abstract Only)"

                    results.append({
                        "id": f"core:{core_id}",
                        "title": title,
                        "text": text_content[:800] + "..." if len(text_content) > 800 else text_content,
                        "citation": citation,
                        "url": url,
                        "source_type": source_type,
                        "is_open_access": True  # All CORE articles are open access
                    })
                except Exception:
                    continue

        return results
    except Exception:
        # Best-effort source: network/JSON failures yield no CORE evidence.
        return []
# Enhanced RAG System with real medical sources
def fetch_medical_evidence(query, max_results=5):
    """Gather medical evidence for a query from several real sources.

    PubMed is always queried first; PMC, CORE and WHO are tried in turn
    only while the result count is still below max_results. The combined
    list is then ordered so that full-text sources come first.
    """
    # API keys come from the environment (both optional).
    pubmed_key = os.environ.get("PUBMED_API_KEY")
    core_key = os.environ.get("CORE_API_KEY")

    # Source 1: PubMed API - always queried, prioritized for medical research.
    gathered = list(fetch_from_pubmed_api(query, max_results=max(2, max_results // 2), api_key=pubmed_key) or [])

    # Fallback sources, tried in order only while we still need more results:
    # PubMed Central (free full text), CORE (open access papers), WHO guidelines.
    fallback_fetchers = (
        lambda n: fetch_from_pmc_api(query, max_results=n, api_key=pubmed_key),
        lambda n: fetch_from_core_api(query, max_results=n, api_key=core_key),
        lambda n: fetch_from_who_api(query, max_results=n),
    )
    for fetch in fallback_fetchers:
        shortfall = max_results - len(gathered)
        if shortfall <= 0:
            break
        extra = fetch(shortfall)
        if extra:
            gathered.extend(extra)

    # Prioritize sources with full text for better diagnosis.
    def _priority(item):
        stype = item.get("source_type", "")
        return (
            "Full Text" in stype,
            "CORE" in stype,
            "PMC" in stype,
            "PubMed" in stype,
        )

    gathered.sort(key=_priority, reverse=True)

    # Limit to the requested number after sorting.
    return gathered[:max_results]
# Function to parse doctor agent responses
def parse_doctor_response(response_text):
    """Parse the doctor agent's free-form reply into structured fields.

    Returns a dict with keys: main_response (the untouched text),
    diagnosis, treatment, reasoning (list of bullet strings) and
    sources (list of reference strings / cited IDs).
    """
    parsed = {
        "main_response": response_text,
        "diagnosis": "",
        "treatment": "",
        "reasoning": [],
        "sources": []
    }

    # Diagnosis: text after a "Diagnosis" header, up to a blank line or
    # the next capitalised section header.
    m = re.search(r'(?i)diagnosis:?\s*(.*?)(?:\n\n|\n[A-Z]|\Z)', response_text, re.DOTALL)
    if m:
        parsed["diagnosis"] = m.group(1).strip()

    # Treatment / recommendations / plan section.
    m = re.search(r'(?i)(treatment|recommendations|plan):?\s*(.*?)(?:\n\n|\n[A-Z]|\Z)', response_text, re.DOTALL)
    if m:
        parsed["treatment"] = m.group(2).strip()

    # Reasoning section, optionally split into dash bullets.
    m = re.search(r'(?i)reasoning:?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
    if m:
        reasoning_text = m.group(1).strip()
        if '\n-' in reasoning_text:
            bullets = [part.strip() for part in reasoning_text.split('\n-') if part.strip()]
            # The first bullet may still carry its leading dash.
            if bullets:
                bullets[0] = bullets[0].lstrip('- ')
            parsed["reasoning"] = bullets
        else:
            parsed["reasoning"] = [reasoning_text]

    # Sources / references section: one entry per line when multi-line.
    m = re.search(r'(?i)(sources|references):?\s*(.*?)(?:\n\n\Z|\Z)', response_text, re.DOTALL)
    if m:
        sources_text = m.group(2).strip()
        if '\n' in sources_text:
            parsed["sources"] = [line.strip() for line in sources_text.split('\n') if line.strip()]
        else:
            parsed["sources"] = [sources_text]

    # Inline [source_id] citations are appended if not already listed.
    for citation in re.findall(r'\[([\w\d:]+)\]', response_text):
        if citation not in parsed["sources"]:
            parsed["sources"].append(citation)

    return parsed
# Enhanced Doctor Agent call with structured output
def doctor_agent(messages):
    """Send the chat transcript to the LLM and return its reply text.

    Uses the legacy OpenAI v0.28.1 ChatCompletion interface. On any API
    failure, an apologetic error string is returned instead of raising,
    so the consultation loop keeps running.
    """
    try:
        completion = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            temperature=0.3,
            messages=messages,
        )
        return completion.choices[0].message['content']
    except Exception as e:
        return f"I'm sorry, there was an error processing your request. Please try again. Error: {str(e)}"
# Single orchestrator turn with enhanced reasoning and citation tracking
def orchestrator_chat(history, query, use_rag, is_follow_up=False):
    """Handle a single turn of conversation with the doctor agent.

    Args:
        history: Mutable chat transcript (list of role/content dicts); the
            user query and assistant reply are appended to it in place.
        query: The user's message for this turn.
        use_rag: When True, fetch evidence snippets and instruct the model
            to cite them; when False, instruct it to omit citations.
        is_follow_up: Selects FOLLOW_UP_PROMPT instead of SYSTEM_PROMPT.

    Returns:
        Tuple (main_response, explanation_text, evidence_snippets), where
        explanation_text aggregates reasoning, cited sources and - if the
        model cited nothing - all available evidence.
    """
    # Select appropriate system prompt based on whether this is a follow-up
    if is_follow_up:
        system = {"role": "system", "content": FOLLOW_UP_PROMPT}
    else:
        system = {"role": "system", "content": SYSTEM_PROMPT}

    msgs = [system] + history

    # Evidence gathering
    evidence_snippets = []
    if use_rag:
        # Only fetch and format evidence if RAG is enabled
        evidence_snippets = fetch_medical_evidence(query)

        # Format evidence for the model
        if evidence_snippets:
            evidence_text = "MEDICAL EVIDENCE FROM AUTHORITATIVE SOURCES:\n\n"

            for i, snippet in enumerate(evidence_snippets):
                evidence_text += f"[{snippet['id']}] {snippet['title']}\n"
                evidence_text += f"Source: {snippet['source_type']}\n"
                evidence_text += f"Content: {snippet['text']}\n"
                evidence_text += f"Citation: {snippet['citation']}\n"
                evidence_text += f"URL: {snippet['url']}\n\n"

            # Enhanced instructions for better source utilization
            evidence_text += """CITATION INSTRUCTIONS:
1. When referencing these sources in your response, use the format [source_id] to cite them.
2. Prioritize information from sources marked with "Full Text Available" as they provide more comprehensive data.
3. CORE API sources provide open access full text articles that are particularly valuable for diagnosis.
4. Use the most relevant medical evidence to support your diagnostic reasoning.
5. Try to cite multiple sources to provide a well-rounded assessment.
"""

            msgs.append({"role": "system", "content": evidence_text})
        else:
            # If no evidence was found, inform the model
            no_evidence_msg = ("Note: No specific medical evidence was found in our databases for this query. "
                               "Please rely on your general medical knowledge and be sure to recommend "
                               "appropriate diagnostic steps and medical consultation.")
            msgs.append({"role": "system", "content": no_evidence_msg})

    # Add instructions for structured output
    if use_rag:
        output_instructions = """
Please structure your response clearly.

**Priority 1: Ask Clarifying Questions**
If the user's query lacks detail for a proper assessment (e.g., age, specific symptoms, medical history, duration, severity), your HIGHEST priority is to ask these questions first. Do not provide a diagnosis or plan until sufficient information is gathered.

**Priority 2: Main Response (After Clarification)**
Once sufficient information is available (either initially or after asking questions), provide:
1. A direct answer to the patient's concerns.
2. If appropriate, a clear diagnosis or differential diagnosis.
3. Recommendations for a treatment plan or next steps.
4. Ensure you cite medical evidence using the [source_id] format for any claims or information taken from the provided MEDICAL EVIDENCE snippets.

**After your main response, ALWAYS include these sections:**
- **Reasoning**: Bullet points detailing your clinical reasoning.
- **Sources**: A list of all references cited in your main response, using their full titles and corresponding URLs if they were linked (e.g., [Title of Source](URL)). If a source was just an ID without a direct link in the text, list its ID or citation.
"""
    else:
        # Different instructions when RAG is disabled - no mention of sources or citations
        output_instructions = """
Please structure your response clearly.

**Priority 1: Ask Clarifying Questions**
If the user's query lacks detail for a proper assessment (e.g., age, specific symptoms, medical history, duration, severity), your HIGHEST priority is to ask these questions first. Do not provide a diagnosis or plan until sufficient information is gathered.

**Priority 2: Main Response (After Clarification)**
Once sufficient information is available (either initially or after asking questions), provide:
1. A direct answer to the patient's concerns.
2. If appropriate, a clear diagnosis or differential diagnosis.
3. Recommendations for a treatment plan or next steps.

**After your main response, ALWAYS include this section:**
- **Reasoning**: Bullet points detailing your clinical reasoning.

IMPORTANT: Since database search is disabled, do not include citations or sources in your response.
"""

    msgs.append({"role": "system", "content": output_instructions})
    msgs.append({"role": "user", "content": query})

    # Get response from doctor agent
    response = doctor_agent(msgs)

    # Process the response based on whether RAG is enabled
    if use_rag:
        # Process the response to replace source placeholders with actual links
        linked_response, source_map = extract_and_link_sources(response, evidence_snippets)

        # Parse the response
        parsed_response = parse_doctor_response(linked_response)

        # Enhance source information with evidence snippets data
        enhanced_sources = []
        # Use the source_map from extract_and_link_sources as the primary guide for cited sources
        for source_id_key, mapped_data in source_map.items():
            enhanced_sources.append({
                "id": mapped_data["id"],  # This is the original ID from the snippet
                "title": mapped_data["title"],
                "citation": mapped_data["citation"],
                "url": mapped_data["url"],
                "source_type": "Referenced Source"  # Or derive from snippet if available
            })

        # Get source types and open access status from original snippets
        for es in enhanced_sources:
            for snippet in evidence_snippets:
                if es["id"] == snippet["id"]:
                    es["source_type"] = snippet.get("source_type", "Referenced Source")
                    es["is_open_access"] = snippet.get("is_open_access", False)
                    break

        # If there are sources in parsed_response["sources"] that are not in source_map
        # (e.g., LLM hallucinated an ID or cited something not in snippets), add them.
        current_enhanced_ids = {es['id'] for es in enhanced_sources}

        for source_text in parsed_response["sources"]:  # source_text could be "[id]", "title (url)", or just "citation"
            source_id_candidate = source_text.strip("[]")  # Basic extraction

            # Check if this source_id_candidate was part of the original evidence
            found_in_evidence = False
            for snippet in evidence_snippets:
                if source_id_candidate == snippet["id"]:
                    if source_id_candidate not in current_enhanced_ids:
                        enhanced_sources.append({
                            "id": snippet["id"],
                            "title": snippet["title"],
                            "citation": snippet["citation"],
                            "url": snippet["url"],
                            "source_type": snippet["source_type"],
                            "is_open_access": snippet.get("is_open_access", False)
                        })
                        current_enhanced_ids.add(snippet["id"])  # Add to set to avoid re-adding
                    found_in_evidence = True
                    break

            if not found_in_evidence:
                # If it's not in source_map and not directly in evidence_snippets by a simple ID match,
                # it might be a raw citation or a URL. Add it with available info.
                is_duplicate = False
                for es_item in enhanced_sources:
                    if es_item["title"] == source_text or es_item["url"] == source_text or es_item["citation"] == source_text:
                        is_duplicate = True
                        break
                if not is_duplicate and source_text not in current_enhanced_ids:
                    # Try to extract a URL if present in markdown format
                    url_match = re.search(r'\[(.*?)\]\((https?://[^)]+)\)', source_text)
                    if url_match:
                        title = url_match.group(1)
                        url = url_match.group(2)
                    else:
                        title = source_text  # Could be a citation string or a plain title
                        url = ""  # No URL found directly

                    enhanced_sources.append({
                        "id": source_id_candidate,  # Use the candidate, might be a simple title or part of citation
                        "title": title,
                        "citation": source_text,  # The original text from LLM's source list
                        "url": url,
                        "source_type": "Referenced Source (uncategorized)"
                    })
                    current_enhanced_ids.add(source_id_candidate)

        # Add the enhanced sources back to the parsed response
        parsed_response["enhanced_sources"] = enhanced_sources
        main_response = linked_response
    else:
        # If RAG is disabled, just parse the response without source processing
        parsed_response = parse_doctor_response(response)
        parsed_response["enhanced_sources"] = []
        main_response = response

    # Create detailed explanation with reasoning and sources
    explanation = []

    # Add reasoning section
    if parsed_response["reasoning"]:
        explanation.append("## REASONING")
        for i, reason in enumerate(parsed_response["reasoning"]):
            explanation.append(f"{i+1}. {reason}")
        explanation.append("")

    # Only add sources section if RAG is enabled
    if use_rag and parsed_response["enhanced_sources"]:
        explanation.append("## SOURCES USED")

        # Add enhanced sources first (these are the ones actually cited in the response)
        source_added_count = 0

        unique_sources_for_display = {}  # id: {title, url, citation, source_type}
        for source in parsed_response["enhanced_sources"]:
            # Prefer using the mapped title and URL from extract_and_link_sources if available
            display_id = source.get('id', source.get('title', 'Unknown Source'))

            if display_id not in unique_sources_for_display:
                unique_sources_for_display[display_id] = {
                    "title": source.get('title', 'N/A'),
                    "url": source.get('url', ''),
                    "citation": source.get('citation', ''),
                    "source_type": source.get('source_type', 'Referenced Source'),
                    "is_open_access": source.get('is_open_access', False)
                }

        # Create a categorized display of sources
        source_categories = {
            "CORE": [],  # CORE API full text
            "PMC": [],  # PubMed Central full text
            "PubMed": [],  # PubMed abstracts
            "WHO": [],  # WHO guidelines
            "Other": []  # Uncategorized
        }

        # Categorize sources
        for key, src_data in unique_sources_for_display.items():
            source_type = src_data['source_type']

            if "CORE" in source_type:
                source_categories["CORE"].append((key, src_data))
            elif "PMC" in source_type:
                source_categories["PMC"].append((key, src_data))
            elif "PubMed" in source_type:
                source_categories["PubMed"].append((key, src_data))
            elif "WHO" in source_type:
                source_categories["WHO"].append((key, src_data))
            else:
                source_categories["Other"].append((key, src_data))

        # Display sources by category
        for category, sources in source_categories.items():
            if sources:
                if category != "Other":  # Skip category header for Other
                    explanation.append(f"### {category} Sources:")

                for key, src_data in sources:
                    title = src_data['title']
                    url = src_data['url']
                    is_open_access = src_data.get('is_open_access', False)

                    if url:  # If URL exists, make it a markdown link
                        explanation.append(f"- [{title}]({url}) {' π' if is_open_access else ''}")
                    else:  # Otherwise, just list the title or ID
                        explanation.append(f"- {title}")

                    if src_data['source_type']:
                        explanation.append(f" Source Type: {src_data['source_type']}")
                    if src_data['citation']:  # Always show citation if available
                        explanation.append(f" Citation: {src_data['citation']}")
                    explanation.append("")  # Add a blank line for spacing
                    source_added_count += 1

        if source_added_count == 0 and parsed_response["sources"]:  # Fallback to raw sources if enhanced list is empty but LLM listed some
            explanation.append("## SOURCES MENTIONED (Raw)")  # Indicate these are less processed
            for source_text in parsed_response["sources"]:
                explanation.append(f"- {source_text.strip()}")
            explanation.append("")
            source_added_count += 1

        # If we still have no sources, remove the header
        if source_added_count == 0:  # Check if any sources were actually added to explanation
            # Remove "## SOURCES USED" header if it was added but no sources followed
            if explanation and explanation[-1] == "## SOURCES USED":
                explanation.pop()

    # Enhanced version to display clickable article links
    # Check if we have evidence snippets but no sources in the explanation
    if evidence_snippets and "## SOURCES USED" not in "\n".join(explanation):
        # If AI didn't explicitly cite sources, show available evidence anyway
        additional_explanation = ["\n## AVAILABLE MEDICAL SOURCES"]

        # Create categorized display of all available sources
        categorized_snippets = {
            "CORE Open Access": [],  # CORE API full text
            "PubMed Central": [],  # PMC full text
            "PubMed": [],  # PubMed abstracts
            "WHO Guidelines": [],  # WHO guidelines
            "Other": []  # Uncategorized
        }

        # Categorize snippets
        for snippet in evidence_snippets:
            source_type = snippet.get("source_type", "")

            if "CORE" in source_type:
                categorized_snippets["CORE Open Access"].append(snippet)
            elif "PMC" in source_type:
                categorized_snippets["PubMed Central"].append(snippet)
            elif "PubMed" in source_type and "PMC" not in source_type:
                categorized_snippets["PubMed"].append(snippet)
            elif "WHO" in source_type:
                categorized_snippets["WHO Guidelines"].append(snippet)
            else:
                categorized_snippets["Other"].append(snippet)

        # Display snippets by category
        for category, snippets in categorized_snippets.items():
            if snippets:
                if category != "Other":  # Skip category header for Other
                    additional_explanation.append(f"### {category}:")

                for snippet in snippets:
                    title = snippet.get("title", "Unknown Title")
                    url = snippet.get("url", "")
                    source_type = snippet.get("source_type", "Medical Source")
                    is_open_access = snippet.get("is_open_access", False)

                    if url:
                        # Format as clickable markdown link with open access indicator
                        additional_explanation.append(f"- [{title}]({url}) {' π' if is_open_access else ''}")
                    else:
                        additional_explanation.append(f"- {title} {' π' if is_open_access else ''}")

                    if "source_type" in snippet:
                        additional_explanation.append(f" Source Type: {snippet['source_type']}")
                    if "citation" in snippet:
                        additional_explanation.append(f" Citation: {snippet['citation']}")
                    additional_explanation.append("")

        # Add to the main explanation
        explanation.extend(additional_explanation)

        # Add a note about data availability
        data_availability_note = [
            "\n## DATA AVAILABILITY NOTE",
            "- PubMed sources typically provide abstracts only, unless marked as free full text",
            "- PubMed Central (PMC) sources provide complete free full text articles",
            "- CORE Open Access sources provide full text content from research repositories",
            "- WHO Guidelines provide official medical recommendations and protocols",
            "- Sources marked with π indicate open access content with full text available"
        ]
        explanation.extend(data_availability_note)

    # Format explanation as string
    explanation_text = "\n".join(explanation)

    # Update conversation history
    history.append({"role": "user", "content": query})
    history.append({"role": "assistant", "content": main_response})

    return main_response, explanation_text, evidence_snippets
# Enhanced interactive loop with better handling of consultations
def run_consultation(use_rag=True):
    """Drive an interactive medical consultation on the console.

    Loops reading user input until 'exit'; 'next' resets the history and
    starts a fresh consultation with a new ID. Each turn is delegated to
    orchestrator_chat, and the reply plus its detailed explanation are
    printed.
    """
    divider = "=" * 30

    print("\n===== MEDICAL AI ASSISTANT =====")
    print("Type 'exit' to end or 'next' for a new case.\n")

    if use_rag:
        print("Using medical evidence from: PubMed, PMC, CORE, and WHO")
        print("Sources marked with π provide full text access\n")

    transcript = []
    case_id = str(uuid.uuid4())[:8]
    print(f"Consultation ID: {case_id}")

    user_text = input("\nYou: ")
    while user_text.lower() != "exit":
        # A non-empty transcript means this turn continues an earlier case.
        continuing = len(transcript) > 0

        # Let the user know evidence retrieval is in progress.
        if use_rag:
            print("\nSearching medical databases...")

        reply, explanation, evidence = orchestrator_chat(transcript, user_text, use_rag, continuing)

        print("\n" + divider)
        print("AI RESPONSE")
        print(divider)
        print(reply)

        # Reasoning / sources are always shown, falling back to a notice
        # when nothing meaningful was produced.
        print("\n" + divider)
        print("DETAILED EXPLANATION")
        print(divider)
        trimmed = explanation.strip() if explanation else ""
        if trimmed and trimmed != "=" * 50:
            print(explanation)
        else:
            print("No detailed explanation or sources were generated for this response.")

        # Open Access legend only makes sense when evidence was fetched.
        if evidence:
            print("\nLEGEND: π = Open Access (full text available)")

        next_action = input("\nFollow-up? (or 'next' for new case, 'exit' to end): ")
        lowered = next_action.lower()

        if lowered == "exit":
            break
        if lowered == "next":
            # Reset state for a brand-new case.
            transcript = []
            case_id = str(uuid.uuid4())[:8]
            print(f"\nNew Consultation ID: {case_id}")
            user_text = input("\nYou: ")
        else:
            # Anything else is treated as the follow-up question itself.
            user_text = next_action

    print("\nConsultation ended.")
# Save consultation to file
def save_consultation(history, consultation_id):
    """Save the consultation history to a timestamped JSON file.

    Args:
        history: List of {"role": ..., "content": ...} message dicts.
        consultation_id: Short consultation ID used in the file name.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"consultation_{consultation_id}_{timestamp}.json"

    # utf-8 keeps non-ASCII content (names, symbols) portable across platforms.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(history, f, indent=2)

    # Bug fix: the message previously printed the literal "(unknown)"
    # instead of the actual file name.
    print(f"Consultation saved to {filename}")
|
| 1146 |
+
if __name__ == "__main__":
|
| 1147 |
+
print("\nInitializing Medical AI Assistant...")
|
| 1148 |
+
run_consultation(use_rag=True)
|