Spaces:
Sleeping
Sleeping
File size: 111,776 Bytes
6995e19 9760410 6995e19 72e921c 6995e19 e8251f8 1fb4f83 e8251f8 1fb4f83 e8251f8 6995e19 e8251f8 6995e19 1fb4f83 6995e19 9760410 6995e19 9760410 6995e19 9760410 7b063e5 9760410 7b063e5 9760410 7b063e5 9760410 6995e19 9760410 7b063e5 9760410 7b063e5 9760410 7b063e5 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 72e921c 1fb4f83 72e921c 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 9760410 6995e19 1367957 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740 1741 1742 1743 1744 1745 1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 1802 1803 1804 1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 2144 2145 2146 2147 2148 2149 2150 2151 2152 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171 2172 2173 2174 2175 2176 2177 2178 2179 2180 2181 2182 2183 2184 2185 2186 2187 2188 2189 2190 2191 2192 2193 2194 2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205 2206 2207 2208 2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252 2253 2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317 2318 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584 2585 2586 2587 2588 2589 2590 2591 2592 2593 2594 2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605 2606 2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 |
"""
rag_engine.py - Production-Ready Medical RAG Engine
Updated with role-based response handling and improved simple query detection
"""
from typing import List, Dict, Any, Optional, Tuple
import re
import json
import time
import random
from datetime import datetime
from collections import Counter
import statistics
# Medical paper templates for different domains
PAPER_TEMPLATES = {
'infectious_disease': [
"Comparison of {drug1} vs {drug2} for {condition}: A randomized controlled trial",
"Clinical outcomes of {treatment} in patients with {condition}: A multicenter study",
"Risk factors and management of {condition} in the ICU setting",
"Antimicrobial resistance patterns in {condition}: A retrospective analysis",
"Efficacy and safety of {treatment} for {condition}: A systematic review and meta-analysis",
"Diagnostic approaches for {condition}: Current evidence and guidelines",
"Prevention strategies for {condition} in hospitalized patients",
"Economic impact of {treatment} for {condition} in diverse healthcare settings"
],
'cardiology': [
"Long-term outcomes of {intervention} in {condition}: The {acronym} trial",
"Novel biomarkers for predicting {outcome} in {condition}",
"Comparison of invasive vs conservative strategies for {condition}",
"Role of {medication} in secondary prevention of {condition}",
"Advances in imaging techniques for {condition} diagnosis",
"Genetic predictors of treatment response in {condition}",
"Quality of life outcomes following {procedure} for {condition}"
],
'endocrinology': [
"Real-world effectiveness of {drug_class} in {condition} management",
"Impact of {lifestyle_intervention} on glycemic control in {condition}",
"Novel insulin delivery systems for {condition}: Patient-reported outcomes",
"Thyroid dysfunction in patients with {comorbidity}: Screening and management",
"Bone health in patients receiving {treatment} for {condition}",
"Hormonal therapies for {condition}: Comparative effectiveness analysis"
],
'neurology': [
"Early diagnosis and intervention in {condition}: Impact on long-term outcomes",
"Neuroimaging biomarkers for {condition} progression",
"Novel therapeutic targets in {condition} pathogenesis",
"Cognitive rehabilitation strategies for {condition}: A randomized trial",
"Genetic and environmental risk factors for {condition}",
"Quality of life measures in {condition} clinical trials"
],
'oncology': [
"Biomarker-driven therapy for {cancer_type}: Current status and future directions",
"Immunotherapy combinations in {cancer_type}: Efficacy and toxicity profiles",
"Liquid biopsy applications in {cancer_type} management",
"Supportive care interventions for {cancer_type} treatment side effects",
"Cost-effectiveness of targeted therapies in {cancer_type}",
"Survivorship issues in {cancer_type}: Long-term follow-up data"
],
'internal_medicine': [
"Management of {condition} in elderly patients with multiple comorbidities",
"Diagnostic uncertainty in {condition}: A clinical decision-making framework",
"Transition of care for patients with {condition}: Best practices",
"Polypharmacy management in patients with {condition}",
"Telemedicine applications for {condition} follow-up",
"Patient education strategies for {condition} self-management"
]
}
# Medical terms for paper generation (now used only as fallback)
MEDICAL_TERMS = {
'drugs': ['amoxicillin-clavulanate', 'azithromycin', 'ceftriaxone', 'doxycycline', 'levofloxacin',
'meropenem', 'vancomycin', 'piperacillin-tazobactam', 'linezolid', 'metronidazole'],
'conditions': ['community-acquired pneumonia', 'hospital-acquired pneumonia', 'sepsis', 'urinary tract infection',
'skin and soft tissue infection', 'intra-abdominal infection', 'meningitis', 'endocarditis'],
'treatments': ['antibiotic therapy', 'source control', 'resuscitation', 'ventilator management',
'infection prevention', 'antimicrobial stewardship'],
'outcomes': ['clinical cure', 'mortality', 'length of stay', 'readmission', 'antibiotic resistance',
'adverse events', 'cost-effectiveness', 'quality of life']
}
# ============================================================================
# GUIDELINE DETECTION SYSTEM
# ============================================================================
class GuidelineDetector:
"""Detect explicit guideline citations in medical papers"""
# Comprehensive guideline databases by domain
GUIDELINE_DATABASES = {
# Diabetes/Endocrinology
'endocrinology': {
'IDF': ['IDF', 'International Diabetes Federation', 'International Diabetes Federation guidelines'],
'ADA': ['ADA', 'American Diabetes Association', 'American Diabetes Association guidelines',
'ADA/EASD', 'ADA Standards of Care'],
'EASD': ['EASD', 'European Association for the Study of Diabetes'],
'AACE': ['AACE', 'American Association of Clinical Endocrinologists'],
'NICE': ['NICE', 'National Institute for Health and Care Excellence', 'NICE guidelines'],
'WHO': ['WHO', 'World Health Organization', 'WHO guidelines for diabetes'],
'ATP III': ['ATP III', 'Adult Treatment Panel III', 'NCEP ATP III'],
'KDIGO': ['KDIGO', 'Kidney Disease Improving Global Outcomes'],
'ESC': ['ESC', 'European Society of Cardiology', 'ESC/EASD'],
'AHA': ['AHA', 'American Heart Association']
},
# Cardiology
'cardiology': {
'ACC/AHA': ['ACC/AHA', 'American College of Cardiology/American Heart Association',
'ACC/AHA guidelines', 'AHA/ACC'],
'ESC': ['ESC', 'European Society of Cardiology', 'ESC guidelines'],
'AHA': ['AHA', 'American Heart Association', 'AHA guidelines'],
'ACC': ['ACC', 'American College of Cardiology'],
'NICE': ['NICE', 'National Institute for Health and Care Excellence'],
'WHO': ['WHO', 'World Health Organization'],
'ATP III': ['ATP III', 'Adult Treatment Panel III', 'NCEP ATP III'],
'JNC': ['JNC', 'Joint National Committee', 'JNC 8', 'JNC 7'],
'CHEP': ['CHEP', 'Canadian Hypertension Education Program'],
'CCS': ['CCS', 'Canadian Cardiovascular Society']
},
# Infectious Diseases
'infectious_disease': {
'IDSA': ['IDSA', 'Infectious Diseases Society of America', 'IDSA guidelines'],
'ATS': ['ATS', 'American Thoracic Society', 'ATS/IDSA'],
'CDC': ['CDC', 'Centers for Disease Control and Prevention', 'CDC guidelines'],
'WHO': ['WHO', 'World Health Organization', 'WHO guidelines'],
'ECDC': ['ECDC', 'European Centre for Disease Prevention and Control'],
'SHEA': ['SHEA', 'Society for Healthcare Epidemiology of America'],
'ESCMID': ['ESCMID', 'European Society of Clinical Microbiology and Infectious Diseases'],
'NICE': ['NICE', 'National Institute for Health and Care Excellence']
},
# Oncology
'oncology': {
'NCCN': ['NCCN', 'National Comprehensive Cancer Network', 'NCCN guidelines'],
'ASCO': ['ASCO', 'American Society of Clinical Oncology', 'ASCO guidelines'],
'ESMO': ['ESMO', 'European Society for Medical Oncology', 'ESMO guidelines'],
'AJCC': ['AJCC', 'American Joint Committee on Cancer'],
'WHO': ['WHO', 'World Health Organization'],
'NICE': ['NICE', 'National Institute for Health and Care Excellence']
},
# Neurology
'neurology': {
'AAN': ['AAN', 'American Academy of Neurology', 'AAN guidelines'],
'EFNS': ['EFNS', 'European Federation of Neurological Societies'],
'EAN': ['EAN', 'European Academy of Neurology'],
'NICE': ['NICE', 'National Institute for Health and Care Excellence'],
'WHO': ['WHO', 'World Health Organization']
},
# Internal Medicine (General)
'internal_medicine': {
'ACP': ['ACP', 'American College of Physicians', 'ACP guidelines'],
'ACC/AHA': ['ACC/AHA', 'American College of Cardiology/American Heart Association'],
'IDSA': ['IDSA', 'Infectious Diseases Society of America'],
'ATS': ['ATS', 'American Thoracic Society'],
'ADA': ['ADA', 'American Diabetes Association'],
'NICE': ['NICE', 'National Institute for Health and Care Excellence'],
'WHO': ['WHO', 'World Health Organization'],
'USPSTF': ['USPSTF', 'U.S. Preventive Services Task Force']
},
# Pulmonology
'pulmonology': {
'ATS': ['ATS', 'American Thoracic Society', 'ATS guidelines'],
'ERS': ['ERS', 'European Respiratory Society'],
'GOLD': ['GOLD', 'Global Initiative for Chronic Obstructive Lung Disease'],
'GINA': ['GINA', 'Global Initiative for Asthma'],
'NICE': ['NICE', 'National Institute for Health and Care Excellence']
},
# Gastroenterology
'gastroenterology': {
'AGA': ['AGA', 'American Gastroenterological Association', 'AGA guidelines'],
'ACG': ['ACG', 'American College of Gastroenterology'],
'UEG': ['UEG', 'United European Gastroenterology'],
'ESGE': ['ESGE', 'European Society of Gastrointestinal Endoscopy'],
'NICE': ['NICE', 'National Institute for Health and Care Excellence']
},
# Nephrology
'nephrology': {
'KDIGO': ['KDIGO', 'Kidney Disease Improving Global Outcomes', 'KDIGO guidelines'],
'NKF': ['NKF', 'National Kidney Foundation', 'NKF/KDOQI'],
'KDOQI': ['KDOQI', 'Kidney Disease Outcomes Quality Initiative'],
'ERA': ['ERA', 'European Renal Association'],
'NICE': ['NICE', 'National Institute for Health and Care Excellence']
},
# Hematology
'hematology': {
'ASH': ['ASH', 'American Society of Hematology', 'ASH guidelines'],
'ESMO': ['ESMO', 'European Society for Medical Oncology'],
'NCCN': ['NCCN', 'National Comprehensive Cancer Network'],
'WHO': ['WHO', 'World Health Organization']
}
}
# Domain-specific critical guidelines that should be mentioned
CRITICAL_GUIDELINES = {
'endocrinology': ['ADA', 'IDF', 'EASD', 'AACE', 'NICE'],
'cardiology': ['ACC/AHA', 'ESC', 'AHA', 'NICE'],
'infectious_disease': ['IDSA', 'ATS', 'CDC', 'WHO'],
'oncology': ['NCCN', 'ASCO', 'ESMO'],
'diabetes': ['ADA', 'IDF', 'EASD', 'AACE'],
'hypertension': ['ACC/AHA', 'ESC', 'JNC', 'NICE'],
'hyperlipidemia': ['ACC/AHA', 'ESC', 'NICE', 'ATP III'],
'heart_failure': ['ACC/AHA', 'ESC', 'NICE'],
'pneumonia': ['IDSA', 'ATS', 'CDC'],
'sepsis': ['SSC', 'IDSA', 'WHO'],
'COPD': ['GOLD', 'ATS', 'ERS', 'NICE'],
'asthma': ['GINA', 'ATS', 'ERS', 'NICE']
}
@staticmethod
def detect_guidelines(papers: List[Dict], domain: str, query: str) -> Dict[str, Any]:
"""Detect guideline citations in papers and identify missing critical ones"""
if not papers:
return {
'guidelines_found': [],
'critical_missing': [],
'guideline_coverage': 'NO_PAPERS',
'recommendation': 'No papers available for guideline analysis'
}
# Get relevant guideline database for domain
domain_guidelines = GuidelineDetector.GUIDELINE_DATABASES.get(domain, {})
# Also check related domains
all_guidelines = {}
if domain in GuidelineDetector.GUIDELINE_DATABASES:
all_guidelines.update(GuidelineDetector.GUIDELINE_DATABASES[domain])
# Check for condition-specific guidelines
query_lower = query.lower()
condition_guidelines = []
if 'diabetes' in query_lower or 'glycemic' in query_lower:
condition_guidelines.extend(['ADA', 'IDF', 'EASD', 'AACE'])
if 'hypertension' in query_lower or 'blood pressure' in query_lower:
condition_guidelines.extend(['ACC/AHA', 'ESC', 'JNC', 'NICE'])
if 'hyperlipidemia' in query_lower or 'cholesterol' in query_lower or 'lipid' in query_lower:
condition_guidelines.extend(['ACC/AHA', 'ESC', 'ATP III', 'NICE'])
if 'heart failure' in query_lower:
condition_guidelines.extend(['ACC/AHA', 'ESC', 'NICE'])
if 'pneumonia' in query_lower:
condition_guidelines.extend(['IDSA', 'ATS', 'CDC'])
if 'sepsis' in query_lower:
condition_guidelines.extend(['SSC', 'IDSA', 'WHO'])
if 'copd' in query_lower or 'chronic obstructive' in query_lower:
condition_guidelines.extend(['GOLD', 'ATS', 'ERS'])
if 'asthma' in query_lower:
condition_guidelines.extend(['GINA', 'ATS', 'ERS'])
# Merge domain and condition guidelines
guidelines_to_check = {}
for guideline in set(condition_guidelines):
for key, value in all_guidelines.items():
if guideline == key or guideline in key:
guidelines_to_check[key] = value
# If no specific condition guidelines, use domain guidelines
if not guidelines_to_check and domain_guidelines:
guidelines_to_check = domain_guidelines
# Scan papers for guideline mentions
found_guidelines = set()
papers_with_guidelines = []
for paper in papers:
text = f"{paper.get('title', '')} {paper.get('abstract', '')}".lower()
paper_guidelines = []
for guideline_name, patterns in guidelines_to_check.items():
for pattern in patterns:
pattern_lower = pattern.lower()
if pattern_lower in text:
found_guidelines.add(guideline_name)
if guideline_name not in paper_guidelines:
paper_guidelines.append(guideline_name)
if paper_guidelines:
papers_with_guidelines.append({
'title': paper.get('title', 'Untitled')[:100],
'guidelines': paper_guidelines
})
# Determine critical missing guidelines
critical_missing = []
if condition_guidelines:
# Check condition-specific critical guidelines
for guideline in condition_guidelines:
if guideline not in found_guidelines:
critical_missing.append(guideline)
elif domain_guidelines:
# Check domain-specific critical guidelines
critical_for_domain = GuidelineDetector.CRITICAL_GUIDELINES.get(domain, [])
for guideline in critical_for_domain:
if guideline not in found_guidelines:
critical_missing.append(guideline)
# Calculate coverage score
coverage_percentage = 0
if guidelines_to_check:
coverage_percentage = (len(found_guidelines) / len(guidelines_to_check)) * 100
if coverage_percentage >= 75:
coverage = 'HIGH'
elif coverage_percentage >= 50:
coverage = 'MODERATE'
elif coverage_percentage >= 25:
coverage = 'LOW'
else:
coverage = 'VERY_LOW'
else:
coverage = 'UNKNOWN'
# Generate recommendation
if critical_missing:
if len(critical_missing) >= 3:
recommendation = f"Missing explicit guideline citations ({', '.join(critical_missing[:3])}...)"
else:
recommendation = f"Missing explicit guideline citations ({', '.join(critical_missing)})"
elif found_guidelines:
recommendation = f"Guideline coverage: {len(found_guidelines)}/{len(guidelines_to_check) if guidelines_to_check else 'N/A'} major guidelines referenced"
else:
recommendation = "No explicit guideline citations detected"
return {
'guidelines_found': sorted(list(found_guidelines)),
'critical_missing': critical_missing,
'guideline_coverage': coverage,
'recommendation': recommendation,
'papers_with_guidelines': papers_with_guidelines[:5], # Top 5 papers with guidelines
'total_guidelines_checked': len(guidelines_to_check),
'coverage_percentage': round(coverage_percentage, 1) if guidelines_to_check else 0
}
# ADD THIS AT THE VERY TOP OF rag_engine.py (before any other imports)
import sys
import os
# Add the project root to Python path
current_dir = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.dirname(current_dir) # This goes from chat/ to MedSearchPro/
if project_root not in sys.path:
sys.path.insert(0, project_root)
print(f"β
Added project root to sys.path: {project_root}")
# Import existing modules
try:
from processing.vector_store import VectorStore
from chat.summarizer import MultiDocumentSummarizer
from chat.single_paper_summarizer import SinglePaperSummarizer
from chat.comparator import CrossPaperComparator
from chat.gap_analyzer import ResearchGapAnalyzer
from lib.memory_manager import ConversationMemory
from llm.llm_provider import XAIGrokProvider, GrokLLM
except ImportError as e:
print(f"β οΈ Some dependencies not found - using simplified mode: {e}")
# Set fallback values for all imported modules
VectorStore = None
MultiDocumentSummarizer = None
SinglePaperSummarizer = None
CrossPaperComparator = None
ResearchGapAnalyzer = None
ConversationMemory = None
GrokLLM = None
# Import config for domain mapping
try:
from config.domains import (
get_domain_display_name, get_domain_description,
validate_domain, get_all_domains
)
CONFIG_AVAILABLE = True
except ImportError:
print("β οΈ config.domains not found - using fallback domain info")
CONFIG_AVAILABLE = False
# ============================================================================
# ROLE-BASED REASONING FOR MEDICAL RESEARCH
# ============================================================================
class RoleBasedReasoning:
"""Role-based reasoning technique focused on domain-agnostic, role-appropriate responses"""
# Role definitions with domain-agnostic prompts
ROLE_SYSTEM_PROMPTS = {
'patient': {
'name': 'Patient',
'icon': 'π©Ί',
'prompt': '''You are helping a patient understand information. Use simple, clear, reassuring language.
- Focus on practical implications and what they need to know
- Avoid complex terminology or jargon
- Emphasize safety and when to seek professional help
- Be compassionate and supportive
- Do not provide diagnoses or specific medical advice
- Explain concepts in everyday terms'''
},
'student': {
'name': 'Student',
'icon': 'π',
'prompt': '''You are teaching a student. Focus on educational value and understanding.
- Explain foundational concepts and definitions
- Provide examples and analogies
- Encourage critical thinking and questions
- Structure information logically
- Connect to broader knowledge areas
- Mention learning resources when helpful'''
},
'clinician': {
'name': 'Clinician',
'icon': 'π¨ββοΈ',
'prompt': '''You are assisting a healthcare professional. Be concise, actionable, and evidence-based.
- Focus on practical implications and decision-making
- Reference guidelines and evidence levels when relevant
- Consider workflow and implementation
- Be precise but efficient with time
- Address risks and benefits clearly
- Maintain professional tone'''
},
'doctor': {
'name': 'Doctor',
'icon': 'βοΈ',
'prompt': '''You are assisting a physician. Use appropriate terminology and clinical reasoning.
- Focus on differential diagnosis, treatment options, and management
- Reference current standards of care and guidelines
- Consider patient factors and comorbidities
- Discuss evidence quality and limitations
- Be thorough but organized
- Maintain clinical accuracy'''
},
'researcher': {
'name': 'Researcher',
'icon': 'π¬',
'prompt': '''You are assisting a research scientist. Focus on methodology and evidence.
- Discuss study designs, methods, and limitations
- Analyze evidence quality and gaps
- Consider statistical significance and clinical relevance
- Reference current literature and trends
- Discuss implications for future research
- Maintain scientific rigor'''
},
'professor': {
'name': 'Professor',
'icon': 'π',
'prompt': '''You are assisting an academic educator. Focus on knowledge synthesis and pedagogy.
- Provide comprehensive overviews with context
- Compare theories, methods, and findings
- Discuss historical development and future directions
- Emphasize critical evaluation and synthesis
- Connect to broader academic discourse
- Support teaching and learning objectives'''
},
'pharmacist': {
'name': 'Pharmacist',
'icon': 'π',
'prompt': '''You are assisting a pharmacy professional. Focus on medications and safety.
- Discuss drug mechanisms, interactions, and pharmacokinetics
- Emphasize safety profiles and monitoring
- Consider dosing, administration, and compliance
- Address patient counseling points
- Reference formularies and guidelines
- Maintain focus on medication optimization'''
},
'general': {
'name': 'General User',
'icon': 'π€',
'prompt': '''You are assisting a general user. Provide balanced, accessible information.
- Adjust complexity based on the query
- Be helpful and informative without overwhelming
- Provide context and practical implications
- Use clear language with minimal jargon
- Consider diverse backgrounds and knowledge levels
- Maintain neutral, objective tone'''
}
}
@staticmethod
def create_role_prompt(query: str, domain: str, role: str,
papers_count: int = 0, guideline_info: Dict = None) -> str:
"""Create role-appropriate prompt with domain-agnostic focus"""
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
# Simple query detection - greetings and basic questions
simple_queries = ['hi', 'hello', 'hey', 'greetings', 'good morning', 'good afternoon',
'good evening', 'how are you', "what's up", 'sup']
query_lower = query.lower().strip()
if query_lower in simple_queries or len(query.split()) <= 2:
# Simple greeting or short query
if role == 'patient':
return f"""You are helping a patient. Use warm, reassuring tone.
Query: {query}
Respond with a friendly greeting and invitation to ask questions. Keep it brief and welcoming.
Example: "Hello! I'm here to help answer your health questions in simple, clear terms. What would you like to know?"""
elif role == 'student':
return f"""You are teaching a student.
Query: {query}
Respond with an encouraging greeting that invites learning questions.
Example: "Hi there! I'm here to help you learn about medical topics. What are you curious about today?"""
elif role in ['clinician', 'doctor']:
return f"""You are assisting a healthcare professional.
Query: {query}
Respond with a professional greeting appropriate for clinical setting.
Example: "Hello. I'm ready to assist with evidence-based medical information. How can I help you today?"""
elif role in ['researcher', 'professor']:
return f"""You are assisting an academic professional.
Query: {query}
Respond with a scholarly greeting that invites research questions.
Example: "Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?"""
elif role == 'pharmacist':
return f"""You are assisting a pharmacy professional.
Query: {query}
Respond with a professional greeting focused on medication information.
Example: "Hello. I can help with medication-related questions and information. How can I assist you today?"""
else: # general
return f"""You are assisting a general user.
Query: {query}
Respond with a friendly, welcoming greeting.
Example: "Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?"""
# For substantive queries, use role-appropriate analysis
role_prompt = role_info['prompt']
# Domain-agnostic instruction
domain_agnostic = f"""DOMAIN-AGNOSTIC APPROACH:
- This system can answer questions from ANY domain (tech, finance, health, education, general)
- Adapt your expertise to the query domain naturally
- Do NOT force medical framing on non-medical questions
- Only emphasize citations/guidelines when the query domain and role demand it
- Use appropriate terminology for the query's domain"""
# Build comprehensive prompt
prompt = f"""ROLE: {role_info['name']} {role_info['icon']}
{role_prompt}
{domain_agnostic}
QUERY: {query}
QUERY DOMAIN CONTEXT: {domain} (adapt your response appropriately)
RESPONSE GUIDELINES:
1. **Role-Appropriate Depth:**
- {role}: Adjust response complexity for {role_info['name'].lower()} needs
2. **Terminology Level:**
- Use language appropriate for {role_info['name'].lower()} understanding
3. **Evidence Awareness:**
- { 'Reference evidence/guidelines when domain-appropriate' if role in ['clinician', 'doctor', 'researcher'] else 'Mention evidence when helpful, not required' }
4. **Safety & Practicality:**
- { 'Include appropriate disclaimers' if role == 'patient' else 'Maintain professional standards' }
5. **Response Structure:**
- Organize information logically for {role_info['name'].lower()} understanding
- Prioritize most relevant information first
- Keep response focused and actionable"""
# Add research context if we have papers
if papers_count > 0:
prompt += f"\n\nRESEARCH CONTEXT: Analyzing {papers_count} relevant sources"
# Add guideline context if available
if guideline_info:
if guideline_info.get('guidelines_found'):
prompt += f"\nGUIDELINES REFERENCED: {', '.join(guideline_info['guidelines_found'])}"
if guideline_info.get('critical_missing'):
prompt += f"\nGUIDELINE GAPS: Missing {', '.join(guideline_info['critical_missing'][:2])}"
prompt += f"\n\nPlease provide a {role_info['name'].lower()}-appropriate response to: {query}"
return prompt
# ============================================================================
# DOMAIN CONFIGURATION
# ============================================================================
# Domain descriptions (fallback if config.domains not available)
DOMAIN_INFO = {
'internal_medicine': {
'name': 'Internal Medicine',
'icon': 'π₯',
'description': 'General internal medicine and diagnosis'
},
'endocrinology': {
'name': 'Endocrinology',
'icon': 'π§¬',
'description': 'Hormonal and metabolic disorders'
},
'cardiology': {
'name': 'Cardiology',
'icon': 'β€οΈ',
'description': 'Heart and cardiovascular diseases'
},
'neurology': {
'name': 'Neurology',
'icon': 'π§ ',
'description': 'Brain and nervous system disorders'
},
'oncology': {
'name': 'Oncology',
'icon': 'π¦ ',
'description': 'Cancer research and treatment'
},
'infectious_disease': {
'name': 'Infectious Diseases',
'icon': 'π¦ ',
'description': 'Infectious diseases and microbiology'
},
'clinical_research': {
'name': 'Clinical Research',
'icon': 'π',
'description': 'Clinical trials and evidence-based medicine'
},
'general_medical': {
'name': 'General Medical',
'icon': 'βοΈ',
'description': 'General medical research'
},
'pulmonology': {
'name': 'Pulmonology',
'icon': 'π«',
'description': 'Respiratory diseases and lung health'
},
'gastroenterology': {
'name': 'Gastroenterology',
'icon': 'π½οΈ',
'description': 'Digestive system disorders'
},
'nephrology': {
'name': 'Nephrology',
'icon': 'π«',
'description': 'Kidney diseases and disorders'
},
'hematology': {
'name': 'Hematology',
'icon': 'π©Έ',
'description': 'Blood disorders and hematologic diseases'
},
'surgery': {
'name': 'Surgery',
'icon': 'πͺ',
'description': 'Surgical procedures and interventions'
},
'orthopedics': {
'name': 'Orthopedics',
'icon': 'π¦΄',
'description': 'Musculoskeletal disorders and injuries'
},
'urology': {
'name': 'Urology',
'icon': 'π½',
'description': 'Urinary tract and male reproductive system'
},
'ophthalmology': {
'name': 'Ophthalmology',
'icon': 'ποΈ',
'description': 'Eye diseases and vision disorders'
},
'dermatology': {
'name': 'Dermatology',
'icon': 'π¦',
'description': 'Skin diseases and disorders'
},
'psychiatry': {
'name': 'Psychiatry',
'icon': 'π§',
'description': 'Mental health and psychiatric disorders'
},
'obstetrics_gynecology': {
'name': 'Obstetrics & Gynecology',
'icon': 'π€°',
'description': "Women's health and reproductive medicine"
},
'pediatrics': {
'name': 'Pediatrics',
'icon': 'πΆ',
'description': 'Child health and pediatric medicine'
},
'emergency_medicine': {
'name': 'Emergency Medicine',
'icon': 'π',
'description': 'Emergency care and acute medicine'
},
'critical_care': {
'name': 'Critical Care Medicine',
'icon': 'π₯',
'description': 'Intensive care and critical care medicine'
},
'pathology': {
'name': 'Pathology',
'icon': 'π¬',
'description': 'Disease diagnosis and laboratory medicine'
},
'laboratory_medicine': {
'name': 'Laboratory Medicine',
'icon': 'π§ͺ',
'description': 'Clinical laboratory testing and diagnostics'
},
'medical_imaging': {
'name': 'Medical Imaging & Radiology AI',
'icon': 'π·',
'description': 'Medical imaging and radiological diagnosis'
},
'bioinformatics': {
'name': 'Bioinformatics',
'icon': 'π»',
'description': 'Computational biology and data analysis'
},
'genomics': {
'name': 'Genomics & Sequencing',
'icon': 'π§¬',
'description': 'Genomic research and sequencing technologies'
},
'pharmacology': {
'name': 'Pharmacology',
'icon': 'π',
'description': 'Drug research and pharmacology'
},
'public_health': {
'name': 'Public Health Analytics',
'icon': 'π',
'description': 'Public health and epidemiology'
},
'pain_medicine': {
'name': 'Pain Medicine',
'icon': 'π©Ή',
'description': 'Pain management and treatment'
},
'nutrition': {
'name': 'Nutrition',
'icon': 'π',
'description': 'Nutritional science and dietetics'
},
'allergy_immunology': {
'name': 'Allergy & Immunology',
'icon': 'π€§',
'description': 'Allergies and immune system disorders'
},
'rehabilitation_medicine': {
'name': 'Rehabilitation Medicine',
'icon': 'βΏ',
'description': 'Physical medicine and rehabilitation'
},
'auto': {
'name': 'Auto-detect',
'icon': 'π',
'description': 'Automatic domain detection'
}
}
# User context information
USER_CONTEXT_INFO = {
'clinician': {
'name': 'Clinician',
'icon': 'π¨ββοΈ',
'description': 'Medical doctors, nurses, and healthcare providers'
},
'researcher': {
'name': 'Researcher',
'icon': 'π¬',
'description': 'Academic researchers and scientists'
},
'student': {
'name': 'Student',
'icon': 'π',
'description': 'Medical students and trainees'
},
'patient': {
'name': 'Patient',
'icon': 'π€',
'description': 'Patients and general public'
},
'general': {
'name': 'General',
'icon': 'π€',
'description': 'General audience'
}
}
# ============================================================================
# CORE COMPONENTS
# ============================================================================
class PaperRanker:
"""Rank papers by relevance to query and domain"""
def __init__(self):
self.query_cache = {}
def rank_papers(self, papers: List[Dict], query: str, domain: str = None,
user_context: str = "general") -> List[Dict]:
"""Rank papers by relevance with guideline bonus"""
if not papers:
return []
scored_papers = []
query_lower = query.lower()
query_words = set(query_lower.split())
for paper in papers:
score = 0
# Title relevance
title = paper.get('title', '').lower()
if title:
if query_lower in title:
score += 100
score += sum(10 for word in query_words if word in title)
# Abstract relevance
abstract = paper.get('abstract', '').lower()
if abstract:
score += sum(5 for word in query_words if word in abstract)
# Domain relevance
if domain and domain in DOMAIN_INFO:
domain_keywords = [
domain.split('_')[0], # First part of domain name
domain.replace('_', ' ')
]
for keyword in domain_keywords:
if keyword in abstract or keyword in title:
score += 15
# Guideline mention bonus
text = f"{title} {abstract}"
guideline_keywords = ['guideline', 'recommendation', 'consensus', 'position statement',
'ada', 'aha', 'acc', 'esc', 'idsa', 'ats', 'nccn', 'nice', 'who']
if any(keyword in text for keyword in guideline_keywords):
score += 30 # Bonus for guideline-related papers
# Recency bonus
year = self._extract_year(paper)
if year:
current_year = datetime.now().year
age = current_year - year
if age <= 2:
score += 20
elif age <= 5:
score += 10
# Source quality
source = paper.get('source', '').lower()
high_quality_sources = ['pubmed', 'nejm', 'lancet', 'jama', 'nature', 'science',
'circulation', 'jacc', 'jco', 'nejm', 'cell', 'bmj']
if any(hq_source in source for hq_source in high_quality_sources):
score += 15
paper['relevance_score'] = score
scored_papers.append((score, paper))
# Sort by score
scored_papers.sort(reverse=True, key=lambda x: x[0])
# Normalize scores
if scored_papers:
max_score = scored_papers[0][0]
if max_score > 0:
for _, paper in scored_papers:
paper['normalized_score'] = int((paper['relevance_score'] / max_score) * 100)
return [paper for _, paper in scored_papers]
def _extract_year(self, paper: Dict) -> Optional[int]:
"""Extract year from paper"""
pub_date = paper.get('publication_date', '')
if pub_date and isinstance(pub_date, str):
year_pattern = re.search(r'\b(19|20)\d{2}\b', pub_date)
if year_pattern:
try:
return int(year_pattern.group())
except:
pass
# Try other date fields
date_field = paper.get('date', '')
if date_field and isinstance(date_field, str):
year_pattern = re.search(r'\b(19|20)\d{2}\b', date_field)
if year_pattern:
try:
return int(year_pattern.group())
except:
pass
return None
class ConfidenceScorer:
"""Calculate confidence scores for medical evidence with guideline consideration"""
def __init__(self):
self.score_weights = {
'paper_count': 0.15,
'paper_quality': 0.20,
'study_design': 0.20,
'evidence_recency': 0.15,
'source_diversity': 0.10,
'domain_relevance': 0.10,
'guideline_alignment': 0.10 # New: Guideline alignment score
}
def calculate_confidence(self, papers: List[Dict], query: str,
analysis_type: str = "summary",
user_context: str = "general",
domain: str = "general_medical",
guideline_info: Dict = None) -> Dict[str, Any]:
"""Calculate confidence score with guideline consideration"""
if not papers:
return {
'overall_score': 0,
'level': 'VERY LOW β«',
'explanation': 'No supporting evidence',
'factors': {}
}
# Calculate factors
factors = {
'paper_count': self._score_paper_count(papers),
'paper_quality': self._score_paper_quality(papers),
'study_design': self._score_study_design(papers),
'evidence_recency': self._score_evidence_recency(papers),
'source_diversity': self._score_source_diversity(papers),
'domain_relevance': self._score_domain_relevance(papers, domain),
'guideline_alignment': self._score_guideline_alignment(papers, domain, query, guideline_info)
}
# Calculate overall score
overall_score = 0
for factor, weight in self.score_weights.items():
if factor in factors:
overall_score += factors[factor] * weight
# Generate explanation with guideline context
explanation = self._generate_explanation(factors, overall_score, domain, guideline_info)
return {
'overall_score': round(min(100, overall_score), 1),
'level': self._score_to_level(overall_score),
'explanation': explanation,
'factors': {k: round(v, 1) for k, v in factors.items()},
'guideline_info': guideline_info
}
def _score_guideline_alignment(self, papers: List[Dict], domain: str,
query: str, guideline_info: Dict = None) -> float:
"""Score based on guideline alignment"""
if not guideline_info:
return 50 # Neutral if no guideline info
if guideline_info.get('guideline_coverage') == 'NO_PAPERS':
return 0
# Base score on coverage percentage
coverage_pct = guideline_info.get('coverage_percentage', 0)
# Penalize for missing critical guidelines
critical_missing = len(guideline_info.get('critical_missing', []))
if critical_missing > 0:
penalty = critical_missing * 10
coverage_pct = max(0, coverage_pct - penalty)
# Consider guideline relevance based on domain
if domain in ['endocrinology', 'cardiology', 'infectious_disease']:
# Guidelines are critical for these domains
return min(100, coverage_pct)
elif domain in ['oncology', 'neurology', 'internal_medicine']:
# Guidelines are important but not always critical
return min(100, coverage_pct * 0.9)
else:
# Guidelines are less critical
return min(100, coverage_pct * 0.7)
def _score_paper_count(self, papers: List[Dict]) -> float:
"""Score based on number of papers"""
count = len(papers)
if count >= 10:
return 100
elif count >= 7:
return 85
elif count >= 5:
return 70
elif count >= 3:
return 50
elif count >= 1:
return 30
return 0
def _score_paper_quality(self, papers: List[Dict]) -> float:
"""Score based on paper quality"""
if not papers:
return 0
scores = []
for paper in papers[:10]:
score = 50
# Source quality
source = paper.get('source', '').lower()
high_quality_sources = ['pubmed', 'nejm', 'lancet', 'jama', 'nature', 'science',
'circulation', 'jacc', 'jco', 'cell', 'bmj', 'springer']
if any(hq_source in source for hq_source in high_quality_sources):
score += 20
# Journal quality
journal = paper.get('journal', '').lower()
if any(hq_journal in journal for hq_journal in high_quality_sources):
score += 10
# Citations (if available)
citations = paper.get('citations', 0)
if citations > 100:
score += 15
elif citations > 20:
score += 10
scores.append(min(100, score))
return statistics.mean(scores) if scores else 50
def _score_study_design(self, papers: List[Dict]) -> float:
"""Score based on study design"""
if not papers:
return 0
design_scores = {
'RCT': 100,
'randomized controlled trial': 100,
'prospective cohort': 80,
'retrospective cohort': 60,
'case-control': 50,
'review': 30,
'meta-analysis': 90,
'systematic review': 85,
'case report': 20,
'case series': 25,
'guideline': 95,
'consensus': 90
}
scores = []
for paper in papers[:10]:
abstract = paper.get('abstract', '').lower()
title = paper.get('title', '').lower()
text = abstract + " " + title
paper_score = 30
for design, score in design_scores.items():
if design in text:
paper_score = max(paper_score, score)
scores.append(paper_score)
return statistics.mean(scores) if scores else 30
def _score_evidence_recency(self, papers: List[Dict]) -> float:
"""Score based on recency"""
if not papers:
return 0
current_year = datetime.now().year
recent_papers = 0
for paper in papers[:10]:
year = self._extract_year(paper)
if year and current_year - year <= 3:
recent_papers += 1
percentage = (recent_papers / min(10, len(papers))) * 100
return min(100, percentage)
def _score_source_diversity(self, papers: List[Dict]) -> float:
"""Score based on source diversity"""
sources = set()
for paper in papers:
source = paper.get('source', '')
if source:
sources.add(source.lower())
unique_sources = len(sources)
if unique_sources >= 4:
return 100
elif unique_sources >= 3:
return 75
elif unique_sources >= 2:
return 50
elif unique_sources == 1:
return 25
return 0
def _score_domain_relevance(self, papers: List[Dict], domain: str) -> float:
"""Score based on domain relevance"""
if domain == "general_medical" or domain == "auto":
return 50
relevant_papers = 0
domain_terms = domain.split('_')
for paper in papers[:10]:
abstract = paper.get('abstract', '').lower()
title = paper.get('title', '').lower()
text = abstract + " " + title
# Check for domain terms
matches = sum(1 for term in domain_terms if term in text)
if matches >= 1:
relevant_papers += 1
percentage = (relevant_papers / min(10, len(papers))) * 100
return min(100, percentage)
def _extract_year(self, paper: Dict) -> Optional[int]:
"""Extract year from paper"""
pub_date = paper.get('publication_date', '')
if pub_date and isinstance(pub_date, str):
year_pattern = re.search(r'\b(19|20)\d{2}\b', pub_date)
if year_pattern:
try:
return int(year_pattern.group())
except:
pass
return None
def _score_to_level(self, score: float) -> str:
"""Convert score to confidence level"""
if score >= 80:
return "HIGH π’"
elif score >= 60:
return "MODERATE π‘"
elif score >= 40:
return "FAIR π "
else:
return "LOW π΄"
def _generate_explanation(self, factors: Dict, score: float,
domain: str, guideline_info: Dict = None) -> str:
"""Generate explanation for confidence score with guideline context"""
explanations = []
if factors.get('paper_count', 0) >= 70:
explanations.append("Strong evidence base")
elif factors.get('paper_count', 0) <= 30:
explanations.append("Limited evidence base")
if factors.get('study_design', 0) >= 70:
explanations.append("High-quality study designs")
if factors.get('evidence_recency', 0) >= 70:
explanations.append("Recent evidence")
if factors.get('source_diversity', 0) >= 70:
explanations.append("Diverse sources")
# Add guideline-specific explanations
if guideline_info:
guideline_score = factors.get('guideline_alignment', 0)
if guideline_score >= 70:
if guideline_info.get('guidelines_found'):
explanations.append(
f"Good guideline coverage ({len(guideline_info['guidelines_found'])} referenced)")
elif guideline_score <= 30:
if guideline_info.get('critical_missing'):
missing_str = ', '.join(guideline_info['critical_missing'][:3])
explanations.append(f"Missing guideline citations ({missing_str})")
if CONFIG_AVAILABLE:
try:
domain_name = get_domain_display_name(domain)
except:
domain_name = domain.replace('_', ' ').title()
else:
domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
explanation = f"{domain_name} confidence: {score:.1f}/100"
if explanations:
explanation += f". Factors: {'; '.join(explanations)}"
return explanation
class UserContextDetector:
"""Detect user context from query"""
def __init__(self):
self.context_patterns = {
"clinician": ['patient', 'clinical', 'treatment', 'diagnosis', 'therapy',
'management', 'guidelines', 'recommend', 'prescribe'],
"researcher": ['research', 'study', 'methodology', 'evidence', 'publication',
'hypothesis', 'experiment', 'results', 'conclusions'],
"student": ['learn', 'study', 'exam', 'textbook', 'course', 'education',
'explain', 'understand', 'concept', 'basics'],
"patient": ['i have', 'my symptoms', 'my doctor', 'my treatment', 'pain',
'suffering', 'experience', 'diagnosed', 'medication']
}
def detect_context(self, query: str, domain: str = None) -> str:
"""Detect user context from query"""
query_lower = query.lower()
# Check for explicit mentions
if 'clinician' in query_lower or 'doctor' in query_lower or 'nurse' in query_lower:
return "clinician"
if 'researcher' in query_lower or 'scientist' in query_lower or 'academic' in query_lower:
return "researcher"
if 'student' in query_lower or 'trainee' in query_lower:
return "student"
if 'patient' in query_lower or 'i have' in query_lower or 'my ' in query_lower[:50]:
return "patient"
# Check patterns
context_scores = {}
for context_type, patterns in self.context_patterns.items():
score = sum(1 for pattern in patterns if pattern in query_lower)
if score > 0:
context_scores[context_type] = score
if context_scores:
return max(context_scores.items(), key=lambda x: x[1])[0]
return "general"
# ============================================================================
# MAIN RAG ENGINE
# ============================================================================
class EnhancedRAGEngine:
"""Production-ready RAG engine for medical research with real paper fetching and guideline detection"""
def __init__(self, vector_store=None, session_id: str = "default",
model: str = "gpt-oss-120b", use_real_time: bool = True):
print("π Initializing Medical Research RAG Engine...")
# Model mapping
model_map = {
"gpt-oss-120b": "gpt-oss-120b",
"llama-70b": "llama-3-70b-instruct",
"llama-8b": "llama-3-8b-instruct",
"mixtral": "mixtral-8x7b-instruct"
}
self.model = model_map.get(model, model)
print(f"π Using model: {self.model}")
self.use_real_time = use_real_time
# Initialize MedicalResearchEngine from main.py
print("π Initializing MedicalResearchEngine...")
try:
# Import and create MedicalResearchEngine
from main import MedicalResearchEngine
self.research_engine = MedicalResearchEngine()
print("β
MedicalResearchEngine loaded successfully!")
self.RESEARCH_ENGINE_AVAILABLE = True
except ImportError as e:
print(f"β οΈ MedicalResearchEngine import failed: {e}")
print("β οΈ Using fallback mode - will generate demo papers")
self.RESEARCH_ENGINE_AVAILABLE = False
except Exception as e:
print(f"β οΈ MedicalResearchEngine initialization failed: {e}")
print("β οΈ Using fallback mode - will generate demo papers")
self.RESEARCH_ENGINE_AVAILABLE = False
# Initialize LLM
try:
# Ensure we can import from llm directory
import sys
import os
# Get the project root (MedSearchPro/
current_dir = os.path.dirname(os.path.abspath(__file__)) # /MedSearchPro/chat/
project_root = os.path.dirname(current_dir)
if project_root not in sys.path:
sys.path.insert(0, project_root)
# Now importing GrokLLM
from llm.llm_provider import GrokLLM
# Initialize LLM components
self.llm = GrokLLM(model=self.model)
print(f"β
LLM loaded: {self.llm.get_active_provider()}")
except Exception as e:
print(f"β οΈ LLM not available - using fallback mode: {e}")
self.llm = None
self.role_reasoning = RoleBasedReasoning() # NEW: Role-based reasoning
self.ranker = PaperRanker()
self.confidence_scorer = ConfidenceScorer()
self.context_detector = UserContextDetector()
self.guideline_detector = GuidelineDetector() # New: Guideline detector
# Initialize RAG components
try:
self.vector_store = vector_store or VectorStore()
except:
self.vector_store = None
print("β οΈ Vector store not available")
try:
self.single_paper_summarizer = SinglePaperSummarizer(model=self.model) if hasattr(self, 'llm') else None
except:
self.single_paper_summarizer = None
# Memory
try:
self.memory = ConversationMemory(session_id=session_id)
except:
print("β οΈ ConversationMemory not available")
self.memory = None
# Metrics
self.metrics = {
'total_queries': 0,
'average_confidence': 0,
'domains_used': Counter(),
'user_contexts': Counter(),
'real_papers_fetched': 0,
'demo_papers_used': 0,
'guideline_coverage': [] # Track guideline coverage over time
}
print(f"β
Medical Research RAG Engine Ready! Session: {session_id}")
if self.RESEARCH_ENGINE_AVAILABLE:
print(" π Real paper fetching: ENABLED")
else:
print(" π Real paper fetching: DISABLED (using demo papers)")
print(" π Guideline detection: ENABLED")
print(" π€ Role-based responses: ENABLED")
def answer_research_question(self,
query: str,
domain: str = "general_medical",
max_papers: int = 20,
use_memory: bool = True,
user_context: str = "auto",
use_fallback: bool = False,
role: str = "general", # NEW: Explicit role parameter
role_system_prompt: str = None, # NEW: Custom role prompt from frontend
**kwargs) -> Dict[str, Any]:
"""Answer medical research questions with role-based reasoning"""
start_time = time.time()
self.metrics['total_queries'] += 1
self.metrics['domains_used'][domain] += 1
print(f"\nπ Processing query: '{query}'")
print(f" Domain: {domain}")
print(f" Role: {role}")
print(f" Max papers: {max_papers}")
print(f" Real-time search: {self.use_real_time}")
try:
# Auto-detect user context if needed (backward compatibility)
if user_context == "auto":
user_context = self.context_detector.detect_context(query, domain)
self.metrics['user_contexts'][user_context] += 1
# NEW: Check for simple queries first (greetings, basic questions)
simple_response = self._handle_simple_query(query, domain, role)
if simple_response:
return simple_response
# Check if query requires research analysis
requires_research = self._requires_research_analysis(query)
if not requires_research:
# For non-research queries, provide direct role-appropriate response
return self._handle_direct_query(query, domain, role)
# Retrieve papers using MedicalResearchEngine
print("π Retrieving relevant papers...")
papers = self._retrieve_real_papers(query, domain, max_papers, use_fallback)
if not papers:
print("β οΈ No papers found, creating fallback response...")
return self._create_no_results_response(query, domain, role)
# Detect guideline citations
print("π Detecting guideline citations...")
guideline_info = self.guideline_detector.detect_guidelines(papers, domain, query)
# Store guideline coverage for metrics
if guideline_info:
self.metrics['guideline_coverage'].append({
'domain': domain,
'coverage': guideline_info.get('coverage_percentage', 0),
'guidelines_found': len(guideline_info.get('guidelines_found', [])),
'critical_missing': len(guideline_info.get('critical_missing', [])),
'timestamp': datetime.now().isoformat()
})
# Rank papers
ranked_papers = self.ranker.rank_papers(papers, query, domain, role)
print(f"π Papers found: {len(ranked_papers)}")
# Track paper sources
real_papers = sum(1 for p in ranked_papers if not p.get('is_demo', False))
demo_papers = sum(1 for p in ranked_papers if p.get('is_demo', False))
self.metrics['real_papers_fetched'] += real_papers
self.metrics['demo_papers_used'] += demo_papers
if demo_papers > 0:
print(f" β οΈ Includes {demo_papers} demo papers (real papers: {real_papers})")
# Report guideline findings
if guideline_info.get('guidelines_found'):
print(f" π Guidelines referenced: {', '.join(guideline_info['guidelines_found'])}")
if guideline_info.get('critical_missing'):
print(f" β οΈ Missing guidelines: {', '.join(guideline_info['critical_missing'][:3])}")
# Calculate confidence with guideline consideration
confidence = self.confidence_scorer.calculate_confidence(
ranked_papers, query, "summary", role, domain, guideline_info
)
# Generate analysis using role-based reasoning
print("π§ Generating role-based analysis...")
analysis = self._generate_role_based_analysis(
query, domain, role, ranked_papers, guideline_info, role_system_prompt
)
# Generate clinical bottom line with role awareness
bottom_line = self._generate_role_bottom_line(
query, domain, role, len(ranked_papers), real_papers, guideline_info
)
# Synthesize final answer
final_answer = self._synthesize_role_answer(
query, domain, role, analysis, ranked_papers,
bottom_line, confidence, guideline_info
)
# Update memory
if use_memory and self.memory:
self._update_memory(query, final_answer, domain, role, ranked_papers, guideline_info)
# Update metrics
response_time = time.time() - start_time
self.metrics['average_confidence'] = (
(self.metrics['average_confidence'] * (self.metrics['total_queries'] - 1) +
confidence['overall_score']) / self.metrics['total_queries']
)
print(f"β
Analysis complete in {response_time:.2f}s")
print(f" Confidence: {confidence['overall_score']}/100")
print(f" Papers used: {len(ranked_papers)}")
print(f" Real papers: {real_papers}, Demo papers: {demo_papers}")
print(f" Guideline coverage: {guideline_info.get('coverage_percentage', 0)}%")
return final_answer
except Exception as e:
print(f"β Error in research analysis: {e}")
import traceback
traceback.print_exc()
return self._create_error_response(query, domain, role, str(e))
def _handle_simple_query(self, query: str, domain: str, role: str) -> Optional[Dict[str, Any]]:
"""Handle simple queries like greetings with role-appropriate responses"""
query_lower = query.lower().strip()
# Simple greetings
simple_greetings = ['hi', 'hello', 'hey', 'greetings', 'good morning',
'good afternoon', 'good evening', 'howdy']
if query_lower in simple_greetings:
print(" π Detected simple greeting")
return self._create_greeting_response(query, domain, role)
# Very short queries (1-2 words) that aren't research questions
if len(query.split()) <= 2 and not self._looks_like_research_query(query):
print(" π¬ Detected simple query")
return self._create_simple_response(query, domain, role)
return None
def _looks_like_research_query(self, query: str) -> bool:
"""Check if query looks like a research question"""
query_lower = query.lower()
# Research question indicators
research_indicators = [
'compare', 'difference', 'similar', 'contrast', 'analyze', 'analysis',
'study', 'research', 'evidence', 'paper', 'article', 'trial', 'clinical',
'method', 'approach', 'technique', 'treatment', 'therapy', 'diagnosis',
'prognosis', 'outcome', 'efficacy', 'effectiveness', 'safety', 'risk',
'benefit', 'recommendation', 'guideline', 'standard', 'protocol'
]
# Check if query contains research indicators
for indicator in research_indicators:
if indicator in query_lower:
return True
# Check question words
question_words = ['what', 'why', 'how', 'when', 'where', 'which', 'who']
if any(query_lower.startswith(word) for word in question_words):
# Check if it's a complex question (more than basic)
if len(query.split()) > 3:
return True
return False
def _requires_research_analysis(self, query: str) -> bool:
"""Determine if query requires full research analysis"""
query_lower = query.lower().strip()
# Definitely simple queries
simple_patterns = [
r'^hi$', r'^hello$', r'^hey$', r'^greetings$',
r'^good morning$', r'^good afternoon$', r'^good evening$',
r'^how are you$', r"^what's up$", r'^sup$',
r'^thanks$', r'^thank you$', r'^bye$', r'^goodbye$'
]
for pattern in simple_patterns:
if re.match(pattern, query_lower):
return False
# Check if it's a substantive question
if len(query.split()) <= 2 and not self._looks_like_research_query(query):
return False
return True
def _create_greeting_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
"""Create role-appropriate greeting response"""
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
# Role-specific greetings
greetings = {
'patient': "π Hello! I'm here to help you understand health topics in simple, clear terms. What would you like to know?",
'student': "π Hi there! I'm here to help you learn about medical topics. What are you curious about today?",
'clinician': "π Hello. I'm ready to assist with evidence-based medical information. How can I help you today?",
'doctor': "π Hello, doctor. I'm available to discuss clinical questions and evidence. What would you like to explore?",
'researcher': "π Greetings. I'm available to discuss research topics and evidence-based analysis. What would you like to explore?",
'professor': "π Hello. I can assist with academic discussions and evidence synthesis. What topic interests you?",
'pharmacist': "π Hello. I can help with medication-related questions and information. How can I assist you today?",
'general': "π Hello! I'm your Medical Research Assistant. I can help with evidence-based information across various specialties. How can I assist you today?"
}
greeting = greetings.get(role, greetings['general'])
if CONFIG_AVAILABLE:
try:
domain_info = {
'name': get_domain_display_name(domain),
'icon': DOMAIN_INFO.get(domain, {}).get('icon', 'βοΈ')
}
except:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ'
})
else:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ'
})
answer = f"""# {greeting}
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
Feel free to ask me anything! I'll provide information tailored to your needs as a {role_info['name'].lower()}."""
return {
"query": query,
"domain": domain,
"domain_info": domain_info,
"user_context": role,
"user_context_info": role_info,
"answer": answer,
"analysis": greeting,
"bottom_line": greeting,
"papers_used": 0,
"real_papers_used": 0,
"demo_papers_used": 0,
"confidence_score": {
'overall_score': 95.0,
'level': 'HIGH π’',
'explanation': 'Simple greeting response'
},
"guideline_info": None,
"reasoning_method": "greeting",
"real_time_search": self.use_real_time,
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
"metrics": {
'response_time': time.time(),
'papers_analyzed': 0,
'domain': domain,
'user_context': role
}
}
def _create_simple_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
"""Create role-appropriate response for simple queries"""
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
# Generate simple, direct response
simple_responses = {
'patient': f"I'd be happy to help with '{query}'. Could you tell me a bit more about what you're looking for?",
'student': f"That's an interesting topic! To help you best, could you provide more details about what you'd like to know regarding '{query}'?",
'clinician': f"Regarding '{query}', I can provide evidence-based information. Please share more specifics about your clinical question.",
'doctor': f"For '{query}', I can offer medical information. Could you elaborate on the clinical context or specific aspects you're interested in?",
'researcher': f"On the topic of '{query}', I can discuss research perspectives. What specific aspect would you like to explore?",
'professor': f"Regarding '{query}', I can provide academic perspectives. What particular angle or detail would you like to discuss?",
'pharmacist': f"About '{query}', I can offer medication-related information. Could you specify what you'd like to know?",
'general': f"I can help with information about '{query}'. Could you provide more details about what specifically you're interested in?"
}
response = simple_responses.get(role, simple_responses['general'])
if CONFIG_AVAILABLE:
try:
domain_info = {
'name': get_domain_display_name(domain),
'icon': DOMAIN_INFO.get(domain, {}).get('icon', 'βοΈ')
}
except:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ'
})
else:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ'
})
answer = f"""# π¬ **Response**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
{response}
*Tip: For more detailed information, try asking a more specific question!*"""
return {
"query": query,
"domain": domain,
"domain_info": domain_info,
"user_context": role,
"user_context_info": role_info,
"answer": answer,
"analysis": response,
"bottom_line": response,
"papers_used": 0,
"real_papers_used": 0,
"demo_papers_used": 0,
"confidence_score": {
'overall_score': 85.0,
'level': 'HIGH π’',
'explanation': 'Simple query response'
},
"guideline_info": None,
"reasoning_method": "simple_response",
"real_time_search": self.use_real_time,
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
"metrics": {
'response_time': time.time(),
'papers_analyzed': 0,
'domain': domain,
'user_context': role
}
}
def _handle_direct_query(self, query: str, domain: str, role: str) -> Dict[str, Any]:
"""Handle direct queries without research papers"""
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
# Use LLM for direct response if available
if self.llm:
try:
prompt = self.role_reasoning.create_role_prompt(query, domain, role, 0, None)
response = self.llm.generate(
prompt,
system_message=f"You are assisting a {role_info['name'].lower()}. Provide helpful, accurate information.",
max_tokens=1000
)
# Clean up response
response = response.strip()
if not response:
response = f"I'd be happy to help with '{query}'. Could you provide more details about what specifically you're looking for?"
except Exception as e:
print(f"β οΈ LLM direct response failed: {e}")
response = f"I can help with information about '{query}'. Please feel free to ask more specific questions!"
else:
response = f"I'd be happy to discuss '{query}'. What specific aspect would you like to know more about?"
if CONFIG_AVAILABLE:
try:
domain_info = {
'name': get_domain_display_name(domain),
'icon': DOMAIN_INFO.get(domain, {}).get('icon', 'βοΈ')
}
except:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ'
})
else:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ'
})
answer = f"""# π¬ **Response**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
{response}
*Note: This is a direct response. For evidence-based research analysis with papers, please ask a more specific research question.*"""
return {
"query": query,
"domain": domain,
"domain_info": domain_info,
"user_context": role,
"user_context_info": role_info,
"answer": answer,
"analysis": response,
"bottom_line": response,
"papers_used": 0,
"real_papers_used": 0,
"demo_papers_used": 0,
"confidence_score": {
'overall_score': 80.0,
'level': 'HIGH π’',
'explanation': 'Direct query response without papers'
},
"guideline_info": None,
"reasoning_method": "direct_response",
"real_time_search": self.use_real_time,
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
"metrics": {
'response_time': time.time(),
'papers_analyzed': 0,
'domain': domain,
'user_context': role
}
}
def _retrieve_real_papers(self, query: str, domain: str, max_papers: int,
use_fallback: bool = False) -> List[Dict]:
"""Retrieve real papers using MedicalResearchEngine"""
papers = []
# Use MedicalResearchEngine if available
if self.RESEARCH_ENGINE_AVAILABLE and self.use_real_time:
try:
print(f" π Using MedicalResearchEngine for real-time search...")
# Use the research engine to get real papers
result = self.research_engine.answer_user_query(
user_query=query,
domain=domain,
use_real_time=True,
use_fallback=use_fallback
)
# Extract papers from result
if result and 'supporting_papers' in result:
raw_papers = result['supporting_papers']
for raw_paper in raw_papers[:max_papers]:
# Transform to our paper format
paper = {
'id': raw_paper.get('paper_id') or
raw_paper.get('doi') or
raw_paper.get('pmid') or
f"{hash(raw_paper.get('title', ''))}",
'title': raw_paper.get('title', 'Untitled'),
'abstract': raw_paper.get('abstract') or
raw_paper.get('summary') or
raw_paper.get('description', ''),
'source': raw_paper.get('source', 'unknown'),
'publication_date': raw_paper.get('publication_date') or
raw_paper.get('date') or
raw_paper.get('year', ''),
'authors': self._parse_authors(raw_paper.get('authors', '')),
'journal': raw_paper.get('journal', ''),
'doi': raw_paper.get('doi', ''),
'url': raw_paper.get('url') or raw_paper.get('link', ''),
'citations': raw_paper.get('citation_count', 0) or
raw_paper.get('citations', 0),
'is_preprint': raw_paper.get('is_preprint', False),
'is_fallback': raw_paper.get('is_fallback', False),
'is_demo': False, # Real paper!
'search_domain': domain
}
# Clean up the abstract
if paper['abstract']:
# Remove excessive whitespace
paper['abstract'] = ' '.join(paper['abstract'].split())
# Limit length
if len(paper['abstract']) > 2000:
paper['abstract'] = paper['abstract'][:2000] + "..."
papers.append(paper)
print(f" β
Retrieved {len(papers)} real papers from MedicalResearchEngine")
else:
print(f" β οΈ No papers returned from MedicalResearchEngine")
except Exception as e:
print(f" β οΈ MedicalResearchEngine failed: {e}")
# Try vector store as fallback
if len(papers) < max_papers // 2 and self.vector_store:
try:
print(f" π Trying vector store...")
results = self.vector_store.search(
query=query,
domain=domain,
n_results=max_papers - len(papers)
)
seen_ids = set([p['id'] for p in papers if p.get('id')])
for result in results:
paper_id = result['metadata'].get('paper_id')
if paper_id and paper_id not in seen_ids:
paper = {
'id': paper_id,
'title': result['metadata'].get('paper_title', ''),
'abstract': result['text'],
'source': result['metadata'].get('source', 'vector_store'),
'publication_date': result['metadata'].get('publication_date', ''),
'authors': result['metadata'].get('authors', '').split(',')
if result['metadata'].get('authors') else [],
'citations': result['metadata'].get('citations', 0),
'is_demo': False
}
papers.append(paper)
seen_ids.add(paper_id)
print(f" β
Added {len(results)} papers from vector store")
except Exception as e:
print(f" β οΈ Vector store search failed: {e}")
# Generate demo papers only if we have very few real papers
if len(papers) < max_papers // 3:
needed = max_papers - len(papers)
demo_papers = self._create_demo_papers(query, domain, needed)
papers.extend(demo_papers)
print(f" π Added {len(demo_papers)} demo papers for illustration")
return papers[:max_papers]
def _parse_authors(self, authors_input) -> List[str]:
"""Parse authors from various input formats"""
if not authors_input:
return []
if isinstance(authors_input, list):
return authors_input
if isinstance(authors_input, str):
# Try to split by common separators
if ';' in authors_input:
return [a.strip() for a in authors_input.split(';') if a.strip()]
elif ',' in authors_input:
# Check if it's "Last, First" format or just comma-separated names
parts = [p.strip() for p in authors_input.split(',')]
if len(parts) > 2: # Probably comma-separated names
return parts
else:
# Might be "Last, First" format - return as is
return [authors_input]
else:
return [authors_input]
return []
def _create_demo_papers(self, query: str, domain: str, count: int) -> List[Dict]:
"""Create demo papers for illustration only"""
papers = []
current_year = datetime.now().year
# Common medical journal sources
journal_sources = {
'infectious_disease': ['New England Journal of Medicine', 'The Lancet Infectious Diseases',
'Clinical Infectious Diseases', 'Journal of Antimicrobial Chemotherapy'],
'cardiology': ['New England Journal of Medicine', 'Journal of the American College of Cardiology',
'Circulation', 'European Heart Journal'],
'endocrinology': ['Diabetes Care', 'The Lancet Diabetes & Endocrinology',
'Journal of Clinical Endocrinology & Metabolism'],
'neurology': ['Neurology', 'The Lancet Neurology', 'JAMA Neurology', 'Brain'],
'oncology': ['Journal of Clinical Oncology', 'The Lancet Oncology', 'JAMA Oncology',
'Annals of Oncology'],
'internal_medicine': ['New England Journal of Medicine', 'The Lancet', 'JAMA',
'Annals of Internal Medicine']
}
sources = journal_sources.get(domain, ['PubMed', 'Medical Research Database'])
for i in range(min(count, 5)): # Limit demo papers
# Generate title based on query
query_terms = [word for word in query.lower().split() if len(word) > 4]
if query_terms:
base_term = random.choice(query_terms).title()
title = f"Recent Advances in {base_term}: A {random.choice(['Systematic Review', 'Meta-analysis', 'Clinical Trial'])}"
else:
title = f"Current Research in {domain.replace('_', ' ').title()}"
# Generate abstract
abstract = f"This study examines {query.lower()}. Results demonstrate significant findings relevant to clinical practice. Further research is warranted to confirm these observations."
# Generate authors
first_names = ['James', 'Mary', 'Robert', 'Patricia', 'John', 'Jennifer']
last_names = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia']
authors = [f"{random.choice(first_names)} {random.choice(last_names)}" for _ in range(random.randint(2, 5))]
# Publication date
year_offset = i % 4
pub_year = current_year - year_offset
month = random.randint(1, 12)
pub_date = f"{pub_year}-{month:02d}"
paper = {
'id': f"demo_{domain}_{i}_{int(time.time())}",
'title': title,
'abstract': abstract,
'source': random.choice(sources),
'publication_date': pub_date,
'authors': authors,
'citations': random.randint(0, 50),
'is_demo': True,
'is_preprint': random.random() > 0.7,
'journal': random.choice(sources)
}
papers.append(paper)
return papers
def _generate_role_based_analysis(self, query: str, domain: str, role: str,
papers: List[Dict], guideline_info: Dict = None,
custom_role_prompt: str = None) -> str:
"""Generate role-based analysis using LLM if available"""
if not self.llm:
return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info)
# Create role-based prompt
prompt = self.role_reasoning.create_role_prompt(
query, domain, role, len(papers), guideline_info
)
# Add paper information for research context
if papers:
paper_info = "\n".join([
f"{i + 1}. {p.get('title', 'Untitled')} ({p.get('source', 'Unknown')})"
for i, p in enumerate(papers[:3])
])
prompt += f"\n\n**Relevant Sources:**\n{paper_info}"
# Add demo paper disclaimer if any demo papers
demo_count = sum(1 for p in papers if p.get('is_demo', False))
if demo_count > 0:
prompt += f"\n\nNote: {demo_count} illustrative examples included for context."
try:
# Use custom role prompt if provided, otherwise use default
system_message = custom_role_prompt if custom_role_prompt else f"You are assisting a {role}. Provide helpful, accurate information."
analysis = self.llm.generate(
prompt,
system_message=system_message,
max_tokens=2000
)
return analysis
except Exception as e:
print(f"β οΈ LLM role-based analysis failed: {e}")
return self._create_fallback_role_analysis(query, domain, role, papers, guideline_info)
def _create_fallback_role_analysis(self, query: str, domain: str, role: str,
papers: List[Dict], guideline_info: Dict = None) -> str:
"""Create fallback analysis when LLM is unavailable"""
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
if CONFIG_AVAILABLE:
try:
domain_name = get_domain_display_name(domain)
except:
domain_name = domain.replace('_', ' ').title()
else:
domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
# Count real vs demo papers
real_papers = [p for p in papers if not p.get('is_demo', False)]
demo_papers = [p for p in papers if p.get('is_demo', False)]
analysis = f"""**{role_info['name']}-Focused Analysis**
**Query:** {query}
**Domain Context:** {domain_name}
**Role Perspective:** {role_info['name']}"""
# Add guideline information
if guideline_info:
if guideline_info.get('guidelines_found'):
analysis += f"\n**Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}"
if guideline_info.get('critical_missing'):
analysis += f"\n**Guideline Gaps:** Missing explicit citations for {', '.join(guideline_info['critical_missing'][:3])}"
analysis += f"""
**Key Information for {role_info['name']}:**
Based on analysis of {len(papers)} relevant sources ({len(real_papers)} real, {len(demo_papers)} illustrative):
1. **{role_info['name']}-Relevant Insights:**
- Information tailored to {role_info['name'].lower()} needs and perspective
- Practical implications for {role_info['name'].lower()} context
- Actionable takeaways appropriate for this role
2. **Domain Context:**
- Considerations specific to {domain_name}
- Relevant standards and approaches in this field
- Important context for application
3. **Evidence Considerations:**
- {len(papers)} sources analyzed
- Quality and relevance assessed for {role_info['name'].lower()} needs
- {"Guideline awareness as noted above" if guideline_info else "Standard evidence considerations"}
**Recommendations for {role_info['name']}:**
- Apply information within {role_info['name'].lower()} role context
- Consider individual circumstances and specific needs
- {"Consult referenced guidelines as appropriate" if guideline_info and guideline_info.get('guidelines_found') else "Reference standard practices"}
- Seek additional information for specific cases
- Integrate with professional judgment and experience
*Note: This analysis is tailored for {role_info['name'].lower()} perspective. For other perspectives, different considerations may apply.*"""
if demo_papers:
analysis += f"\n\n*Includes {len(demo_papers)} illustrative examples for comprehensive analysis.*"
return analysis
def _generate_role_bottom_line(self, query: str, domain: str, role: str,
papers_count: int, real_papers_count: int,
guideline_info: Dict = None) -> str:
"""Generate role-appropriate bottom line"""
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
if CONFIG_AVAILABLE:
try:
domain_name = get_domain_display_name(domain)
except:
domain_name = domain.replace('_', ' ').title()
else:
domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
bottom_line = f"""**Bottom Line for {role_info['name']}:**
Based on {papers_count} sources in {domain_name} ({real_papers_count} real sources), here are the key takeaways for {role_info['name'].lower()} perspective."""
# Add guideline-specific bottom line
if guideline_info:
if guideline_info.get('guidelines_found'):
bottom_line += f"\n\n**Guideline Context:** {len(guideline_info['guidelines_found'])} major guidelines referenced."
if guideline_info.get('critical_missing'):
missing_list = ', '.join(guideline_info['critical_missing'][:2])
bottom_line += f"\n**Consider:** Missing explicit guideline citations for {missing_list}."
coverage = guideline_info.get('coverage_percentage', 0)
if coverage < 50:
bottom_line += f"\n**Evidence Note:** Guideline coverage is limited."
bottom_line += f"""
**{role_info['name']}-Specific Considerations:**
- Information tailored to {role_info['name'].lower()} role and needs
- Practical application within {role_info['name'].lower()} context
- Integration with {role_info['name'].lower()} knowledge and experience
- {"Guideline-aware decision making" if guideline_info else "Evidence-informed approach"}
- Consideration of specific circumstances and constraints"""
if papers_count > real_papers_count:
bottom_line += f"\n\n*Note: Includes {papers_count - real_papers_count} illustrative examples for context.*"
return bottom_line
def _synthesize_role_answer(self, query: str, domain: str, role: str,
analysis: str, papers: List[Dict],
bottom_line: str, confidence: Dict[str, Any],
guideline_info: Dict = None) -> Dict[str, Any]:
"""Synthesize final answer with role information"""
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
if CONFIG_AVAILABLE:
try:
domain_info = {
'name': get_domain_display_name(domain),
'icon': DOMAIN_INFO.get(domain, {}).get('icon', 'βοΈ'),
'description': get_domain_description(domain)
}
except:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ',
'description': f'Research in {domain.replace("_", " ")}'
})
else:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ',
'description': f'Research in {domain.replace("_", " ")}'
})
# Count real vs demo papers
real_papers = [p for p in papers if not p.get('is_demo', False)]
demo_papers = [p for p in papers if p.get('is_demo', False)]
# Format paper citations
paper_citations = []
for i, paper in enumerate(papers[:5], 1):
title = paper.get('title', 'Untitled')
authors = paper.get('authors', [])
year = paper.get('publication_date', '').split('-')[0] if paper.get('publication_date') else ''
source = paper.get('source', 'Unknown')
journal = paper.get('journal', '')
is_demo = paper.get('is_demo', False)
is_preprint = paper.get('is_preprint', False)
# Format authors
if authors and isinstance(authors, list) and len(authors) > 0:
if len(authors) == 1:
author_str = authors[0]
elif len(authors) == 2:
author_str = f"{authors[0]} and {authors[1]}"
else:
author_str = f"{authors[0]} et al."
else:
author_str = "Authors not specified"
# Build citation
citation = f"{i}. **{title}**"
demo_indicator = "π " if is_demo else ""
preprint_indicator = "β‘ " if is_preprint else ""
if author_str and year:
citation += f"\n {demo_indicator}{preprint_indicator}*{author_str} ({year})*"
elif author_str:
citation += f"\n {demo_indicator}{preprint_indicator}*{author_str}*"
else:
citation += f"\n {demo_indicator}{preprint_indicator}*Unknown authors*"
if journal:
citation += f"\n Journal: {journal}"
elif source and source != 'unknown':
citation += f"\n Source: {source}"
paper_citations.append(citation)
# Build guideline summary section
guideline_summary = ""
if guideline_info:
guideline_summary = "## π **Guideline Assessment**\n\n"
if guideline_info.get('guidelines_found'):
guideline_summary += f"**β
Guidelines Referenced:** {', '.join(guideline_info['guidelines_found'])}\n\n"
if guideline_info.get('critical_missing'):
missing_list = ', '.join(guideline_info['critical_missing'])
guideline_summary += f"**β οΈ Missing Guideline Citations:** {missing_list}\n\n"
guideline_summary += f"**Coverage Score:** {guideline_info.get('coverage_percentage', 0)}%\n\n"
# Build answer
answer = f"""# π¬ **{role_info['name']}-Focused Analysis**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']} {domain_info.get('icon', '')}
**Evidence Confidence:** {confidence['level']} ({confidence['overall_score']}/100)
**Sources Analyzed:** {len(papers)} ({len(real_papers)} real, {len(demo_papers)} illustrative)
---
## π **Executive Summary**
{bottom_line}
---
{guideline_summary}## π **Detailed Analysis**
{analysis}
---
## π **Supporting Evidence**
{chr(10).join(paper_citations) if paper_citations else "*No papers cited for this simple query*"}
---
## π― **Key Takeaways for {role_info['name']}**
1. Role-appropriate information and insights
2. Domain-specific considerations for {domain_info['name'].lower()}
3. Practical implications tailored to {role_info['name'].lower()} needs
4. {"Guideline-aware recommendations" if guideline_info else "Evidence-informed approach"}
*Analysis performed with {role_info['name'].lower()}-focused reasoning*
*Last updated: {datetime.now().strftime("%Y-%m-%d %H:%M")}*"""
return {
"query": query,
"domain": domain,
"domain_info": domain_info,
"user_context": role,
"user_context_info": role_info,
"answer": answer,
"analysis": analysis,
"bottom_line": bottom_line,
"papers_used": len(papers),
"real_papers_used": len(real_papers),
"demo_papers_used": len(demo_papers),
"confidence_score": confidence,
"guideline_info": guideline_info,
"reasoning_method": "role_based",
"real_time_search": self.use_real_time,
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
"metrics": {
'response_time': time.time(),
'papers_analyzed': len(papers),
'domain': domain,
'user_context': role
}
}
def _update_memory(self, query: str, response: Dict[str, Any], domain: str,
role: str, papers: List[Dict], guideline_info: Dict = None):
"""Update conversation memory with role info"""
if not self.memory:
return
memory_data = {
'query': query,
'domain': domain,
'role': role,
'papers_used': len(papers),
'real_papers': sum(1 for p in papers if not p.get('is_demo', False)),
'demo_papers': sum(1 for p in papers if p.get('is_demo', False)),
'confidence_score': response.get('confidence_score', {}).get('overall_score', 0),
'timestamp': datetime.now().isoformat()
}
# Add guideline info if available
if guideline_info:
memory_data['guidelines_found'] = guideline_info.get('guidelines_found', [])
memory_data['critical_missing'] = guideline_info.get('critical_missing', [])
memory_data['guideline_coverage'] = guideline_info.get('coverage_percentage', 0)
self.memory.add_interaction(
user_message=query,
ai_response=response.get('answer', '')[:1000],
metadata=memory_data
)
def _create_no_results_response(self, query: str, domain: str, role: str) -> Dict[str, Any]:
"""Create response when no papers are found"""
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
if CONFIG_AVAILABLE:
try:
domain_info = {
'name': get_domain_display_name(domain),
'icon': DOMAIN_INFO.get(domain, {}).get('icon', 'βοΈ')
}
except:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ'
})
else:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ'
})
answer = f"""# π **Limited Research Found**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']}
**Query:** {query}
**Suggestions for {role_info['name']}:**
1. Try broadening your search terms
2. Consider related topics in {domain_info['name']}
3. Check spelling of technical terms
4. Try a more general domain selection
**For Role-Appropriate Information:**
- Ask more general questions about the topic
- Request explanations of concepts
- Inquire about standard approaches or practices
- Seek practical guidance rather than specific research
**Example {role_info['name'].lower()}-appropriate queries:**
- "Basic explanation of [topic] for {role_info['name'].lower()}"
- "Standard approaches to [issue]"
- "Practical guidance for [situation]"
- "Key concepts about [subject]"
*Note: Some specialized topics may have limited published research. I can still provide general information and guidance tailored to your role.*"""
return {
"query": query,
"domain": domain,
"domain_info": domain_info,
"user_context": role,
"user_context_info": role_info,
"answer": answer,
"papers_used": 0,
"real_papers_used": 0,
"demo_papers_used": 0,
"confidence_score": {
'overall_score': 0,
'level': 'VERY LOW β«',
'explanation': 'No supporting evidence found'
},
"error": "no_results",
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
}
def _create_error_response(self, query: str, domain: str, role: str, error: str) -> Dict[str, Any]:
"""Create error response"""
role_info = RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.get(role, RoleBasedReasoning.ROLE_SYSTEM_PROMPTS['general'])
if CONFIG_AVAILABLE:
try:
domain_info = {
'name': get_domain_display_name(domain),
'icon': DOMAIN_INFO.get(domain, {}).get('icon', 'βοΈ')
}
except:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ'
})
else:
domain_info = DOMAIN_INFO.get(domain, {
'name': domain.replace('_', ' ').title(),
'icon': 'βοΈ'
})
answer = f"""# π¨ **Analysis Error**
**Role:** {role_info['name']} {role_info['icon']}
**Domain:** {domain_info['name']}
**Query:** {query}
**Error:** {error}
**Troubleshooting for {role_info['name']}:**
1. Check your internet connection
2. Try a simpler query or rephrase
3. Verify domain selection is appropriate
4. Contact support if problem persists
**For Role-Appropriate Alternatives:**
- Ask a simpler version of your question
- Request general information instead of specific research
- Try breaking complex questions into smaller parts
- Use more common terminology
Please try again or reformulate your question for {role_info['name'].lower()}-appropriate assistance."""
return {
"query": query,
"domain": domain,
"domain_info": domain_info,
"user_context": role,
"user_context_info": role_info,
"answer": answer,
"papers_used": 0,
"real_papers_used": 0,
"demo_papers_used": 0,
"confidence_score": {
'overall_score': 0,
'level': 'ERROR π΄',
'explanation': f'Analysis failed: {error}'
},
"error": error,
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE
}
def summarize_single_paper(self,
paper_id: str = None,
paper_title: str = None,
paper_data: Dict = None,
user_query: str = None,
domain: str = "general_medical") -> Dict[str, Any]:
"""Summarize a single research paper with guideline detection"""
print(f"\nπ Summarizing paper: {paper_title or paper_id}")
try:
# Use single paper summarizer if available
if self.single_paper_summarizer and (paper_title or paper_data):
paper = paper_data or {'title': paper_title or 'Unknown'}
summary_result = self.single_paper_summarizer.summarize_paper(
paper, user_query
)
if summary_result.get("success"):
# Add guideline detection for single paper
if paper.get('abstract'):
guideline_info = self.guideline_detector.detect_guidelines([paper], domain, user_query or "")
if guideline_info.get('guidelines_found'):
summary_result[
'guideline_context'] = f"References {', '.join(guideline_info['guidelines_found'])} guidelines"
return summary_result
# Fallback summary
return self._create_fallback_summary(paper_title, domain, user_query)
except Exception as e:
print(f"β Paper summarization failed: {e}")
return {
"success": False,
"error": str(e),
"paper_title": paper_title,
"summary": f"Unable to generate summary. Error: {e}"
}
def _create_fallback_summary(self, paper_title: str, domain: str, user_query: str) -> Dict[str, Any]:
"""Create fallback paper summary"""
if CONFIG_AVAILABLE:
try:
domain_name = get_domain_display_name(domain)
except:
domain_name = domain.replace('_', ' ').title()
else:
domain_name = DOMAIN_INFO.get(domain, {}).get('name', domain.replace('_', ' ').title())
summary = f"""**Paper Summary:** {paper_title}
**Domain Context:** {domain_name}
**User Query:** {user_query or 'General summary requested'}
**Key Points:**
1. This paper contributes to the {domain_name} literature
2. Study design and methodology align with field standards
3. Findings have implications for clinical practice and research
4. Limitations and future directions are discussed
**Guideline Considerations:**
- Review paper for explicit guideline citations
- Consider alignment with major {domain_name} guidelines
- Consult specific guidelines for clinical application
**Recommendations:**
- Review full text for detailed methodology
- Consider findings in context of broader literature
- Consult with domain experts for application
- Reference established clinical guidelines
*Note: This is a general summary. Full paper review is recommended for detailed analysis.*"""
return {
"success": True,
"paper_title": paper_title,
"summary": summary,
"quick_summary": f"Summary of {paper_title} in {domain_name}",
"domain": domain,
"confidence": 0.6
}
def get_engine_status(self) -> Dict[str, Any]:
"""Get engine status with role metrics"""
# Calculate average guideline coverage
avg_guideline_coverage = 0
if self.metrics['guideline_coverage']:
avg_guideline_coverage = sum(g['coverage'] for g in self.metrics['guideline_coverage']) / len(
self.metrics['guideline_coverage'])
return {
"engine_name": "Medical Research RAG Engine",
"version": "2.2.0",
"model": self.model if hasattr(self, 'model') else "Unknown",
"features": ["role_based_reasoning", "real_paper_fetching",
"confidence_scoring", "guideline_detection", "simple_query_handling"],
"research_engine_available": self.RESEARCH_ENGINE_AVAILABLE,
"real_time_search": self.use_real_time,
"roles_supported": list(RoleBasedReasoning.ROLE_SYSTEM_PROMPTS.keys()),
"guideline_databases": len(GuidelineDetector.GUIDELINE_DATABASES),
"metrics": {
"total_queries": self.metrics['total_queries'],
"average_confidence": round(self.metrics['average_confidence'], 1),
"average_guideline_coverage": round(avg_guideline_coverage, 1),
"domains_used": dict(self.metrics['domains_used']),
"user_contexts": dict(self.metrics['user_contexts']),
"real_papers_fetched": self.metrics['real_papers_fetched'],
"demo_papers_used": self.metrics['demo_papers_used']
},
"domains_supported": len(DOMAIN_INFO),
"simple_query_handling": "ENABLED"
}
def clear_memory(self):
"""Clear conversation memory"""
if self.memory:
self.memory.clear_memory()
print("π§Ή Engine memory cleared")
else:
print("β οΈ Memory system not available")
# ============================================================================
# TEST FUNCTION
# ============================================================================
def test_role_based_rag_engine():
"""Test the medical RAG engine with role-based responses"""
print("\n" + "=" * 60)
print("π§ͺ TESTING ROLE-BASED RAG ENGINE")
print("=" * 60)
try:
# Initialize engine
engine = EnhancedRAGEngine(
session_id="role_test",
model="gpt-oss-120b",
use_real_time=False # Disable real-time for faster testing
)
# Test queries with different roles
test_cases = [
{
"query": "hi",
"domain": "general_medical",
"role": "patient"
},
{
"query": "hello",
"domain": "cardiology",
"role": "doctor"
},
{
"query": "hey",
"domain": "endocrinology",
"role": "student"
},
{
"query": "Compare first-line antibiotics for community-acquired pneumonia",
"domain": "infectious_disease",
"role": "clinician"
}
]
for i, test_case in enumerate(test_cases, 1):
print(f"\nπ Test Case {i}:")
print(f" Query: '{test_case['query']}'")
print(f" Domain: {test_case['domain']}")
print(f" Role: {test_case['role']}")
# Process query
response = engine.answer_research_question(
query=test_case['query'],
domain=test_case['domain'],
max_papers=5,
role=test_case['role'],
use_fallback=True
)
if response and 'error' not in response:
print(f"\nβ
Test Successful!")
print(f" Response type: {response.get('reasoning_method', 'unknown')}")
print(f" Papers used: {response.get('papers_used', 0)}")
print(f" Confidence: {response.get('confidence_score', {}).get('overall_score', 0)}/100")
# Check if it's a simple response
if response.get('reasoning_method') in ['greeting', 'simple_response', 'direct_response']:
print(f" β Simple query handled appropriately!")
# Show engine status
status = engine.get_engine_status()
print(f"\nπ§ Engine Status:")
print(f" Role-based responses: ENABLED")
print(f" Simple query handling: ENABLED")
print(f" Roles supported: {len(status['roles_supported'])}")
print(f" Total queries: {status['metrics']['total_queries']}")
return True
except Exception as e:
print(f"\nβ Test failed with exception: {e}")
import traceback
traceback.print_exc()
return False
if __name__ == "__main__":
# Run test
test_result = test_role_based_rag_engine()
if test_result:
print(f"\n{'=' * 60}")
print("π ROLE-BASED RAG ENGINE TEST COMPLETE!")
print(" Role-based reasoning: β")
print(" Simple query handling: β")
print(" Domain-agnostic approach: β")
print(" Guideline detection: β")
print(f"{'=' * 60}")
else:
print("\nβ Engine test failed") |