TrailHead / src /rag.py
sxandie's picture
feat: Phase 2 - Functional simulation, OSM POI Overpass integration, and first-aid RAG
262624f
Raw
History Blame Contribute Delete
2.76 kB
import os
import json
import re
# Bundled data lives in a "data" folder next to this module; the location is
# derived from the module's own absolute path rather than the working directory.
_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(_MODULE_DIR, "data")
# JSON file that retrieve_first_aid() reads its guide sections from.
GUIDES_FILE = os.path.join(DATA_DIR, "first_aid_guide.json")
# Keywords that signal a particular guide section. The dict key is looked up
# as a substring of the guide's lower-cased "section" title; each listed
# keyword found in the query is worth 3 points for that guide.
_SECTION_KEYWORDS = {
    "section 1": ["bleed", "wound", "cut", "blood", "bandage", "tourniquet", "injury"],
    "section 2": ["cold", "hypothermia", "freeze", "frostbite", "shiver", "rewarm"],
    "section 3": ["heat", "exhaustion", "dehydration", "stroke", "hot", "sunstroke", "sweat"],
    "section 4": ["altitude", "ams", "hape", "hace", "headache", "dizzy", "mountain sickness", "nausea", "pulmonary", "cerebral"],
    "section 5": ["sprain", "fracture", "break", "splint", "ankle", "joint", "bone", "rice", "strain"],
}


def _as_text(value):
    """Coerce a guide field to ``str``; a missing or null field becomes ``""``."""
    if value is None:
        return ""
    return value if isinstance(value, str) else str(value)


def _load_guides(path):
    """Return the list of guide entries stored in the JSON file at *path*.

    Returns ``None`` (after printing a diagnostic) when the file is missing,
    unreadable, not valid JSON, or does not contain a top-level list.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            guides = json.load(f)
    except FileNotFoundError:
        print(f"[rag.py] Guide file not found at {path}")
        return None
    except (OSError, ValueError) as e:
        # ValueError covers both JSON decoding and UTF-8 decoding failures.
        print(f"[rag.py] Error reading guides: {e}")
        return None
    if not isinstance(guides, list):
        print(
            "[rag.py] Error reading guides: expected a JSON list, "
            f"got {type(guides).__name__}"
        )
        return None
    return guides


def _score_guide(section, text, query_lower, query_words):
    """Return the relevance score of one guide entry for the query.

    NOTE(review): both passes are substring tests, so a keyword such as
    "hot" also fires on "photo". Kept as-is to preserve the existing ranking.
    """
    score = 0
    section_lower = section.lower()

    # 1. Keyword-map matching: 3 points per section keyword found in the query.
    for key, keywords in _SECTION_KEYWORDS.items():
        if key in section_lower:
            score += 3 * sum(1 for keyword in keywords if keyword in query_lower)

    # 2. General overlap: 1 point per query word (longer than two characters)
    #    that occurs anywhere in the guide's title or body.
    haystack = f"{section} {text}".lower()
    score += sum(1 for word in query_words if len(word) > 2 and word in haystack)
    return score


def retrieve_first_aid(query_text, guides_file=None):
    """Find the first-aid guide section most relevant to a query.

    Every entry of the guide file is scored against the query and the single
    highest-scoring entry is returned as grounding text. When several entries
    tie, the one that appears first in the file wins.

    Args:
        query_text: Free-text query. ``None``, empty, or non-string values
            yield no match instead of raising.
        guides_file: Optional path to the JSON guide file. Defaults to the
            module-level ``GUIDES_FILE``.

    Returns:
        ``(markdown_grounding_text, source_list)`` for the best match, where
        the text is ``"### <section>\\n<text>\\n"`` and the list holds that
        section title; ``(None, [])`` when nothing matches or the guide file
        cannot be loaded.
    """
    if not query_text or not isinstance(query_text, str):
        return None, []

    path = GUIDES_FILE if guides_file is None else guides_file
    guides = _load_guides(path)
    if not guides:
        return None, []

    query_lower = query_text.lower()
    query_words = set(re.findall(r"\w+", query_lower))

    best_score = 0
    best_entry = None
    for guide in guides:
        if not isinstance(guide, dict):
            # Skip malformed entries rather than failing the whole lookup.
            continue
        section = _as_text(guide.get("section"))
        text = _as_text(guide.get("text"))
        score = _score_guide(section, text, query_lower, query_words)
        # Strict ">" keeps the earliest entry on ties and ignores zero scores.
        if score > best_score:
            best_score = score
            best_entry = (section, text)

    if best_entry is None:
        return None, []

    # Build the result from the same defaulted values used for scoring, so an
    # entry lacking "section" or "text" cannot raise KeyError here.
    section, text = best_entry
    return f"### {section}\n{text}\n", [section]