import json
import os
import re
from typing import List, Optional, Tuple

DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data")
GUIDES_FILE = os.path.join(DATA_DIR, "first_aid_guide.json")

# Keywords that strongly signal a particular guide section. Each map key is
# matched as a substring of the guide's lower-cased "section" title, and each
# keyword as a substring of the lower-cased query.
# NOTE(review): substring matching lets "bleeding" hit "bleed", but it also
# lets "execute" hit "cut" and a "Section 10" title hit "section 1" — tighten
# if the guide file or the queries make that a problem in practice.
_SECTION_KEYWORDS = {
    "section 1": ("bleed", "wound", "cut", "blood", "bandage", "tourniquet", "injury"),
    "section 2": ("cold", "hypothermia", "freeze", "frostbite", "shiver", "rewarm"),
    "section 3": ("heat", "exhaustion", "dehydration", "stroke", "hot", "sunstroke", "sweat"),
    "section 4": (
        "altitude", "ams", "hape", "hace", "headache", "dizzy",
        "mountain sickness", "nausea", "pulmonary", "cerebral",
    ),
    "section 5": ("sprain", "fracture", "break", "splint", "ankle", "joint", "bone", "rice", "strain"),
}

_KEYWORD_WEIGHT = 3       # points per curated keyword found in the query
_OVERLAP_WEIGHT = 1       # points per query word found in the guide text
_MIN_OVERLAP_WORD_LEN = 3  # query words shorter than this are ignored


def _load_guides() -> Optional[list]:
    """Return the parsed guide list from GUIDES_FILE, or None on any failure."""
    try:
        with open(GUIDES_FILE, "r", encoding="utf-8") as f:
            guides = json.load(f)
    except FileNotFoundError:
        print(f"[rag.py] Guide file not found at {GUIDES_FILE}")
        return None
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"[rag.py] Error reading guides: {e}")
        return None

    if not isinstance(guides, list):
        print(f"[rag.py] Error reading guides: expected a JSON list in {GUIDES_FILE}")
        return None
    return guides


def _score_guide(section: str, text: str, query_lower: str, query_words: set) -> int:
    """Return the relevance score of one guide entry for the given query."""
    section_lower = section.lower()
    score = 0

    # 1. Curated keyword map: only the keyword list(s) whose key appears in
    #    this guide's section title contribute.
    for key, keywords in _SECTION_KEYWORDS.items():
        if key in section_lower:
            score += _KEYWORD_WEIGHT * sum(kw in query_lower for kw in keywords)

    # 2. General overlap: each sufficiently long query word that occurs
    #    anywhere in the section title or body text.
    combined_text = f"{section} {text}".lower()
    score += _OVERLAP_WEIGHT * sum(
        len(word) >= _MIN_OVERLAP_WORD_LEN and word in combined_text
        for word in query_words
    )
    return score


def retrieve_first_aid(
    query_text: str, *, top_k: int = 1
) -> Tuple[Optional[str], List[str]]:
    """Search first_aid_guide.json for sections relevant to the query.

    Every guide entry is scored against the query (curated keywords plus
    plain word overlap) and the highest-scoring entries are rendered as
    Markdown to ground the model response. Ties keep the file's order.

    Args:
        query_text: Free-text user query. ``None``, non-string, or blank
            input yields no match.
        top_k: Maximum number of guide sections to return (default 1).
            Values below 1 yield no match.

    Returns:
        ``(markdown_grounding_text, source_list)`` where ``source_list``
        holds the section titles used, or ``(None, [])`` when the guide file
        is missing or unreadable or nothing matches.
    """
    if not isinstance(query_text, str) or not query_text.strip():
        return None, []
    if top_k < 1:
        return None, []

    guides = _load_guides()
    if not guides:
        return None, []

    query_lower = query_text.lower()
    query_words = set(re.findall(r"\w+", query_lower))

    scored = []
    for guide in guides:
        if not isinstance(guide, dict):
            # Skip malformed entries rather than failing the whole lookup.
            continue
        section = str(guide.get("section") or "")
        text = str(guide.get("text") or "")
        score = _score_guide(section, text, query_lower, query_words)
        if score > 0:
            # Keep the extracted strings so rendering never re-indexes the
            # raw entry (which may lack a key).
            scored.append((score, section, text))

    if not scored:
        return None, []

    # list.sort is stable, so equally scored guides stay in file order.
    scored.sort(key=lambda item: item[0], reverse=True)
    best = scored[:top_k]

    grounding_text = "\n---\n".join(
        f"### {section}\n{text}\n" for _, section, text in best
    )
    sources = [section for _, section, _ in best]
    return grounding_text, sources