Spaces:

build-small-hackathon
/

TrailHead

Build error

App Files Files Community

TrailHead / src /rag.py

sxandie

feat: Phase 2 - Functional simulation, OSM POI Overpass integration, and first-aid RAG

262624f 20 days ago

Raw

History Blame Contribute Delete

2.76 kB

	import os
	import json
	import re

	# Guide data lives in a "data" folder next to this module; the paths are
	# built from the module's absolute location rather than the working directory.
	_MODULE_DIR = os.path.dirname(os.path.abspath(__file__))
	DATA_DIR = os.path.join(_MODULE_DIR, "data")
	GUIDES_FILE = os.path.join(DATA_DIR, "first_aid_guide.json")

	# Hand-picked trigger words per guide section. A guide whose "section" title
	# contains the key earns a high-relevance bonus for each trigger word that
	# appears in the query.
	_SECTION_KEYWORDS = {
	    "section 1": ("bleed", "wound", "cut", "blood", "bandage", "tourniquet", "injury"),
	    "section 2": ("cold", "hypothermia", "freeze", "frostbite", "shiver", "rewarm"),
	    "section 3": ("heat", "exhaustion", "dehydration", "stroke", "hot", "sunstroke", "sweat"),
	    "section 4": ("altitude", "ams", "hape", "hace", "headache", "dizzy", "mountain sickness", "nausea", "pulmonary", "cerebral"),
	    "section 5": ("sprain", "fracture", "break", "splint", "ankle", "joint", "bone", "rice", "strain"),
	}


	def _starts_word_in(term, haystack):
	    """Return True if *term* occurs in *haystack* at the start of a word.

	    Anchoring at a word boundary keeps stem matches ("bleed" -> "bleeding")
	    while rejecting mid-word hits ("ams" in "teams", "rice" in "price").
	    """
	    return re.search(r"\b" + re.escape(term), haystack) is not None


	def _score_guide(section, text, query_lower, query_words):
	    """Return the relevance score of one guide entry for the given query."""
	    section_lower = section.lower()
	    score = 0

	    # 1. Map-based matching: +3 per trigger word found in the query. The key
	    #    is matched as a whole phrase so "section 1" does not fire for
	    #    "section 10".
	    for key, keywords in _SECTION_KEYWORDS.items():
	        if re.search(r"\b" + re.escape(key) + r"\b", section_lower):
	            score += 3 * sum(_starts_word_in(kw, query_lower) for kw in keywords)

	    # 2. General overlap: +1 per distinct query word found in the guide.
	    combined_text = (section + " " + text).lower()
	    score += sum(_starts_word_in(word, combined_text) for word in query_words)
	    return score


	def retrieve_first_aid(query_text, *, guides_file=None, top_k=1):
	    """
	    Search the first-aid guide JSON for sections relevant to the query.

	    Args:
	        query_text: Free-form query string. ``None``, a non-string value or a
	            blank string yields no match.
	        guides_file: Optional path to a guide JSON file (a list of objects
	            with "section" and "text" keys). Defaults to the module-level
	            GUIDES_FILE.
	        top_k: Maximum number of best-scoring sections to return. Defaults
	            to 1; values below 1 are treated as 1.

	    Returns:
	        (markdown_grounding_text, source_list), or (None, []) when the file
	        is missing or unreadable, or when nothing matches the query.
	    """
	    if not isinstance(query_text, str) or not query_text.strip():
	        return None, []

	    path = GUIDES_FILE if guides_file is None else guides_file
	    try:
	        with open(path, "r", encoding="utf-8") as f:
	            guides = json.load(f)
	    except FileNotFoundError:
	        print(f"[rag.py] Guide file not found at {path}")
	        return None, []
	    except (OSError, ValueError) as e:
	        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
	        print(f"[rag.py] Error reading guides: {e}")
	        return None, []

	    if not isinstance(guides, list):
	        print(f"[rag.py] Error reading guides: expected a list, got {type(guides).__name__}")
	        return None, []

	    query_lower = query_text.lower()
	    # Words of one or two characters are too noisy for overlap scoring.
	    query_words = {w for w in re.findall(r"\w+", query_lower) if len(w) > 2}

	    matches = []
	    for guide in guides:
	        if not isinstance(guide, dict):
	            continue  # skip malformed entries instead of crashing
	        section = guide.get("section") or ""
	        text = guide.get("text") or ""
	        if not isinstance(section, str) or not isinstance(text, str):
	            continue
	        score = _score_guide(section, text, query_lower, query_words)
	        if score > 0:
	            matches.append((score, section, text))

	    if not matches:
	        return None, []

	    # Stable sort: guides with equal scores keep their order from the file.
	    matches.sort(key=lambda m: m[0], reverse=True)
	    best = matches[:max(top_k, 1)]

	    grounding_parts = [f"### {section}\n{text}\n" for _, section, text in best]
	    sources = [section for _, section, _ in best]
	    return "\n---\n".join(grounding_parts), sources