import re

# Sentence boundary: whitespace that follows '.', '!' or '?'.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')

# Substrings that mark a sentence as exam-paper noise. Matching is a
# case-sensitive substring test, so e.g. "Unit" also matches "United".
_NOISE_MARKERS = ("APRIL/MAY", "CO1", "Marks", "Bloom", "Unit", "Semester")

# Sentences with fewer words than this are dropped.
_MIN_WORDS = 5


def chunk_text(text, source, chunk_size=120):
    """Split *text* into chunks of whole sentences, tagged with *source*.

    The text is split into sentences at whitespace following ``.``, ``!``
    or ``?``. A sentence is discarded when it contains any noise marker
    (see ``_NOISE_MARKERS``) or has fewer than five whitespace-separated
    words. Remaining sentences are accumulated until adding the next one
    would push the running word count above *chunk_size*; the accumulated
    sentences are then emitted as one chunk and a new chunk is started.

    A single sentence longer than *chunk_size* is never split; it becomes
    a chunk on its own.

    Args:
        text: The raw text to split.
        source: Value stored under the ``"source"`` key of every chunk.
        chunk_size: Word budget per chunk.

    Returns:
        A list of ``{"source": source, "text": str}`` dicts, where ``text``
        is the chunk's sentences joined by single spaces. Empty when no
        sentence survives filtering.
    """
    chunks = []
    current = []
    length = 0

    for sentence in _SENTENCE_BOUNDARY.split(text):
        sentence = sentence.strip()

        # remove exam noise
        if any(marker in sentence for marker in _NOISE_MARKERS):
            continue

        word_count = len(sentence.split())
        if word_count < _MIN_WORDS:
            continue

        # Flush only when something has been accumulated; otherwise an
        # over-long first sentence would produce a chunk with empty text.
        if current and length + word_count > chunk_size:
            chunks.append({"source": source, "text": " ".join(current)})
            current = []
            length = 0

        current.append(sentence)
        length += word_count

    if current:
        chunks.append({"source": source, "text": " ".join(current)})

    return chunks


def compress_context(text, question, top_k=3):
    """Return the sentences of *text* that best match *question*.

    *text* is split on ``". "`` and *question* is lower-cased and split on
    whitespace into keywords. Each sentence scores one point per keyword
    that occurs as a substring of the lower-cased sentence (a keyword
    repeated in the question counts each time).

    Args:
        text: Context to compress.
        question: Query whose words are used as keywords.
        top_k: Maximum number of sentences to keep.

    Returns:
        The *top_k* highest-scoring sentences joined by ``". "``, ordered
        by descending score. Ties are broken by descending string order of
        the sentence itself, not by position in *text*.
    """
    keywords = question.lower().split()

    scored = []
    for sentence in text.split(". "):
        lowered = sentence.lower()  # lower once per sentence, not per keyword
        score = sum(1 for keyword in keywords if keyword in lowered)
        scored.append((score, sentence))

    # Tuples compare by score first, then by sentence text.
    scored.sort(reverse=True)

    return ". ".join(sentence for _, sentence in scored[:top_k])