Spaces:
Sleeping
Sleeping
File size: 1,259 Bytes
1505bbf | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 | import re
def chunk_text(text, source, chunk_size=120):
    """Split *text* into sentence-aligned chunks of at most ~*chunk_size* words.

    The text is split into sentences at whitespace that follows ``.``, ``!``
    or ``?``. Sentences containing any of the exam-noise markers, and
    sentences with fewer than five words, are dropped. The remaining
    sentences are packed greedily, in order, into chunks; a chunk is closed
    as soon as adding the next sentence would push it past *chunk_size*
    words. Sentences are never split, so a single sentence longer than
    *chunk_size* becomes a chunk of its own.

    Args:
        text: The raw text to chunk.
        source: Value stored under the ``"source"`` key of every chunk.
        chunk_size: Word budget per chunk.

    Returns:
        A list of ``{"source": source, "text": chunk_text}`` dicts, where
        the sentences of a chunk are joined with single spaces.
    """
    # Substrings (case-sensitive) that mark a sentence as exam boilerplate.
    noise_markers = ("APRIL/MAY", "CO1", "Marks", "Bloom", "Unit", "Semester")

    chunks = []
    current = []
    length = 0

    for sentence in re.split(r'(?<=[.!?])\s+', text):
        sentence = sentence.strip()

        # remove exam noise
        if any(marker in sentence for marker in noise_markers):
            continue

        word_count = len(sentence.split())
        if word_count < 5:
            continue

        # Flush only when something has been collected; otherwise an
        # over-long first sentence would emit a chunk with empty text.
        if current and length + word_count > chunk_size:
            chunks.append({
                "source": source,
                "text": " ".join(current),
            })
            current = []
            length = 0

        current.append(sentence)
        length += word_count

    if current:
        chunks.append({
            "source": source,
            "text": " ".join(current),
        })

    return chunks
def compress_context(text, question, top_k=3):
    """Return the *top_k* sentences of *text* that best match *question*.

    *text* is split on the literal ``". "`` and each piece is scored by how
    many whitespace-separated words of the lower-cased *question* occur in
    it as case-insensitive substrings (a word repeated in the question
    counts each time). Pieces are ranked by descending score; ties are
    broken by descending string order of the piece itself, so the result
    is not in original document order.

    Args:
        text: The context to compress.
        question: The query whose words are used as keywords.
        top_k: Maximum number of sentences to keep. Values below zero are
            treated as zero.

    Returns:
        The selected sentences joined with ``". "``, or an empty string
        when *top_k* is zero or negative.
    """
    keywords = question.lower().split()

    scored = []
    for sentence in text.split(". "):
        # Lower-case once per sentence rather than once per keyword.
        lowered = sentence.lower()
        score = sum(1 for keyword in keywords if keyword in lowered)
        scored.append((score, sentence))

    scored.sort(reverse=True)

    # Clamp so a negative top_k cannot turn into an "all but last" slice.
    limit = max(top_k, 0)
    return ". ".join(sentence for _, sentence in scored[:limit])