Spaces:
Sleeping
Sleeping
File size: 949 Bytes
import re
# Keywords at which a comma-separated part is cut into smaller pieces.
# The keyword itself (plus any inflected ending) is discarded.
VERB_SPLITS = [
    "write", "explain", "implement", "design",
    "define", "describe", "reverse", "asked",
    "questions", "discussion",
]

# Phrases that cause an entire chunk to be skipped when present.
STOP_PHRASES = [
    "round", "mostly", "started with", "ended with",
    "concluded", "experience", "interview",
]


def _keyword_pattern(keywords, consume_suffix):
    """Compile a case-insensitive regex matching any keyword at a word start.

    Args:
        keywords: Iterable of literal keyword strings; empty strings are
            ignored.
        consume_suffix: When true, the match also swallows the word
            characters following the keyword, so an inflected form such as
            "explained" is matched as a whole.

    Returns:
        A compiled pattern. With no usable keywords it never matches.
    """
    usable = [k for k in keywords if k]
    if not usable:
        # An empty alternation would match at every word boundary.
        return re.compile(r"(?!)")
    # Longest first so no alternative is shadowed by one of its prefixes.
    usable.sort(key=len, reverse=True)
    alternatives = "|".join(re.escape(k) for k in usable)
    tail = r"\w*" if consume_suffix else ""
    return re.compile(r"\b(?:" + alternatives + ")" + tail, re.IGNORECASE)


def extract_atomic_units(chunks):
    """Break text chunks into short fragments of at least three words.

    Each chunk is skipped if it contains a stop phrase. Otherwise it is split
    on commas, each part is split again at every split keyword, and the
    whitespace-stripped pieces with three or more words are collected.

    Keywords are matched case-insensitively and only where a word starts, so
    "background" does not count as the stop phrase "round" and "rewrite" is
    not cut at "write". Split keywords also consume their inflected ending,
    so "explained ..." does not leave a stray "ed ..." fragment.

    Args:
        chunks: Iterable of strings, or None.

    Returns:
        List of fragments in input order; empty when nothing qualifies.
    """
    # Built per call so runtime edits to the keyword lists are honoured;
    # the re module caches compiled patterns, so this stays cheap.
    stop_re = _keyword_pattern(STOP_PHRASES, False)
    split_re = _keyword_pattern(VERB_SPLITS, True)

    atomic = []
    for chunk in chunks or ():
        if stop_re.search(chunk):
            continue
        for part in chunk.split(","):
            for piece in split_re.split(part):
                piece = piece.strip()
                if len(piece.split()) >= 3:
                    atomic.append(piece)
    return atomic
|