Spaces:
Sleeping
Sleeping
File size: 428 Bytes
709c859 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
from bs4 import BeautifulSoup
def parse_lesson(html):
    """Extract the lesson title and its text sections from an HTML document.

    Args:
        html: HTML markup of the lesson page; handed unchanged to
            ``BeautifulSoup`` using the ``lxml`` parser.

    Returns:
        A ``(lesson_title, sections)`` tuple.

        ``lesson_title`` is the whitespace-stripped text of the first
        ``<h1>`` element, or ``""`` when the document contains no ``<h1>``.

        ``sections`` is a list of ``{"heading": ..., "text": ...}`` dicts,
        one for every ``<section>`` or ``<article>`` element whose text is
        non-empty. ``heading`` is the text of the first ``<h2>``/``<h3>``/
        ``<h4>`` found inside the element, or ``"General"`` when there is
        none. ``text`` is the element's full text (heading included) with
        the individual strings stripped and joined by newlines.
    """
    soup = BeautifulSoup(html, "lxml")

    # find() yields None when no <h1> exists; use an empty title rather than
    # failing with AttributeError on the missing tag.
    title_tag = soup.find("h1")
    lesson_title = title_tag.get_text(strip=True) if title_tag is not None else ""

    sections = []
    # NOTE(review): find_all() also matches nested <section>/<article>
    # elements, so an outer element's text presumably repeats the text of
    # the inner ones -- confirm this duplication is intended.
    for sec in soup.find_all(["section", "article"]):
        text = sec.get_text("\n", strip=True)
        if not text:
            # Skip elements that contain no visible text.
            continue
        header = sec.find(["h2", "h3", "h4"])
        heading = header.get_text(strip=True) if header is not None else "General"
        sections.append({"heading": heading, "text": text})

    return lesson_title, sections