File size: 428 Bytes
709c859
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from bs4 import BeautifulSoup


def parse_lesson(html):
    """Extract the lesson title and its text sections from an HTML document.

    Args:
        html: The HTML markup of the lesson page, in any form accepted by
            ``BeautifulSoup`` (it is passed through unchanged).

    Returns:
        A ``(lesson_title, sections)`` tuple. ``lesson_title`` is the
        whitespace-stripped text of the first ``<h1>``, or ``""`` when the
        document has no ``<h1>``. ``sections`` is a list of dicts, one per
        non-empty ``<section>``/``<article>`` element in document order, each
        with the keys ``"heading"`` (text of the first ``<h2>``/``<h3>``/
        ``<h4>`` inside the element, or ``"General"`` if there is none) and
        ``"text"`` (the element's text with pieces joined by newlines).
    """
    soup = BeautifulSoup(html, "lxml")

    # find() returns None when nothing matches; guard so a page without an
    # <h1> yields an empty title instead of raising AttributeError.
    title_tag = soup.find("h1")
    lesson_title = title_tag.get_text(strip=True) if title_tag is not None else ""

    sections = []
    # NOTE: find_all() searches recursively, so a <section> nested inside an
    # <article> (or vice versa) gets its own entry and its text also appears
    # in the enclosing element's entry.
    for sec in soup.find_all(["section", "article"]):
        text = sec.get_text("\n", strip=True)
        if not text:
            # Skip elements that contain no visible text at all.
            continue

        header = sec.find(["h2", "h3", "h4"])
        sections.append({
            "heading": header.get_text(strip=True) if header else "General",
            "text": text,
        })

    return lesson_title, sections