from bs4 import BeautifulSoup


def parse_lesson(html):
    """Extract the lesson title and per-section text from an HTML document.

    Args:
        html: Markup to parse; passed straight to ``BeautifulSoup`` with the
            ``lxml`` parser.

    Returns:
        A ``(lesson_title, sections)`` tuple. ``lesson_title`` is the stripped
        text of the first ``<h1>``, or ``""`` when the document has no
        ``<h1>``. ``sections`` is a list of dicts, one per ``<section>`` or
        ``<article>`` element that contains any text, each with:

        * ``"heading"``: stripped text of the first ``<h2>``/``<h3>``/``<h4>``
          found inside the element, or ``"General"`` when there is none.
        * ``"text"``: all text inside the element (heading included), with
          the individual text fragments stripped and joined by newlines.

    Note:
        ``find_all`` searches recursively, so a ``<section>`` nested inside
        another ``<section>``/``<article>`` yields its own entry *and* its
        text also appears in the enclosing element's entry.
    """
    soup = BeautifulSoup(html, "lxml")

    # find() returns None when no <h1> exists; guard it so a title-less
    # document still yields its sections instead of raising AttributeError.
    title_tag = soup.find("h1")
    lesson_title = title_tag.get_text(strip=True) if title_tag is not None else ""

    sections = []
    for sec in soup.find_all(["section", "article"]):
        text = sec.get_text("\n", strip=True)
        if not text:
            # Skip containers with no textual content at all.
            continue

        header = sec.find(["h2", "h3", "h4"])
        heading = header.get_text(strip=True) if header is not None else "General"
        sections.append({"heading": heading, "text": text})

    return lesson_title, sections