Spaces:
Sleeping
Sleeping
| from bs4 import BeautifulSoup | |
def parse_lesson(html):
    """Extract the lesson title and its text sections from an HTML document.

    Args:
        html: HTML markup of the lesson page, passed straight to
            ``BeautifulSoup`` with the ``lxml`` parser.

    Returns:
        A ``(lesson_title, sections)`` tuple. ``lesson_title`` is the
        stripped text of the first ``<h1>``, or ``""`` when the document
        has no ``<h1>``. ``sections`` is a list of dicts, one per non-empty
        ``<section>``/``<article>`` element in document order, each with:

        * ``"heading"``: stripped text of the first ``<h2>``/``<h3>``/``<h4>``
          found inside the element, or ``"General"`` when there is none.
        * ``"text"``: the element's full text, with text fragments joined
          by newlines.

    Note:
        The search is recursive, so a ``<section>`` nested inside another
        ``<section>``/``<article>`` yields its own entry in addition to
        having its text included in the enclosing element's entry.
    """
    soup = BeautifulSoup(html, "lxml")

    # find() returns None when no <h1> exists; guard it the same way the
    # per-section heading is guarded below instead of raising AttributeError.
    title_tag = soup.find("h1")
    lesson_title = title_tag.get_text(strip=True) if title_tag is not None else ""

    sections = []
    for container in soup.find_all(["section", "article"]):
        text = container.get_text("\n", strip=True)
        if not text:
            # Skip containers that hold no visible text at all.
            continue

        header = container.find(["h2", "h3", "h4"])
        heading = header.get_text(strip=True) if header is not None else "General"
        sections.append({"heading": heading, "text": text})

    return lesson_title, sections