import ast
import json

from bs4 import BeautifulSoup


def parse_scene_document(html_content):
    """Parse a scene HTML document into its scene number, synopsis and frames.

    The document is read as follows:

    * the first ``<h3>`` supplies the scene header; the scene number is the
      text between the first and second ``': '`` separators (or everything
      after the first one when there is only one);
    * the first ``<p>`` supplies the synopsis, with every ``'Synopsis:'``
      label removed and surrounding whitespace stripped;
    * the first ``<table>``, if any, supplies one frame per body row. The
      first ``<tr>`` is treated as the header row and skipped. Each remaining
      row is read positionally from its ``<td>`` cells: frame number,
      description, characters, narration, location, setting.

    Args:
        html_content: Markup handed straight to ``BeautifulSoup`` with the
            ``'html.parser'`` backend.

    Returns:
        A dict with keys ``'scene_number'`` (str), ``'synopsis'`` (str) and
        ``'frames'`` (list of dicts with keys ``'frame_num'``,
        ``'description'``, ``'characters'``, ``'narration'``, ``'location'``
        and ``'setting'``). ``'frames'`` is empty when there is no table.

    Raises:
        AttributeError: If the document has no ``<h3>`` or no ``<p>``.
        IndexError: If the header contains no ``': '`` separator, or a row
            has some ``<td>`` cells but fewer than six.
        ValueError, SyntaxError: If a characters cell is not a valid Python
            literal (for example ``"['Alice', 'Bob']"``).
    """
    soup = BeautifulSoup(html_content, 'html.parser')

    scene_header = soup.find('h3').text
    scene_number = scene_header.split(': ')[1]

    # Extract synopsis
    synopsis = soup.find('p').text.replace('Synopsis:', '').strip()

    # Extract frames from table
    frames = []
    table = soup.find('table')
    if table is not None:
        rows = table.find_all('tr')[1:]  # Skip header row
        for row in rows:
            cells = row.find_all('td')
            if not cells:
                # Rows without data cells (extra <th> header rows, empty
                # <tr> elements) carry no frame; skip them instead of
                # failing on cells[0].
                continue

            frame = {
                'frame_num': cells[0].text.strip(),
                'description': cells[1].text.strip(),
                # SECURITY: this cell comes from the HTML input, so it must
                # never be passed to eval(). literal_eval only accepts Python
                # literals (e.g. "['Alice', 'Bob']") and cannot execute code.
                'characters': ast.literal_eval(cells[2].text.strip()),
                'narration': cells[3].text.strip(),
                'location': cells[4].text.strip(),
                'setting': cells[5].text.strip(),
            }
            # NOTE(review): each frame is echoed to stdout as it is parsed;
            # kept so existing output is unchanged.
            print(frame)
            frames.append(frame)

    return {
        'scene_number': scene_number,
        'synopsis': synopsis,
        'frames': frames,
    }