Spaces:
Runtime error
Runtime error
File size: 1,106 Bytes
from bs4 import BeautifulSoup
import json
def parse_scene_document(html_content):
    """Parse a scene HTML document into a dictionary.

    The scene number is the part of the first ``<h3>`` text that follows
    the first ``': '``. The synopsis is the first ``<p>`` text with the
    ``'Synopsis:'`` label removed. Frames are read from the first
    ``<table>``, one per ``<tr>`` after the first (header) row, using the
    first six ``<td>`` cells of each row.

    Args:
        html_content: HTML markup to parse.

    Returns:
        A dict with keys ``'scene_number'``, ``'synopsis'`` and ``'frames'``.
        ``'frames'`` is a list of dicts with keys ``'frame_num'``,
        ``'description'``, ``'characters'``, ``'narration'``, ``'location'``
        and ``'setting'``; it is empty when the document has no table.

    Raises:
        AttributeError: If the document has no ``<h3>`` or no ``<p>``.
        IndexError: If the header text contains no ``': '`` or a data row
            has fewer than six ``<td>`` cells.
        ValueError, SyntaxError: If a row's characters cell is not a valid
            Python literal.
    """
    # Imported here so this function does not depend on a module-level
    # import that the rest of the file may not have.
    import ast

    soup = BeautifulSoup(html_content, 'html.parser')

    scene_header = soup.find('h3').text
    scene_number = scene_header.split(': ')[1]

    # Extract synopsis
    synopsis = soup.find('p').text.replace('Synopsis:', '').strip()

    # Extract frames from table
    frames = []
    table = soup.find('table')
    if table:
        for row in table.find_all('tr')[1:]:  # Skip header row
            cells = [cell.text.strip() for cell in row.find_all('td')]
            frame = {
                'frame_num': cells[0],
                'description': cells[1],
                # SECURITY: this used to be eval(), which would execute any
                # expression embedded in the HTML. literal_eval only accepts
                # Python literals (e.g. "['Alice', 'Bob']") and raises on
                # anything else.
                'characters': ast.literal_eval(cells[2]),
                'narration': cells[3],
                'location': cells[4],
                'setting': cells[5],
            }
            print(frame)
            frames.append(frame)

    return {
        'scene_number': scene_number,
        'synopsis': synopsis,
        'frames': frames,
    }