Spaces:
Sleeping
Sleeping
| import re | |
| def delete_special(pre_text, character_list): | |
| for c in character_list: | |
| pre_text = pre_text.replace(c, "") | |
| return pre_text | |
| def break_down2scenes(text: str): | |
| # Split the text based on the 's#' pattern | |
| scenes = re.split(r'(s#\d+)', text) | |
| # Remove empty elements from the split results | |
| scenes = [scene for scene in scenes if scene.strip()] | |
| scenes_list = [] | |
| current_scene_number = None | |
| for i in range(0, len(scenes), 2): # Process the 's#' marker and its corresponding text as pairs | |
| scene_marker = scenes[i].strip() | |
| try: | |
| scene_number = int(scene_marker.split('#')[1]) # Extract the number part | |
| except: | |
| if len(scenes) % 2 == 1: | |
| return [scenes[0]] | |
| import ipdb;ipdb.set_trace(context=10) | |
| scene_text = scenes[i+1].strip() if i+1 < len(scenes) else "" | |
| # Check if the scene numbers are in the correct sequence | |
| if current_scene_number is not None: | |
| expected_scene_number = current_scene_number + 1 | |
| if scene_number != expected_scene_number: | |
| raise ValueError(f"Unexpected scene number: {scene_number}, expected {expected_scene_number}") | |
| # Store the scene number and its corresponding text together | |
| scenes_list.append({ | |
| 'detected_scene_number': scene_number, | |
| 'text': f"{scene_marker}\n{scene_text}".strip() | |
| }) | |
| filtered_scene_list = [] | |
| scene_number = 0 | |
| for scene_dict in scenes_list: | |
| detected_scene_number = int(scene_dict['detected_scene_number']) | |
| filtered_scene_list.append(scene_dict['text']) | |
| scene_number = detected_scene_number | |
| return filtered_scene_list | |