Ansnaeem committed on
Commit
020c59d
·
verified ·
1 Parent(s): c4d4bda

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -22
app.py CHANGED
@@ -20,32 +20,64 @@ FORMATTING RULES (STRICT):
20
  """
21
 
22
def parse_script(full_text):
    """Split model output into spoken narration and scene descriptions.

    The text is expected to alternate between ``[SCRIPT]`` and
    ``[SCENE DESCRIPTION]`` sections (brackets and a trailing colon are
    optional when the tag starts a line). Text before the first tag is
    ignored.

    Args:
        full_text: Raw text produced by the model. May be empty or ``None``.

    Returns:
        A ``(clean_script, clean_scenes)`` tuple of strings. Narration
        segments are joined with a blank line, scene segments with a single
        newline. If no narration section is found, the whole text with
        common assistant preamble lines removed is returned as the script.
    """
    if not full_text:
        return "", ""

    # A tag is either bracketed (anywhere in the text) or a bare label that
    # opens a line and is followed by a colon / end of line. Anchoring this
    # way keeps "SCRIPT" from matching inside "DESCRIPTION" or ordinary words
    # such as "JavaScript", which previously leaked scene text ("ION] ...")
    # into the narration.
    tag_re = re.compile(
        r"\[\s*(?:SCRIPT|SCENE\s+DESCRIPTION)\s*\]\s*:?"
        r"|^[\[#*_ \t]*(?:SCRIPT|SCENE[ \t]+DESCRIPTION)[\]*_ \t]*(?::|\r?$)",
        re.IGNORECASE | re.MULTILINE,
    )

    # Tag boundaries plus a sentinel so the text after the last tag is
    # flushed by the same loop.
    boundaries = [(m.start(), m.end(), m.group(0)) for m in tag_re.finditer(full_text)]
    boundaries.append((len(full_text), len(full_text), ""))

    cleaned_dialogue = []
    scene_parts = []
    active = None  # Section the text since the previous tag belongs to.
    cursor = 0
    for start, end, tag in boundaries:
        segment = full_text[cursor:start].strip()
        if segment and active == "SCRIPT":
            # Remove markdown headers (e.g., "### Hook").
            clean = re.sub(r'^#+.*$', '', segment, flags=re.MULTILINE)
            # Remove bold/italic markers.
            clean = clean.replace("**", "").replace("*", "").replace("__", "")
            # Remove structural labels at the start of lines (e.g., "Hook:").
            clean = re.sub(
                r'^(Hook|Intro|Body|CTA|Conclusion|Outro):\s*',
                '',
                clean,
                flags=re.IGNORECASE | re.MULTILINE,
            ).strip()
            if clean:
                cleaned_dialogue.append(clean)
        elif segment and active == "SCENE":
            scene = segment.replace("**", "").strip()
            if scene:
                scene_parts.append(scene)
        active = "SCENE" if "scene" in tag.lower() else "SCRIPT"
        cursor = end

    clean_script = "\n\n".join(cleaned_dialogue)
    clean_scenes = "\n".join(scene_parts)

    # Fallback: no tagged narration, so drop typical assistant preamble
    # lines and use whatever remains.
    if not clean_script:
        clean_script = re.sub(
            r'^(Here is|Sure|Okay|I can help|Youtube Script).*?$',
            '',
            full_text,
            flags=re.IGNORECASE | re.MULTILINE,
        ).strip()

    return clean_script, clean_scenes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  def save_to_file(script_text):
51
  if not script_text:
 
20
  """
21
 
22
# Section markers in the model output. A marker is either bracketed (valid
# anywhere) or a bare label that opens a line and is followed by a colon or
# the end of the line. Anchoring this way keeps ordinary words such as
# "JavaScript", "transcript" or "description" from being mistaken for a tag.
_SECTION_TAG_RE = re.compile(
    r"\[\s*(?:SCRIPT|SCENE\s+DESCRIPTION)\s*\]\s*:?"
    r"|^[\[#*_ \t]*(?:SCRIPT|SCENE[ \t]+DESCRIPTION)[\]*_ \t]*(?::|\r?$)",
    re.IGNORECASE | re.MULTILINE,
)


def _clean_narration(text):
    """Strip everything from a narration segment that should not be spoken."""
    # 1. Remove markdown header lines.
    text = re.sub(r'^#+.*$', '', text, flags=re.MULTILINE)
    # 2. Remove text in parentheses (visual cues / directions).
    text = re.sub(r'\(.*?\)', '', text, flags=re.DOTALL)
    # 3. Remove bracketed cues such as "[Upbeat Music]".
    text = re.sub(r'\[.*?\]', '', text, flags=re.DOTALL)
    # 4. Remove bold/italic markers. This runs before label removal so that
    #    a bolded label like "**Hook:**" is recognised in the next step.
    text = text.replace("**", "").replace("*", "").replace("__", "")
    # 5. Remove structural labels at the start of lines.
    text = re.sub(
        r'^[ \t]*(?:Hook|Intro|Body|CTA|Conclusion|Outro|Segment|Step \d+):\s*',
        '',
        text,
        flags=re.IGNORECASE | re.MULTILINE,
    )
    return text.strip()


def parse_script(full_text):
    """Split model output into TTS-ready narration and scene descriptions.

    The text is expected to alternate between ``[SCRIPT]`` and
    ``[SCENE DESCRIPTION]`` sections (brackets and a trailing colon are
    optional when the tag starts a line). Text before the first tag is
    ignored.

    Args:
        full_text: Raw text produced by the model. May be empty or ``None``.

    Returns:
        A ``(script_text, scenes_text)`` tuple of strings, each made of the
        corresponding segments joined by a blank line. If no narration
        section is found, the whole text with bracketed spans removed is
        returned as the script.
    """
    if not full_text:
        return "", ""

    # Tag boundaries plus a sentinel so the text after the last tag is
    # flushed by the same loop.
    boundaries = [
        (m.start(), m.end(), m.group(0))
        for m in _SECTION_TAG_RE.finditer(full_text)
    ]
    boundaries.append((len(full_text), len(full_text), ""))

    clean_script = []
    clean_scenes = []
    current_tag = None  # Section the text since the previous tag belongs to.
    cursor = 0
    for start, end, tag in boundaries:
        content = full_text[cursor:start].strip()
        if content and current_tag == "SCRIPT":
            narration = _clean_narration(content)
            if narration:
                clean_script.append(narration)
        elif content and current_tag == "SCENE":
            # Drop dangling bold markers left behind by tags like
            # "**[SCENE DESCRIPTION]**".
            scene = content.replace("**", "").strip()
            if scene:
                clean_scenes.append(scene)
        # The section is decided by the matched tag alone, never by the
        # content, so narration mentioning "scene" or "script" is kept.
        current_tag = "SCENE" if "scene" in tag.lower() else "SCRIPT"
        cursor = end

    script_text = "\n\n".join(clean_script)
    scenes_text = "\n\n".join(clean_scenes)

    # Fallback: no tagged narration was found, so use the full text minus
    # any bracketed spans.
    if not script_text:
        script_text = re.sub(r'\[.*?\]', '', full_text, flags=re.DOTALL).strip()

    return script_text, scenes_text
81
 
82
  def save_to_file(script_text):
83
  if not script_text: