Ansnaeem committed on
Commit
041a66a
·
verified ·
1 Parent(s): 020c59d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -55
app.py CHANGED
@@ -12,72 +12,68 @@ SYSTEM_PROMPT = """You are 'ScriptForge AI', a professional YouTube Script Write
12
  Your goal is to write highly engaging scripts in the 2nd person (using 'You', 'Your').
13
 
14
  FORMATTING RULES (STRICT):
15
- 1. ONLY output content within [SCENE DESCRIPTION] and [SCRIPT] tags.
16
- 2. NO introduction text, NO "Here is your script", and NO titles outside the tags.
17
- 3. [SCENE DESCRIPTION] is for visuals.
18
- 4. [SCRIPT] is ONLY for the spoken dialogue. Do NOT include headings like "Hook:", "Intro:", or "CTA:" inside the [SCRIPT] tag.
19
- 5. Talk directly to the audience.
 
 
 
 
20
  """
21
 
22
  def parse_script(full_text):
23
- # Use regular expression to find all instances of the tags
24
- # This pattern matches [SCRIPT] or [SCENE DESCRIPTION] with optional brackets and colons
25
- tag_pattern = r'(\[?(?:SCRIPT|SCENE DESCRIPTION)\]?:?)'
26
 
27
- # Split the text by the tags
28
- parts = re.split(tag_pattern, full_text, flags=re.IGNORECASE)
29
 
30
- clean_script = []
31
- clean_scenes = []
32
 
33
  current_tag = None
34
 
35
  for part in parts:
36
- lower_part = part.lower().strip()
37
-
38
- # Check if the current part is a tag
39
- if "script" in lower_part and ("[" in lower_part or "script" == lower_part.strip("[]:")):
40
- current_tag = "SCRIPT"
41
- elif "scene" in lower_part and ("[" in lower_part or "scene" in lower_part.strip("[]:")):
42
- current_tag = "SCENE"
43
- elif current_tag:
44
- # This is the content following a tag
45
- content = part.strip()
46
- if not content:
47
- continue
48
-
49
- if current_tag == "SCRIPT":
50
- # Agressive cleaning for TTS:
51
- # 1. Remove markdown headers
52
- content = re.sub(r'^#+.*$', '', content, flags=re.MULTILINE)
53
- # 2. Remove common metadata labels at the start of lines
54
- content = re.sub(r'^(Hook|Intro|Body|CTA|Conclusion|Outro|Segment|Step \d+):\s*', '', content, flags=re.IGNORECASE | re.MULTILINE)
55
- # 3. Remove text in parentheses (visual cues/directions)
56
- content = re.sub(r'\(.*?\)', '', content, flags=re.DOTALL)
57
- # 4. Remove brackets like [Upbeat Music] that might be inside a script tag
58
- content = re.sub(r'\[.*?\]', '', content, flags=re.DOTALL)
59
- # 5. Remove bolding/formatting
60
- content = content.replace("**", "").replace("*", "").replace("__", "")
61
 
62
- final_content = content.strip()
63
- if final_content:
64
- clean_script.append(final_content)
65
 
66
- elif current_tag == "SCENE":
67
- clean_scenes.append(content)
68
-
69
- # Join the cleaned parts
70
- script_text = "\n\n".join(clean_script)
71
- scenes_text = "\n\n".join(clean_scenes)
72
-
73
- # Final cleanup of the script text to ensure no lingering "ION]" or empty lines
74
- script_text = re.sub(r'^ION\]\s*', '', script_text, flags=re.IGNORECASE | re.MULTILINE)
75
-
76
- # Fallback: if no script was found, use the clean version of the full text
77
- if not script_text and full_text:
78
- script_text = re.sub(r'\[.*?\]', '', full_text, flags=re.DOTALL).strip()
79
-
80
- return script_text, scenes_text
81
 
82
  def save_to_file(script_text):
83
  if not script_text:
 
12
  Your goal is to write highly engaging scripts in the 2nd person (using 'You', 'Your').
13
 
14
  FORMATTING RULES (STRICT):
15
+ 1. Use ONLY these two tags: [VISUAL] for scenes, and [AUDIO] for spoken words.
16
+ 2. [VISUAL]: Describe the visuals, camera shots, or on-screen text.
17
+ 3. [AUDIO]: Write ONLY the spoken words. No actor directions, no "Host:", no markdown headers.
18
+ 4. Do not output any intro text. Start directly with a [VISUAL] or [AUDIO] tag.
19
+ 5. Example:
20
+ [VISUAL]
21
+ Wide shot of a clear blue sky.
22
+ [AUDIO]
23
+ Today is going to be amazing.
24
  """
25
 
26
  def parse_script(full_text):
27
+ # Normalize tags just in case
28
+ full_text = re.sub(r'\[?(?:SCENE DESCRIPTION|SCENE|VISUALS)\]?:?', '[VISUAL]', full_text, flags=re.IGNORECASE)
29
+ full_text = re.sub(r'\[?(?:SCRIPT|NARRATION|AUDIO)\]?:?', '[AUDIO]', full_text, flags=re.IGNORECASE)
30
 
31
+ # Split by tags
32
+ parts = re.split(r'(\[(?:VISUAL|AUDIO)\])', full_text, flags=re.IGNORECASE)
33
 
34
+ clean_audio = []
35
+ clean_visuals = []
36
 
37
  current_tag = None
38
 
39
  for part in parts:
40
+ part = part.strip()
41
+ if not part:
42
+ continue
43
+
44
+ if part.upper() == "[AUDIO]":
45
+ current_tag = "AUDIO"
46
+ elif part.upper() == "[VISUAL]":
47
+ current_tag = "VISUAL"
48
+ elif current_tag == "AUDIO":
49
+ # Clean Audio
50
+ # Remove parenthetical notes like (whispering)
51
+ content = re.sub(r'\(.*?\)', '', part, flags=re.DOTALL)
52
+ # Remove headers
53
+ content = re.sub(r'^#+.*$', '', content, flags=re.MULTILINE)
54
+ # Remove labels like "Host:"
55
+ content = re.sub(r'^\w+:\s*', '', content, flags=re.MULTILINE)
56
+ # Remove formatting
57
+ content = content.replace("**", "").replace("*", "")
58
+
59
+ if content.strip():
60
+ clean_audio.append(content.strip())
 
 
 
 
61
 
62
+ elif current_tag == "VISUAL":
63
+ clean_visuals.append(part.strip())
 
64
 
65
+ # Fallback: If no tags found, attempt heuristic split
66
+ if not clean_audio and not clean_visuals:
67
+ lines = full_text.split('\n')
68
+ for line in lines:
69
+ line = line.strip()
70
+ if not line: continue
71
+ if line.startswith('(') or line.startswith('[') or "EXT." in line or "INT." in line:
72
+ clean_visuals.append(line)
73
+ else:
74
+ clean_audio.append(line)
75
+
76
+ return "\n\n".join(clean_audio), "\n\n".join(clean_visuals)
 
 
 
77
 
78
  def save_to_file(script_text):
79
  if not script_text: