Spaces:
Sleeping
Sleeping
Update pipeline/script_engine.py
Browse files- pipeline/script_engine.py +32 -23
pipeline/script_engine.py
CHANGED
|
@@ -110,37 +110,46 @@ class ScriptEngine:
|
|
| 110 |
|
| 111 |
# ------------------------------------------------------------------
|
| 112 |
def _parse(self, raw: str) -> dict:
|
| 113 |
-
"""
|
| 114 |
-
#
|
| 115 |
-
# SmolLM often repeats the prompt or adds 'Assistant:' prefix
|
| 116 |
if "Assistant:" in raw:
|
| 117 |
raw = raw.split("Assistant:")[-1]
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
|
| 139 |
def _validate(self, data: dict):
|
| 140 |
required = {"title", "scenes"}
|
| 141 |
missing = required - data.keys()
|
| 142 |
if missing:
|
| 143 |
raise ValueError(f"Missing essential fields: {missing}")
|
|
|
|
|
|
|
| 144 |
|
| 145 |
def _rule_based_fallback(self, story: str, style: str, duration: int) -> dict:
|
| 146 |
"""Minimal offline scene splitter — splits story into sentences."""
|
|
|
|
| 110 |
|
| 111 |
# ------------------------------------------------------------------
|
| 112 |
def _parse(self, raw: str) -> dict:
|
| 113 |
+
"""Deep search for the largest valid JSON object in a noisy string."""
|
| 114 |
+
# 1. Basic cleanup
|
|
|
|
| 115 |
if "Assistant:" in raw:
|
| 116 |
raw = raw.split("Assistant:")[-1]
|
| 117 |
+
|
| 118 |
+
# 2. Iterative search for valid JSON blocks
|
| 119 |
+
# We try to find { and } and shrink the window until it parses
|
| 120 |
+
best_data = None
|
| 121 |
+
|
| 122 |
+
# Find all '{' indices
|
| 123 |
+
open_braces = [i for i, char in enumerate(raw) if char == '{']
|
| 124 |
+
# Find all '}' indices (reversed to try largest first)
|
| 125 |
+
close_braces = [i for i, char in enumerate(raw) if char == '}'][::-1]
|
| 126 |
+
|
| 127 |
+
for start in open_braces:
|
| 128 |
+
for end in close_braces:
|
| 129 |
+
if end < start:
|
| 130 |
+
continue
|
| 131 |
+
try:
|
| 132 |
+
candidate = raw[start : end + 1]
|
| 133 |
+
# Simple cleanup for MD blocks
|
| 134 |
+
candidate = re.sub(r"```json\s*", "", candidate)
|
| 135 |
+
candidate = candidate.replace("```", "").strip()
|
| 136 |
+
|
| 137 |
+
data = json.loads(candidate)
|
| 138 |
+
if isinstance(data, dict) and "scenes" in data:
|
| 139 |
+
self._validate(data)
|
| 140 |
+
return data
|
| 141 |
+
except Exception:
|
| 142 |
+
continue
|
| 143 |
+
|
| 144 |
+
raise ValueError("Could not extract a valid screenplay JSON from LLM output.")
|
| 145 |
|
| 146 |
def _validate(self, data: dict):
|
| 147 |
required = {"title", "scenes"}
|
| 148 |
missing = required - data.keys()
|
| 149 |
if missing:
|
| 150 |
raise ValueError(f"Missing essential fields: {missing}")
|
| 151 |
+
if not data["scenes"]:
|
| 152 |
+
raise ValueError("Screenplay scenes list is empty")
|
| 153 |
|
| 154 |
def _rule_based_fallback(self, story: str, style: str, duration: int) -> dict:
|
| 155 |
"""Minimal offline scene splitter — splits story into sentences."""
|