babaTEEpe committed on
Commit
7a62ee7
·
verified ·
1 Parent(s): 788e480

Update pipeline/script_engine.py

Browse files
Files changed (1) hide show
  1. pipeline/script_engine.py +32 -23
pipeline/script_engine.py CHANGED
@@ -110,37 +110,46 @@ class ScriptEngine:
110
 
111
  # ------------------------------------------------------------------
112
  def _parse(self, raw: str) -> dict:
113
- """Robust JSON extraction from LLM response."""
114
- # Clean up common debris
115
- # SmolLM often repeats the prompt or adds 'Assistant:' prefix
116
  if "Assistant:" in raw:
117
  raw = raw.split("Assistant:")[-1]
118
-
119
- try:
120
- # Find the first { and last }
121
- start = raw.find("{")
122
- end = raw.rfind("}")
123
- if start == -1 or end == -1:
124
- raise ValueError("No valid JSON found in response.")
125
-
126
- json_str = raw[start : end + 1]
127
-
128
- # Remove any markdown code block markers
129
- json_str = re.sub(r"```json\s*", "", json_str)
130
- json_str = json_str.replace("```", "").strip()
131
-
132
- data = json.loads(json_str)
133
- self._validate(data)
134
- return data
135
- except Exception as e:
136
- print(f" JSON Extraction failed: {e}")
137
- raise e
 
 
 
 
 
 
 
 
138
 
139
  def _validate(self, data: dict):
140
  required = {"title", "scenes"}
141
  missing = required - data.keys()
142
  if missing:
143
  raise ValueError(f"Missing essential fields: {missing}")
 
 
144
 
145
  def _rule_based_fallback(self, story: str, style: str, duration: int) -> dict:
146
  """Minimal offline scene splitter — splits story into sentences."""
 
110
 
111
  # ------------------------------------------------------------------
112
  def _parse(self, raw: str) -> dict:
113
+ """Deep search for the largest valid JSON object in a noisy string."""
114
+ # 1. Basic cleanup
 
115
  if "Assistant:" in raw:
116
  raw = raw.split("Assistant:")[-1]
117
+
118
+ # 2. Iterative search for valid JSON blocks
119
+ # We try to find { and } and shrink the window until it parses
120
+ best_data = None
121
+
122
+ # Find all '{' indices
123
+ open_braces = [i for i, char in enumerate(raw) if char == '{']
124
+ # Find all '}' indices (reversed to try largest first)
125
+ close_braces = [i for i, char in enumerate(raw) if char == '}'][::-1]
126
+
127
+ for start in open_braces:
128
+ for end in close_braces:
129
+ if end < start:
130
+ continue
131
+ try:
132
+ candidate = raw[start : end + 1]
133
+ # Simple cleanup for MD blocks
134
+ candidate = re.sub(r"```json\s*", "", candidate)
135
+ candidate = candidate.replace("```", "").strip()
136
+
137
+ data = json.loads(candidate)
138
+ if isinstance(data, dict) and "scenes" in data:
139
+ self._validate(data)
140
+ return data
141
+ except Exception:
142
+ continue
143
+
144
+ raise ValueError("Could not extract a valid screenplay JSON from LLM output.")
145
 
146
  def _validate(self, data: dict):
147
  required = {"title", "scenes"}
148
  missing = required - data.keys()
149
  if missing:
150
  raise ValueError(f"Missing essential fields: {missing}")
151
+ if not data["scenes"]:
152
+ raise ValueError("Screenplay scenes list is empty")
153
 
154
  def _rule_based_fallback(self, story: str, style: str, duration: int) -> dict:
155
  """Minimal offline scene splitter — splits story into sentences."""