Spaces:

babaTEEpe
/

video_contents_generator

Sleeping

App Files Community

babaTEEpe committed on Mar 1

Commit

7a62ee7

verified ·

1 Parent(s): 788e480

Update pipeline/script_engine.py

Browse files

Files changed (1) hide show

pipeline/script_engine.py +32 -23

pipeline/script_engine.py CHANGED Viewed

@@ -110,37 +110,46 @@ class ScriptEngine:
     # ------------------------------------------------------------------
     def _parse(self, raw: str) -> dict:
-        """Robust JSON extraction from LLM response."""
-        # Clean up common debris
-        # SmolLM often repeats the prompt or adds 'Assistant:' prefix
         if "Assistant:" in raw:
             raw = raw.split("Assistant:")[-1]
-        try:
-            # Find the first { and last }
-            start = raw.find("{")
-            end = raw.rfind("}")
-            if start == -1 or end == -1:
-                raise ValueError("No valid JSON found in response.")
-            json_str = raw[start : end + 1]
-            # Remove any markdown code block markers
-            json_str = re.sub(r"```json\s*", "", json_str)
-            json_str = json_str.replace("```", "").strip()
-            data = json.loads(json_str)
-            self._validate(data)
-            return data
-        except Exception as e:
-            print(f"❌ JSON Extraction failed: {e}")
-            raise e
     def _validate(self, data: dict):
         required = {"title", "scenes"}
         missing = required - data.keys()
         if missing:
             raise ValueError(f"Missing essential fields: {missing}")
     def _rule_based_fallback(self, story: str, style: str, duration: int) -> dict:
         """Minimal offline scene splitter — splits story into sentences."""

     # ------------------------------------------------------------------
     def _parse(self, raw: str) -> dict:
+        """Deep search for the largest valid JSON object in a noisy string."""
+        # 1. Basic cleanup
         if "Assistant:" in raw:
             raw = raw.split("Assistant:")[-1]
+        # 2. Iterative search for valid JSON blocks
+        # We try to find { and } and shrink the window until it parses
+        best_data = None
+        # Find all '{' indices
+        open_braces = [i for i, char in enumerate(raw) if char == '{']
+        # Find all '}' indices (reversed to try largest first)
+        close_braces = [i for i, char in enumerate(raw) if char == '}'][::-1]
+        for start in open_braces:
+            for end in close_braces:
+                if end < start:
+                    continue
+                try:
+                    candidate = raw[start : end + 1]
+                    # Simple cleanup for MD blocks
+                    candidate = re.sub(r"```json\s*", "", candidate)
+                    candidate = candidate.replace("```", "").strip()
+                    data = json.loads(candidate)
+                    if isinstance(data, dict) and "scenes" in data:
+                        self._validate(data)
+                        return data
+                except Exception:
+                    continue
+        raise ValueError("Could not extract a valid screenplay JSON from LLM output.")
     def _validate(self, data: dict):
         required = {"title", "scenes"}
         missing = required - data.keys()
         if missing:
             raise ValueError(f"Missing essential fields: {missing}")
+        if not data["scenes"]:
+             raise ValueError("Screenplay scenes list is empty")
     def _rule_based_fallback(self, story: str, style: str, duration: int) -> dict:
         """Minimal offline scene splitter — splits story into sentences."""