Spaces:

singhalamaan116
/

EcoEval-LLM

Sleeping

App Files Files Community

singhalamaan116 committed 13 days ago

Commit

cb2d7b5

verified ·

1 Parent(s): a06dde5

Update ecoeval/core.py

Browse files

Files changed (1) hide show

ecoeval/core.py +38 -9

ecoeval/core.py CHANGED Viewed

@@ -86,8 +86,11 @@ def _extract_code(generated: str) -> str:
     Clean raw model output into executable Python:
     - Keep from the first 'def ' onwards when possible.
-    - Strip leading docstrings.
-    - Drop lines that are clearly meta-text (Input:, Output:, >>>, etc.).
     """
     text = generated.strip()
@@ -109,28 +112,54 @@ def _extract_code(generated: str) -> str:
         "Output Format:",
         "Python 3:",
         "The function ",
         "The above code",
         "The following code",
         "- ",  # bullet lists like "- Write a function ..."
     )
-    cleaned_lines: List[str] = []
-    for line in text.splitlines():
         stripped = line.strip()
         if not stripped:
-            cleaned_lines.append("")  # keep blank lines for indentation blocks
             continue
         if any(stripped.startswith(bp) for bp in bad_prefixes):
             continue
         if stripped.startswith("```"):
             continue
-        cleaned_lines.append(line)
-    cleaned = "\n".join(cleaned_lines).strip()
-    return cleaned
 # ---------- Generation + execution ----------

     Clean raw model output into executable Python:
     - Keep from the first 'def ' onwards when possible.
+    - Remove triple-quoted docstrings.
+    - Drop obvious natural-language lines.
+    - Stop at top-level 'if __name__ == "__main__"' or other
+      top-level control-flow scaffolding that often causes
+      indentation errors.
     """
     text = generated.strip()
         "Output Format:",
         "Python 3:",
         "The function ",
+        "The first line ",
         "The above code",
         "The following code",
         "- ",  # bullet lists like "- Write a function ..."
     )
+    lines = text.splitlines()
+    cleaned: List[str] = []
+    in_docstring = False
+    for line in lines:
         stripped = line.strip()
+        # Track and drop any triple-quoted docstring blocks anywhere
+        if '"""' in stripped or "'''" in stripped:
+            # toggle docstring state and skip this line
+            in_docstring = not in_docstring
+            continue
+        if in_docstring:
+            continue
         if not stripped:
+            # keep blank lines (can be inside function)
+            cleaned.append("")
             continue
+        # Drop obvious NL/meta text
         if any(stripped.startswith(bp) for bp in bad_prefixes):
             continue
         if stripped.startswith("```"):
             continue
+        # Detect top-level (unindented) scaffolding and stop there
+        is_top_level = (line == stripped)  # no leading spaces/tabs
+        if is_top_level and stripped.startswith("if __name__"):
+            # stop before main-guard
+            break
+        if is_top_level and stripped.startswith(("for ", "while ", "if ", "elif ", "else:", "try:", "except", "with ")):
+            # likely problem-causing scaffold; stop here
+            break
+        cleaned.append(line)
+    code = "\n".join(cleaned).rstrip()
+    return code
 # ---------- Generation + execution ----------