singhalamaan116 committed on
Commit
cb2d7b5
·
verified ·
1 Parent(s): a06dde5

Update ecoeval/core.py

Browse files
Files changed (1) hide show
  1. ecoeval/core.py +38 -9
ecoeval/core.py CHANGED
@@ -86,8 +86,11 @@ def _extract_code(generated: str) -> str:
86
  Clean raw model output into executable Python:
87
 
88
  - Keep from the first 'def ' onwards when possible.
89
- - Strip leading docstrings.
90
- - Drop lines that are clearly meta-text (Input:, Output:, >>>, etc.).
 
 
 
91
  """
92
  text = generated.strip()
93
 
@@ -109,28 +112,54 @@ def _extract_code(generated: str) -> str:
109
  "Output Format:",
110
  "Python 3:",
111
  "The function ",
 
112
  "The above code",
113
  "The following code",
114
  "- ", # bullet lists like "- Write a function ..."
115
  )
116
 
117
- cleaned_lines: List[str] = []
118
- for line in text.splitlines():
 
 
 
119
  stripped = line.strip()
 
 
 
 
 
 
 
 
 
120
  if not stripped:
121
- cleaned_lines.append("") # keep blank lines for indentation blocks
 
122
  continue
123
 
 
124
  if any(stripped.startswith(bp) for bp in bad_prefixes):
125
  continue
126
-
127
  if stripped.startswith("```"):
128
  continue
129
 
130
- cleaned_lines.append(line)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
 
132
- cleaned = "\n".join(cleaned_lines).strip()
133
- return cleaned
134
 
135
 
136
  # ---------- Generation + execution ----------
 
86
  Clean raw model output into executable Python:
87
 
88
  - Keep from the first 'def ' onwards when possible.
89
+ - Remove triple-quoted docstrings.
90
+ - Drop obvious natural-language lines.
91
+ - Stop at top-level 'if __name__ == "__main__"' or other
92
+ top-level control-flow scaffolding that often causes
93
+ indentation errors.
94
  """
95
  text = generated.strip()
96
 
 
112
  "Output Format:",
113
  "Python 3:",
114
  "The function ",
115
+ "The first line ",
116
  "The above code",
117
  "The following code",
118
  "- ", # bullet lists like "- Write a function ..."
119
  )
120
 
121
+ lines = text.splitlines()
122
+ cleaned: List[str] = []
123
+ in_docstring = False
124
+
125
+ for line in lines:
126
  stripped = line.strip()
127
+
128
+ # Track and drop any triple-quoted docstring blocks anywhere
129
+ if '"""' in stripped or "'''" in stripped:
130
+ # toggle docstring state and skip this line
131
+ in_docstring = not in_docstring
132
+ continue
133
+ if in_docstring:
134
+ continue
135
+
136
  if not stripped:
137
+ # keep blank lines (can be inside function)
138
+ cleaned.append("")
139
  continue
140
 
141
+ # Drop obvious NL/meta text
142
  if any(stripped.startswith(bp) for bp in bad_prefixes):
143
  continue
 
144
  if stripped.startswith("```"):
145
  continue
146
 
147
+ # Detect top-level (unindented) scaffolding and stop there
148
+ is_top_level = (line == stripped) # no leading spaces/tabs
149
+
150
+ if is_top_level and stripped.startswith("if __name__"):
151
+ # stop before main-guard
152
+ break
153
+
154
+ if is_top_level and stripped.startswith(("for ", "while ", "if ", "elif ", "else:", "try:", "except", "with ")):
155
+ # likely problem-causing scaffold; stop here
156
+ break
157
+
158
+ cleaned.append(line)
159
+
160
+ code = "\n".join(cleaned).rstrip()
161
+ return code
162
 
 
 
163
 
164
 
165
  # ---------- Generation + execution ----------