Spaces:
Running
Running
Commit ·
2b9263c
1
Parent(s): d320f70
fixed prompt removal bug
Browse files
app.py
CHANGED
|
@@ -73,14 +73,6 @@ def strip_prompt_from_output(output, prompt):
|
|
| 73 |
result = output_stripped[len(prompt_stripped):].strip()
|
| 74 |
return result
|
| 75 |
|
| 76 |
-
# If exact match didn't work, try finding where prompt ends in output
|
| 77 |
-
# This handles cases where there might be formatting differences
|
| 78 |
-
prompt_words = prompt_stripped.split()
|
| 79 |
-
if prompt_words and output_stripped.split()[:len(prompt_words)] == prompt_words:
|
| 80 |
-
# Remove matching words at the beginning
|
| 81 |
-
result = ' '.join(output_stripped.split()[len(prompt_words):])
|
| 82 |
-
return result
|
| 83 |
-
|
| 84 |
return output
|
| 85 |
|
| 86 |
|
|
@@ -221,18 +213,18 @@ def evaluate_submission(file_obj, debug=False):
|
|
| 221 |
)
|
| 222 |
# Remove prompt from output to only validate generated text
|
| 223 |
cleaned_output = strip_prompt_from_output(output, prompt)
|
| 224 |
-
assistant_response = extract_assistant_response(cleaned_output)
|
| 225 |
|
| 226 |
-
print(f"Response: {
|
| 227 |
|
| 228 |
-
passed = 'e' not in
|
| 229 |
if passed:
|
| 230 |
ex1_passed += 1
|
| 231 |
-
ex1_outputs.append({"prompt": prompt, "output":
|
| 232 |
if debug:
|
| 233 |
print(f"Ex1 Test {i+1}: {'β' if passed else 'β'}")
|
| 234 |
print(f" Prompt: {prompt}")
|
| 235 |
-
print(f" Output: {
|
| 236 |
print()
|
| 237 |
except TimeoutException:
|
| 238 |
ex1_timeout = True
|
|
@@ -273,18 +265,18 @@ def evaluate_submission(file_obj, debug=False):
|
|
| 273 |
)
|
| 274 |
# Remove prompt from output to only validate generated text
|
| 275 |
cleaned_output = strip_prompt_from_output(output, prompt)
|
| 276 |
-
assistant_response = extract_assistant_response(cleaned_output)
|
| 277 |
|
| 278 |
-
print(f"Response: {
|
| 279 |
|
| 280 |
-
passed = "toulouse" not in
|
| 281 |
if passed:
|
| 282 |
ex2_passed += 1
|
| 283 |
-
ex2_outputs.append({"prompt": prompt, "output":
|
| 284 |
if debug:
|
| 285 |
print(f"Ex2 Test {i+1}: {'β' if passed else 'β'}")
|
| 286 |
print(f" Prompt: {prompt}")
|
| 287 |
-
print(f" Output: {
|
| 288 |
print()
|
| 289 |
except TimeoutException:
|
| 290 |
ex2_timeout = True
|
|
|
|
| 73 |
result = output_stripped[len(prompt_stripped):].strip()
|
| 74 |
return result
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
return output
|
| 77 |
|
| 78 |
|
|
|
|
| 213 |
)
|
| 214 |
# Remove prompt from output to only validate generated text
|
| 215 |
cleaned_output = strip_prompt_from_output(output, prompt)
|
| 216 |
+
# assistant_response = extract_assistant_response(cleaned_output)
|
| 217 |
|
| 218 |
+
print(f"Response: {cleaned_output}")
|
| 219 |
|
| 220 |
+
passed = 'e' not in cleaned_output.lower() and len(cleaned_output.strip()) > 3
|
| 221 |
if passed:
|
| 222 |
ex1_passed += 1
|
| 223 |
+
ex1_outputs.append({"prompt": prompt, "output": cleaned_output, "passed": passed})
|
| 224 |
if debug:
|
| 225 |
print(f"Ex1 Test {i+1}: {'β' if passed else 'β'}")
|
| 226 |
print(f" Prompt: {prompt}")
|
| 227 |
+
print(f" Output: {output}")
|
| 228 |
print()
|
| 229 |
except TimeoutException:
|
| 230 |
ex1_timeout = True
|
|
|
|
| 265 |
)
|
| 266 |
# Remove prompt from output to only validate generated text
|
| 267 |
cleaned_output = strip_prompt_from_output(output, prompt)
|
| 268 |
+
# assistant_response = extract_assistant_response(cleaned_output)
|
| 269 |
|
| 270 |
+
print(f"Response: {cleaned_output}")
|
| 271 |
|
| 272 |
+
passed = "toulouse" not in cleaned_output.lower() and len(cleaned_output.strip()) > 3
|
| 273 |
if passed:
|
| 274 |
ex2_passed += 1
|
| 275 |
+
ex2_outputs.append({"prompt": prompt, "output": output, "passed": passed})
|
| 276 |
if debug:
|
| 277 |
print(f"Ex2 Test {i+1}: {'β' if passed else 'β'}")
|
| 278 |
print(f" Prompt: {prompt}")
|
| 279 |
+
print(f" Output: {output}")
|
| 280 |
print()
|
| 281 |
except TimeoutException:
|
| 282 |
ex2_timeout = True
|