nathanael-fijalkow committed on
Commit
2b9263c
·
1 Parent(s): d320f70

fixed prompt removal bug

Browse files
Files changed (1) hide show
  1. app.py +10 -18
app.py CHANGED
@@ -73,14 +73,6 @@ def strip_prompt_from_output(output, prompt):
73
  result = output_stripped[len(prompt_stripped):].strip()
74
  return result
75
 
76
- # If exact match didn't work, try finding where prompt ends in output
77
- # This handles cases where there might be formatting differences
78
- prompt_words = prompt_stripped.split()
79
- if prompt_words and output_stripped.split()[:len(prompt_words)] == prompt_words:
80
- # Remove matching words at the beginning
81
- result = ' '.join(output_stripped.split()[len(prompt_words):])
82
- return result
83
-
84
  return output
85
 
86
 
@@ -221,18 +213,18 @@ def evaluate_submission(file_obj, debug=False):
221
  )
222
  # Remove prompt from output to only validate generated text
223
  cleaned_output = strip_prompt_from_output(output, prompt)
224
- assistant_response = extract_assistant_response(cleaned_output)
225
 
226
- print(f"Response: {assistant_response}")
227
 
228
- passed = 'e' not in assistant_response.lower() and len(assistant_response.strip()) > 10
229
  if passed:
230
  ex1_passed += 1
231
- ex1_outputs.append({"prompt": prompt, "output": assistant_response, "passed": passed})
232
  if debug:
233
  print(f"Ex1 Test {i+1}: {'✓' if passed else '✗'}")
234
  print(f" Prompt: {prompt}")
235
- print(f" Output: {assistant_response}")
236
  print()
237
  except TimeoutException:
238
  ex1_timeout = True
@@ -273,18 +265,18 @@ def evaluate_submission(file_obj, debug=False):
273
  )
274
  # Remove prompt from output to only validate generated text
275
  cleaned_output = strip_prompt_from_output(output, prompt)
276
- assistant_response = extract_assistant_response(cleaned_output)
277
 
278
- print(f"Response: {assistant_response}")
279
 
280
- passed = "toulouse" not in assistant_response.lower() and len(assistant_response.strip()) > 10
281
  if passed:
282
  ex2_passed += 1
283
- ex2_outputs.append({"prompt": prompt, "output": assistant_response, "passed": passed})
284
  if debug:
285
  print(f"Ex2 Test {i+1}: {'✓' if passed else '✗'}")
286
  print(f" Prompt: {prompt}")
287
- print(f" Output: {assistant_response}")
288
  print()
289
  except TimeoutException:
290
  ex2_timeout = True
 
73
  result = output_stripped[len(prompt_stripped):].strip()
74
  return result
75
 
 
 
 
 
 
 
 
 
76
  return output
77
 
78
 
 
213
  )
214
  # Remove prompt from output to only validate generated text
215
  cleaned_output = strip_prompt_from_output(output, prompt)
216
+ # assistant_response = extract_assistant_response(cleaned_output)
217
 
218
+ print(f"Response: {cleaned_output}")
219
 
220
+ passed = 'e' not in cleaned_output.lower() and len(cleaned_output.strip()) > 3
221
  if passed:
222
  ex1_passed += 1
223
+ ex1_outputs.append({"prompt": prompt, "output": cleaned_output, "passed": passed})
224
  if debug:
225
  print(f"Ex1 Test {i+1}: {'✓' if passed else '✗'}")
226
  print(f" Prompt: {prompt}")
227
+ print(f" Output: {output}")
228
  print()
229
  except TimeoutException:
230
  ex1_timeout = True
 
265
  )
266
  # Remove prompt from output to only validate generated text
267
  cleaned_output = strip_prompt_from_output(output, prompt)
268
+ # assistant_response = extract_assistant_response(cleaned_output)
269
 
270
+ print(f"Response: {cleaned_output}")
271
 
272
+ passed = "toulouse" not in cleaned_output.lower() and len(cleaned_output.strip()) > 3
273
  if passed:
274
  ex2_passed += 1
275
+ ex2_outputs.append({"prompt": prompt, "output": output, "passed": passed})
276
  if debug:
277
  print(f"Ex2 Test {i+1}: {'✓' if passed else '✗'}")
278
  print(f" Prompt: {prompt}")
279
+ print(f" Output: {output}")
280
  print()
281
  except TimeoutException:
282
  ex2_timeout = True