saiteki-kai committed on
Commit
b904e85
·
verified ·
1 Parent(s): 7eebfb4

fix: improve prompt generation logging and add padding token ID

Browse files
Files changed (1) hide show
  1. app_nobatching.py +2 -2
app_nobatching.py CHANGED
@@ -94,7 +94,6 @@ def generate(submission: list[dict[str, str]], team_id: str) -> list[dict[str, s
94
 
95
  # Format prompts using chat template
96
  for i, prompt in enumerate(prompts):
97
- print(f"Generating response for prompt {i + 1}/{len(prompts)}")
98
  start_time = time.perf_counter()
99
 
100
  messages = [{"role": "user", "content": prompt}]
@@ -111,6 +110,7 @@ def generate(submission: list[dict[str, str]], team_id: str) -> list[dict[str, s
111
  do_sample=False,
112
  temperature=None,
113
  repetition_penalty=REPETITION_PENALTY,
 
114
  eos_token_id=chat_tokenizer.eos_token_id,
115
  )
116
 
@@ -121,7 +121,7 @@ def generate(submission: list[dict[str, str]], team_id: str) -> list[dict[str, s
121
 
122
  generation_time = time.perf_counter() - start_time
123
  times.append(generation_time)
124
- print(f"✓ Prompt {i + 1} generated in {generation_time:.3f}s")
125
 
126
  print(
127
  f"✓ Generation completed: {sum(times):.3f}s ({len(prompts) / sum(times):.1f} prompts/s) (average {sum(times) / len(prompts):.3f}s per prompt)"
 
94
 
95
  # Format prompts using chat template
96
  for i, prompt in enumerate(prompts):
 
97
  start_time = time.perf_counter()
98
 
99
  messages = [{"role": "user", "content": prompt}]
 
110
  do_sample=False,
111
  temperature=None,
112
  repetition_penalty=REPETITION_PENALTY,
113
+ pad_token_id=chat_tokenizer.pad_token_id,
114
  eos_token_id=chat_tokenizer.eos_token_id,
115
  )
116
 
 
121
 
122
  generation_time = time.perf_counter() - start_time
123
  times.append(generation_time)
124
+ print(f"✓ Prompt {i + 1:02}/25 generated in {generation_time:.3f}s")
125
 
126
  print(
127
  f"✓ Generation completed: {sum(times):.3f}s ({len(prompts) / sum(times):.1f} prompts/s) (average {sum(times) / len(prompts):.3f}s per prompt)"