Spaces:
Sleeping
Sleeping
fix: improve prompt generation logging and add padding token ID
Browse files — app_nobatching.py (+2, −2)
app_nobatching.py
CHANGED
|
@@ -94,7 +94,6 @@ def generate(submission: list[dict[str, str]], team_id: str) -> list[dict[str, s
|
|
| 94 |
|
| 95 |
# Format prompts using chat template
|
| 96 |
for i, prompt in enumerate(prompts):
|
| 97 |
-
print(f"Generating response for prompt {i + 1}/{len(prompts)}")
|
| 98 |
start_time = time.perf_counter()
|
| 99 |
|
| 100 |
messages = [{"role": "user", "content": prompt}]
|
|
@@ -111,6 +110,7 @@ def generate(submission: list[dict[str, str]], team_id: str) -> list[dict[str, s
|
|
| 111 |
do_sample=False,
|
| 112 |
temperature=None,
|
| 113 |
repetition_penalty=REPETITION_PENALTY,
|
|
|
|
| 114 |
eos_token_id=chat_tokenizer.eos_token_id,
|
| 115 |
)
|
| 116 |
|
|
@@ -121,7 +121,7 @@ def generate(submission: list[dict[str, str]], team_id: str) -> list[dict[str, s
|
|
| 121 |
|
| 122 |
generation_time = time.perf_counter() - start_time
|
| 123 |
times.append(generation_time)
|
| 124 |
-
print(f"✓ Prompt {i + 1} generated in {generation_time:.3f}s")
|
| 125 |
|
| 126 |
print(
|
| 127 |
f"✓ Generation completed: {sum(times):.3f}s ({len(prompts) / sum(times):.1f} prompts/s) (average {sum(times) / len(prompts):.3f}s per prompt)"
|
|
|
|
| 94 |
|
| 95 |
# Format prompts using chat template
|
| 96 |
for i, prompt in enumerate(prompts):
|
|
|
|
| 97 |
start_time = time.perf_counter()
|
| 98 |
|
| 99 |
messages = [{"role": "user", "content": prompt}]
|
|
|
|
| 110 |
do_sample=False,
|
| 111 |
temperature=None,
|
| 112 |
repetition_penalty=REPETITION_PENALTY,
|
| 113 |
+
pad_token_id=chat_tokenizer.pad_token_id,
|
| 114 |
eos_token_id=chat_tokenizer.eos_token_id,
|
| 115 |
)
|
| 116 |
|
|
|
|
| 121 |
|
| 122 |
generation_time = time.perf_counter() - start_time
|
| 123 |
times.append(generation_time)
|
| 124 |
+
print(f"✓ Prompt {i + 1:02}/25 generated in {generation_time:.3f}s")
|
| 125 |
|
| 126 |
print(
|
| 127 |
f"✓ Generation completed: {sum(times):.3f}s ({len(prompts) / sum(times):.1f} prompts/s) (average {sum(times) / len(prompts):.3f}s per prompt)"
|