Spaces:
Running
fix: reserve tokens for system prompt and generation in extraction windows
Browse files
Window size was using full n_ctx (4096), causing overflow when system
prompt and generation tokens were added. Now reserves 1500 tokens
(~200 for prompt + ~1024 for output + safety margin).
Max window tokens: 4096 - 1500 = 2596 tokens
Fixes: ValueError: Requested tokens (4796) exceed context window of 4096
app.py
CHANGED
|
@@ -1482,6 +1482,9 @@ def summarize_advanced(
|
|
| 1482 |
# In production, this would be more sophisticated
|
| 1483 |
lines = [l.strip() for l in transcript.split('\n') if l.strip()]
|
| 1484 |
|
|
|
|
|
|
|
|
|
|
| 1485 |
# Simple windowing: split into chunks based on token count
|
| 1486 |
windows = []
|
| 1487 |
current_window = []
|
|
@@ -1491,7 +1494,7 @@ def summarize_advanced(
|
|
| 1491 |
for line_num, line in enumerate(lines):
|
| 1492 |
line_tokens = tokenizer.count(line)
|
| 1493 |
|
| 1494 |
-
if current_tokens + line_tokens > extraction_n_ctx and current_window:
|
| 1495 |
# Create window
|
| 1496 |
window_content = '\n'.join(current_window)
|
| 1497 |
windows.append(Window(
|
|
|
|
| 1482 |
# In production, this would be more sophisticated
|
| 1483 |
lines = [l.strip() for l in transcript.split('\n') if l.strip()]
|
| 1484 |
|
| 1485 |
+
# Reserve tokens for system prompt (~200) and output (~1024)
|
| 1486 |
+
max_window_tokens = extraction_n_ctx - 1500 # Safe buffer for prompts and generation
|
| 1487 |
+
|
| 1488 |
# Simple windowing: split into chunks based on token count
|
| 1489 |
windows = []
|
| 1490 |
current_window = []
|
|
|
|
| 1494 |
for line_num, line in enumerate(lines):
|
| 1495 |
line_tokens = tokenizer.count(line)
|
| 1496 |
|
| 1497 |
+
if current_tokens + line_tokens > max_window_tokens and current_window:
|
| 1498 |
# Create window
|
| 1499 |
window_content = '\n'.join(current_window)
|
| 1500 |
windows.append(Window(
|