Spaces:
gary-boon
Claude Opus 4.5 committed
Commit bb689ce
1 Parent(s): d1d37a8
feat: add auto_complete parameter for token generation
- Add auto_complete parameter to streaming and non-streaming endpoints
- Cap max_tokens at 128 when auto_complete is enabled
- Existing EOS early-stop logic handles completion detection
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
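For reference, the capping behavior described in the bullets above can be mirrored in isolation. The sketch below restates the handler's new parameter logic outside the service; the standalone function name and the asserts are illustrative, not part of the commit:

def effective_max_tokens(request: dict) -> int:
    """Mirror of the handler's parameter logic (a sketch, not the service code)."""
    max_tokens = request.get("max_tokens", 8)   # same default as in the diff
    if request.get("auto_complete", False):     # new flag added by this commit
        max_tokens = min(max_tokens, 128)       # cap applies only in auto_complete mode
    return max_tokens

# auto_complete caps oversized budgets; plain requests pass through unchanged.
assert effective_max_tokens({"max_tokens": 512, "auto_complete": True}) == 128
assert effective_max_tokens({"max_tokens": 512}) == 512
assert effective_max_tokens({"auto_complete": True}) == 8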
backend/model_service.py CHANGED (+12 -2)
@@ -1573,9 +1573,14 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: boo
     # Get parameters
     prompt = request.get("prompt", "def quicksort(arr):")
     max_tokens = request.get("max_tokens", 8)
+    auto_complete = request.get("auto_complete", False)
     temperature = request.get("temperature", 0.7)
 
-
+    # If auto_complete mode, ensure we have a reasonable upper limit
+    if auto_complete:
+        max_tokens = min(max_tokens, 128)
+
+    logger.info(f"Research attention analysis: prompt_len={len(prompt)}, max_tokens={max_tokens}, auto_complete={auto_complete}")
 
     # Get model config for prompt formatting
     from .model_config import get_model_config

@@ -2092,9 +2097,14 @@ async def analyze_research_attention_stream(request: Dict[str, Any], authenticat
     # Get parameters
     prompt = request.get("prompt", "def quicksort(arr):")
     max_tokens = request.get("max_tokens", 8)
+    auto_complete = request.get("auto_complete", False)
     temperature = request.get("temperature", 0.7)
 
-
+    # If auto_complete mode, ensure we have a reasonable upper limit
+    if auto_complete:
+        max_tokens = min(max_tokens, 128)
+
+    logger.info(f"[SSE] Research attention analysis: prompt_len={len(prompt)}, max_tokens={max_tokens}, auto_complete={auto_complete}, request_id={request_id}")
 
     # === STAGE 1: TOKENIZING ===
     yield sse_event('tokenizing', stage=1, totalStages=5, progress=2,