gary-boon (Claude Opus 4.5) committed
Commit bb689ce · 1 Parent(s): d1d37a8

feat: add auto_complete parameter for token generation


- Add auto_complete parameter to streaming and non-streaming endpoints (example request below)
- Cap max_tokens at 128 when auto_complete is enabled
- Existing EOS early-stop logic handles completion detection (see the sketch after the diff)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
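
For context, a client opting into the new flag might send a request like the sketch below. The route path and payload shape are assumptions for illustration; the diff only shows the handler bodies, not the route registration.

import requests  # hypothetical client; any HTTP client works

resp = requests.post(
    "http://localhost:8000/research/attention",  # assumed path, not shown in the diff
    json={
        "prompt": "def quicksort(arr):",
        "max_tokens": 256,       # server caps this at 128 when auto_complete is set
        "temperature": 0.7,
        "auto_complete": True,   # new parameter added by this commit
    },
)
print(resp.json())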

Files changed (1)
  1. backend/model_service.py +12 -2
backend/model_service.py CHANGED
@@ -1573,9 +1573,14 @@ async def analyze_research_attention(request: Dict[str, Any], authenticated: bool
     # Get parameters
     prompt = request.get("prompt", "def quicksort(arr):")
     max_tokens = request.get("max_tokens", 8)
+    auto_complete = request.get("auto_complete", False)
     temperature = request.get("temperature", 0.7)
 
-    logger.info(f"Research attention analysis: prompt_len={len(prompt)}, max_tokens={max_tokens}")
+    # If auto_complete mode, ensure we have a reasonable upper limit
+    if auto_complete:
+        max_tokens = min(max_tokens, 128)
+
+    logger.info(f"Research attention analysis: prompt_len={len(prompt)}, max_tokens={max_tokens}, auto_complete={auto_complete}")
 
     # Get model config for prompt formatting
     from .model_config import get_model_config
@@ -2092,9 +2097,14 @@ async def analyze_research_attention_stream(request: Dict[str, Any], authenticated: bool
     # Get parameters
     prompt = request.get("prompt", "def quicksort(arr):")
     max_tokens = request.get("max_tokens", 8)
+    auto_complete = request.get("auto_complete", False)
     temperature = request.get("temperature", 0.7)
 
-    logger.info(f"[SSE] Research attention analysis: prompt_len={len(prompt)}, max_tokens={max_tokens}, request_id={request_id}")
+    # If auto_complete mode, ensure we have a reasonable upper limit
+    if auto_complete:
+        max_tokens = min(max_tokens, 128)
+
+    logger.info(f"[SSE] Research attention analysis: prompt_len={len(prompt)}, max_tokens={max_tokens}, auto_complete={auto_complete}, request_id={request_id}")
 
     # === STAGE 1: TOKENIZING ===
     yield sse_event('tokenizing', stage=1, totalStages=5, progress=2,
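
The commit leans on EOS early stopping that already exists in the generation loop rather than adding its own completion check. A minimal sketch of that interaction, assuming a typical sampling loop; the helper names here are hypothetical, not the file's actual implementation:

def generate_tokens(model, tokenizer, input_ids, max_tokens, auto_complete):
    # Same cap applied in this commit: auto_complete bounds the token budget.
    if auto_complete:
        max_tokens = min(max_tokens, 128)

    generated = []
    for _ in range(max_tokens):
        next_id = model.sample_next(input_ids)  # hypothetical sampling helper
        if next_id == tokenizer.eos_token_id:   # existing EOS early-stop path
            break                               # completion detected, stop early
        generated.append(next_id)
        input_ids = input_ids + [next_id]
    return generated

With auto_complete enabled, generation ends at whichever comes first: the EOS token or the 128-token cap.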