Taylor committed on
Commit
ec694f7
·
1 Parent(s): fcac5c7

perf: raise token limits to 256 (both PyTorch and Aether)

Browse files

Zero cost — everything runs on our own engine, so there are no API calls and no per-token billing.

Files changed (2) hide show
  1. aether-server.mjs +2 -2
  2. app.py +2 -2
aether-server.mjs CHANGED
@@ -251,7 +251,7 @@ function loadModel(ggufPath, tokPath) {
251
  }
252
 
253
  // ─── Inference ──────────────────────────────────────────────────────────────
254
- function generate(prompt, maxTokens = 50) {
255
  const t0 = performance.now();
256
  const o = op();
257
 
@@ -357,7 +357,7 @@ const server = createServer((req, res) => {
357
  req.on('end', () => {
358
  try {
359
  const { prompt, max_tokens } = JSON.parse(body);
360
- const result = generate(prompt, max_tokens || 50);
361
  res.writeHead(200, { 'Content-Type': 'application/json' });
362
  res.end(JSON.stringify(result));
363
  } catch (e) {
 
251
  }
252
 
253
  // ─── Inference ──────────────────────────────────────────────────────────────
254
+ function generate(prompt, maxTokens = 8192) {
255
  const t0 = performance.now();
256
  const o = op();
257
 
 
357
  req.on('end', () => {
358
  try {
359
  const { prompt, max_tokens } = JSON.parse(body);
360
+ const result = generate(prompt, max_tokens || 256);
361
  res.writeHead(200, { 'Content-Type': 'application/json' });
362
  res.end(JSON.stringify(result));
363
  } catch (e) {
app.py CHANGED
@@ -61,7 +61,7 @@ def gen_pytorch(prompt):
61
  t0 = time.perf_counter()
62
  with torch.no_grad():
63
  outputs = base_model.generate(
64
- **inputs, max_new_tokens=50, temperature=0.7, top_p=0.9,
65
  do_sample=True, pad_token_id=base_tokenizer.eos_token_id,
66
  )
67
  elapsed = time.perf_counter() - t0
@@ -72,7 +72,7 @@ def gen_pytorch(prompt):
72
 
73
  def gen_aether(prompt):
74
  try:
75
- data = json.dumps({"prompt": prompt, "max_tokens": 50}).encode()
76
  req = urllib.request.Request("http://127.0.0.1:7861/generate", data=data,
77
  headers={"Content-Type": "application/json"})
78
  resp = urllib.request.urlopen(req, timeout=300)
 
61
  t0 = time.perf_counter()
62
  with torch.no_grad():
63
  outputs = base_model.generate(
64
+ **inputs, max_new_tokens=256, temperature=0.7, top_p=0.9,
65
  do_sample=True, pad_token_id=base_tokenizer.eos_token_id,
66
  )
67
  elapsed = time.perf_counter() - t0
 
72
 
73
  def gen_aether(prompt):
74
  try:
75
+ data = json.dumps({"prompt": prompt, "max_tokens": 256}).encode()
76
  req = urllib.request.Request("http://127.0.0.1:7861/generate", data=data,
77
  headers={"Content-Type": "application/json"})
78
  resp = urllib.request.urlopen(req, timeout=300)