Taylor committed
Commit · ec694f7
Parent(s): fcac5c7
perf: raise token limits to 256 (both PyTorch and Aether)

Zero cost -- all our engine, no API calls, no per-token billing.
- aether-server.mjs +2 -2
- app.py +2 -2
aether-server.mjs
CHANGED
```diff
@@ -251,7 +251,7 @@ function loadModel(ggufPath, tokPath) {
 }
 
 // ─── Inference ──────────────────────────────────────────────────────────────
-function generate(prompt, maxTokens =
+function generate(prompt, maxTokens = 8192) {
   const t0 = performance.now();
   const o = op();
 
@@ -357,7 +357,7 @@ const server = createServer((req, res) => {
   req.on('end', () => {
     try {
       const { prompt, max_tokens } = JSON.parse(body);
-      const result = generate(prompt, max_tokens ||
+      const result = generate(prompt, max_tokens || 256);
       res.writeHead(200, { 'Content-Type': 'application/json' });
       res.end(JSON.stringify(result));
     } catch (e) {
```
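The handler change is what makes the commit title true for API callers: any request that omits `max_tokens` now gets 256 tokens. Below is a minimal smoke test against the endpoint shown in the diff; the response shape is not visible in the hunk, so the sketch just prints whatever `generate()` returned:

```python
import json
import urllib.request

# Omit max_tokens entirely: the handler's `max_tokens || 256`
# fallback should cap generation at 256 tokens.
data = json.dumps({"prompt": "Hello"}).encode()
req = urllib.request.Request(
    "http://127.0.0.1:7861/generate",  # endpoint taken from app.py's gen_aether
    data=data,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req, timeout=300) as resp:
    print(json.loads(resp.read()))  # field names depend on what generate() returns
```

One quirk of the `||` fallback: an explicit `max_tokens: 0` is falsy in JavaScript, so it is coerced to 256 as well; `??` would preserve the zero.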
app.py
CHANGED
```diff
@@ -61,7 +61,7 @@ def gen_pytorch(prompt):
     t0 = time.perf_counter()
     with torch.no_grad():
         outputs = base_model.generate(
-            **inputs, max_new_tokens=
+            **inputs, max_new_tokens=256, temperature=0.7, top_p=0.9,
             do_sample=True, pad_token_id=base_tokenizer.eos_token_id,
         )
     elapsed = time.perf_counter() - t0
@@ -72,7 +72,7 @@ def gen_pytorch(prompt):
 
 def gen_aether(prompt):
     try:
-        data = json.dumps({"prompt": prompt, "max_tokens":
+        data = json.dumps({"prompt": prompt, "max_tokens": 256}).encode()
         req = urllib.request.Request("http://127.0.0.1:7861/generate", data=data,
                                      headers={"Content-Type": "application/json"})
         resp = urllib.request.urlopen(req, timeout=300)
```
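The app.py hunks show only the two changed call sites. For readers without the full file, here is a hedged sketch of how `gen_pytorch` plausibly wraps the changed `generate` call, assuming `base_model` and `base_tokenizer` are a standard transformers pair; the checkpoint name, input preparation, and decode step are assumptions, not part of the diff:

```python
import time

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder checkpoint -- the Space's actual model is not shown in the diff.
base_tokenizer = AutoTokenizer.from_pretrained("gpt2")
base_model = AutoModelForCausalLM.from_pretrained("gpt2")

def gen_pytorch(prompt):
    # Input preparation is an assumption; the hunk starts at the timer.
    inputs = base_tokenizer(prompt, return_tensors="pt")
    t0 = time.perf_counter()
    with torch.no_grad():
        outputs = base_model.generate(
            **inputs, max_new_tokens=256, temperature=0.7, top_p=0.9,
            do_sample=True, pad_token_id=base_tokenizer.eos_token_id,
        )
    elapsed = time.perf_counter() - t0
    # Decode step is an assumption; the hunk ends at `elapsed`.
    return base_tokenizer.decode(outputs[0], skip_special_tokens=True), elapsed
```

With both paths now capped at 256 new tokens, the `elapsed` timings from `gen_pytorch` and `gen_aether` are comparable like for like.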