AndaiMD committed
Commit d1e903b · 1 Parent(s): c2ebdd7
Files changed (1)
  1. app/main.py +15 -11
app/main.py CHANGED
@@ -10,24 +10,28 @@ model, tokenizer = load_model()
 async def predict(request: Request):
     data = await request.json()
     input_text = data.get("input", "")
-
-    # Tokenize and move to model device
-    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
-
-    # Generate next 15 tokens
+
+    # Extract last 5 words
+    last_5_words = " ".join(input_text.strip().split()[-5:])
+
+    # Tokenize and generate continuation
+    inputs = tokenizer(last_5_words, return_tensors="pt").to(model.device)
+
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=15,
-            do_sample=True,  # Optional: adds randomness
-            temperature=0.8,  # Optional: more natural output
+            max_new_tokens=20,
+            do_sample=True,
+            temperature=0.8,
+            top_k=50,
+            top_p=0.95,
             pad_token_id=tokenizer.eos_token_id
         )
 
-    # Decode only new part of generation
+    # Decode generated text
     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    # Extract the continuation only (optional but useful)
-    continuation = generated_text[len(input_text):].strip()
+    # Remove the prompt portion to isolate generated words
+    continuation = generated_text[len(last_5_words):].strip()
 
     return JSONResponse(content={"output": continuation})
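
For context, below is a minimal, self-contained sketch of how the updated handler could sit in a complete app/main.py. The FastAPI route wiring, the distilgpt2 checkpoint, and the load_model() helper shown here are assumptions for illustration only (the repo's real loader is defined elsewhere in the file); only the body of predict() mirrors the diff above.

# Sketch of the updated endpoint; load_model() and the /predict route are assumptions.
import torch
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from transformers import AutoModelForCausalLM, AutoTokenizer

app = FastAPI()


def load_model(name: str = "distilgpt2"):
    # Hypothetical loader standing in for the repo's own load_model().
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name)
    model.eval()
    return model, tokenizer


model, tokenizer = load_model()


@app.post("/predict")
async def predict(request: Request):
    data = await request.json()
    input_text = data.get("input", "")

    # Keep only the last 5 words of the input as the prompt, as in the commit.
    last_5_words = " ".join(input_text.strip().split()[-5:])

    # Tokenize the shortened prompt and move it to the model's device.
    inputs = tokenizer(last_5_words, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=20,
            do_sample=True,
            temperature=0.8,
            top_k=50,
            top_p=0.95,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode prompt + generation, then strip the prompt prefix so only the
    # newly generated words are returned (character-based slice, as in the diff).
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    continuation = generated_text[len(last_5_words):].strip()

    return JSONResponse(content={"output": continuation})

With this in place, POSTing a JSON body such as {"input": "the quick brown fox jumps over the lazy dog"} to /predict would prompt the model with only "jumps over the lazy dog" and return up to 20 sampled continuation tokens in the "output" field.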