VirtualInsight committed on
Commit
28bae52
·
verified ·
1 Parent(s): 92854fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -11
app.py CHANGED
@@ -39,15 +39,15 @@ print(f"EOS token ID: {EOS_TOKEN_ID}")
39
  @torch.no_grad()
40
  def generate_response(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
41
  """
42
- Generates a chat-style response using the Lumen-Instruct model.
43
  """
44
- # Format the input as a structured conversation
45
  formatted_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
46
 
47
  # Tokenize input
48
  input_ids = torch.tensor([tokenizer.encode(formatted_prompt).ids], dtype=torch.long, device=device)
49
 
50
- # Generate response with sampling
51
  output = generate(
52
  model,
53
  input_ids,
@@ -59,17 +59,20 @@ def generate_response(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
59
  eos_token_id=EOS_TOKEN_ID,
60
  )
61
 
62
- # Decode full output text
63
  full_text = tokenizer.decode(output[0].tolist())
64
 
65
- # Extract only assistant’s part
66
  if "<|im_start|>assistant" in full_text:
67
  response = full_text.split("<|im_start|>assistant")[-1]
68
- if "<|im_end|>" in response:
69
- response = response.split("<|im_end|>")[0]
70
- return response.strip()
71
 
72
- return full_text.strip()
 
 
 
73
 
74
  # -----------------------------
75
  # Gradio Interface
@@ -88,7 +91,7 @@ demo = gr.Interface(
88
  )
89
 
90
  # -----------------------------
91
- # Launch
92
  # -----------------------------
93
  if __name__ == "__main__":
94
- demo.launch()
 
39
  @torch.no_grad()
40
  def generate_response(prompt, max_tokens=200, temperature=0.7, top_p=0.9):
41
  """
42
+ Generates a clean assistant-only response from the Lumen Instruct model.
43
  """
44
+ # Format input as a conversation prompt
45
  formatted_prompt = f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n"
46
 
47
  # Tokenize input
48
  input_ids = torch.tensor([tokenizer.encode(formatted_prompt).ids], dtype=torch.long, device=device)
49
 
50
+ # Generate output
51
  output = generate(
52
  model,
53
  input_ids,
 
59
  eos_token_id=EOS_TOKEN_ID,
60
  )
61
 
62
+ # Decode full text
63
  full_text = tokenizer.decode(output[0].tolist())
64
 
65
+ # 🧹 Clean extraction of assistant’s reply only
66
  if "<|im_start|>assistant" in full_text:
67
  response = full_text.split("<|im_start|>assistant")[-1]
68
+ response = response.split("<|im_end|>")[0] if "<|im_end|>" in response else response
69
+ else:
70
+ response = full_text
71
 
72
+ # Remove potential leftover role tokens and clean spaces
73
+ response = response.replace("assistant", "").replace("user", "").strip()
74
+
75
+ return response
76
 
77
  # -----------------------------
78
  # Gradio Interface
 
91
  )
92
 
93
  # -----------------------------
94
+ # Launch Interface
95
  # -----------------------------
96
  if __name__ == "__main__":
97
+ demo.launch(share=True)