Spaces:

DDDDEvvvvv
/

devmegablack-chat

Running

App Files Community

DDDDEvvvvv committed on Dec 29, 2025

Commit

a30a54f

verified ·

1 Parent(s): 59ffec9

Update app.py

Browse files

Files changed (1) hide show

app.py +63 -15

app.py CHANGED Viewed

@@ -2,29 +2,53 @@ import gradio as gr
 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 import torch
-# Model setup
 model_name = "facebook/blenderbot-400M-distill"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device)
 model.eval()
-# Optional speed boost on GPU
 if device == "cuda":
     model = model.half()
 persona = "You are a helpful, concise, friendly assistant."
 def respond(message, history):
     history.append({"role": "user", "content": message})
-    # Build context from last 3 turns
     context = persona + "\n"
-    for msg in history[-6:]:
-        role = "User" if msg["role"] == "user" else "Bot"
-        context += f"{role}: {msg['content']}\n"
     context += "Bot:"
     inputs = tokenizer(
@@ -34,32 +58,56 @@ def respond(message, history):
         max_length=512
     ).to(device)
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=120,
             do_sample=True,
-            temperature=0.7,
-            top_p=0.9,
-            repetition_penalty=1.1
         )
     response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    history.append({"role": "assistant", "content": response_text})
-    return history, history
 def reset_chat():
     return [], []
 with gr.Blocks(css="""
 body {background-color: #000 !important; color: #fff !important;}
 .gr-chatbot {background-color: #111 !important; border-radius: 12px; height: 100% !important;}
 .gr-chatbot .message.user {border-color: #0ff; background-color: transparent !important;}
 .gr-chatbot .message.bot {border-color: #aaa; background-color: transparent !important;}
-.gr-textbox textarea {background-color: transparent !important; color: #fff !important; border: 1px solid #555 !important;}
-.gr-textbox textarea::selection {background-color: #0ff !important; color: #000 !important;}
-.gr-button {background-color: #0ff !important; color: #000 !important; border-radius: 8px;}
 footer {display: none !important;}
 """) as demo:

 from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 import torch
+# ------------------ MODEL SETUP ------------------
 model_name = "facebook/blenderbot-400M-distill"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 device = "cuda" if torch.cuda.is_available() else "cpu"
+print("Using device:", device)
 model.to(device)
 model.eval()
+# Half precision ONLY on GPU
 if device == "cuda":
     model = model.half()
 persona = "You are a helpful, concise, friendly assistant."
+# ------------------ CHAT FUNCTION ------------------
 def respond(message, history):
+    # Add user message
     history.append({"role": "user", "content": message})
+    # Add loading placeholder
+    history.append({"role": "assistant", "content": "⏳ Thinking..."})
+    yield history, history
+    # --------- BUILD CONTEXT (TURN-BASED MEMORY) ---------
     context = persona + "\n"
+    # Group messages into turns (user + bot)
+    turns = []
+    temp = []
+    for msg in history[:-1]:  # exclude "Thinking..."
+        temp.append(msg)
+        if len(temp) == 2:
+            turns.append(temp)
+            temp = []
+    # Keep last 3 full turns
+    for turn in turns[-3:]:
+        for msg in turn:
+            role = "User" if msg["role"] == "user" else "Bot"
+            context += f"{role}: {msg['content']}\n"
     context += "Bot:"
     inputs = tokenizer(
         max_length=512
     ).to(device)
+    # ------------------ GENERATION ------------------
     with torch.no_grad():
         outputs = model.generate(
             **inputs,
+            max_new_tokens=80,
             do_sample=True,
+            temperature=0.65,
+            top_p=0.85,
+            repetition_penalty=1.1,
+            num_beams=1
         )
     response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    # Replace loading text
+    history[-1]["content"] = response_text
+    # Optional hard trim to prevent slowdown
+    if len(history) > 12:
+        history = history[-10:]
+    yield history, history
+# ------------------ RESET ------------------
 def reset_chat():
     return [], []
+# ------------------ UI ------------------
 with gr.Blocks(css="""
 body {background-color: #000 !important; color: #fff !important;}
 .gr-chatbot {background-color: #111 !important; border-radius: 12px; height: 100% !important;}
 .gr-chatbot .message.user {border-color: #0ff; background-color: transparent !important;}
 .gr-chatbot .message.bot {border-color: #aaa; background-color: transparent !important;}
+.gr-textbox textarea {
+    background-color: transparent !important;
+    color: #fff !important;
+    border: 1px solid #555 !important;
+}
+.gr-textbox textarea::selection {
+    background-color: #0ff !important;
+    color: #000 !important;
+}
+.gr-button {
+    background-color: #0ff !important;
+    color: #000 !important;
+    border-radius: 8px;
+}
 footer {display: none !important;}
 """) as demo: