Spaces:

ZENLLC
/

Simple-Chatbot

Sleeping

App Files Files Community

ZENLLC committed on Jul 11, 2025

Commit

99d702e

verified ·

1 Parent(s): fd49e19

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -53

app.py CHANGED Viewed

@@ -1,78 +1,84 @@
-# app.py  –  Simple key-free chatbot for Hugging Face Spaces
 import gradio as gr
-from transformers import (
-    AutoTokenizer,
-    AutoModelForSeq2SeqLM,
-    pipeline,
-    Conversation,
-)
-# ---------------------------------------------------------------------------
-# 1. Model choice – swap this to any public chat model that fits in free CPU
-# ---------------------------------------------------------------------------
-MODEL_NAME = "facebook/blenderbot-400M-distill"   # ~720 MB
-# ---------------------------------------------------------------------------
-# 2. Load model, tokenizer, and wrap in the conversational pipeline
-# ---------------------------------------------------------------------------
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model     = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
-chatbot = pipeline(
-    task="conversational",
-    model=model,
-    tokenizer=tokenizer,
-    max_new_tokens=128,      # cut-off length for each reply
-    temperature=0.7,         # creativity vs. coherence
-    top_p=0.95,              # nucleus sampling
 )
-# ---------------------------------------------------------------------------
-# 3. Gradio callback – MUST return just the bot’s reply string
-# ---------------------------------------------------------------------------
-def respond(message, history):
     """
     Parameters
     ----------
     message : str
-        The latest user message (single turn).
-    history : list of (str, str)
-        Gradio’s running list of (user, bot) pairs.
     Returns
     -------
     str
-        The bot’s reply to display in ChatInterface.
     """
-    # Rebuild a HF Conversation object from the running history
-    conv = Conversation("")
-    for user_msg, bot_msg in history:
-        conv.add_user_input(user_msg)
-        conv.append_response(bot_msg)
-    conv.add_user_input(message)
-    # Generate a reply
-    chatbot(conv)
-    reply = conv.generated_responses[-1]
-    return reply
-# ---------------------------------------------------------------------------
-# 4. Launch the UI
-# ---------------------------------------------------------------------------
 demo = gr.ChatInterface(
-    fn=respond,
-    title="🗣️ Simple BlenderBot Chat",
-    description=(
-        "Runs entirely on free Hugging Face CPU • "
-        "No external API keys required • "
-        "Powered by facebook/blenderbot-400M-distill"
     ),
-    examples=[
         "Hi there!",
-        "Tell me an interesting fact about Mars.",
-        "How do I build a kite?",
     ],
-    theme="soft",
 )
 if __name__ == "__main__":

+"""
+A key-free Hugging Face Space chatbot built with:
+  • microsoft/DialoGPT-small  (356 MB causal-LM, perfect for free CPU)
+  • gradio.ChatInterface      (simple two-arg callback)
+Paste this file + requirements.txt into a new Gradio Space and press ⏵ Run.
+"""
 import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Hub identifier handed to both from_pretrained() calls below, so the tokenizer
+# and the model always come from the same checkpoint.
+MODEL_NAME = "microsoft/DialoGPT-small"   # swap to any open-weights causal LM
+# ---------------------------------------------------------------------
+# 1 · Load model & tokenizer
+# ---------------------------------------------------------------------
+# Both objects are created once at module import and reused by every call to
+# respond(); nothing is reloaded per message.
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model     = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+# ---------------------------------------------------------------------
+# 2 · Chat callback — must return *only* the reply string
+# ---------------------------------------------------------------------
+# Upper bound on the number of prompt tokens respond() keeps; older tokens are
+# dropped from the front of the conversation once this limit is exceeded.
+MAX_CONTEXT   = 1024                     # DialoGPT’s context window
+# Keyword arguments forwarded unchanged to model.generate() inside respond().
+GEN_KWARGS    = dict(                    # tweak to taste
+    max_new_tokens = 120,                # cap on tokens generated per reply
+    do_sample      = False,              # deterministic ⇒ fewer “nonsense” tokens
+    pad_token_id   = tokenizer.eos_token_id,   # reuse the EOS id as the padding id
 )
+def respond(message: str, history: list[list[str]]) -> str:
     """
+    Generate the bot's next reply for ``gr.ChatInterface``.
+
+    The conversation is flattened into one string in which every turn is
+    terminated by the tokenizer's EOS token, tokenized, trimmed from the
+    front so that prompt + reply fit in ``MAX_CONTEXT``, and passed to
+    ``model.generate`` with ``GEN_KWARGS``.
+
     Parameters
     ----------
     message : str
+        Latest user message.
+    history : list of [user, bot] pairs
+        Earlier turns, passed in by gr.ChatInterface.
+
     Returns
     -------
     str
+        Bot's reply (ChatInterface handles updating history UI), or "…" when
+        the decoded reply is empty.
     """
+    # --- Build a single token sequence using the tokenizer's EOS delimiter.
+    # `or ""` keeps a None entry in a history pair from raising TypeError
+    # (NOTE(review): assumes pair-style history — confirm for the installed
+    # Gradio version).
+    eos = tokenizer.eos_token
+    turns = [text or "" for usr, bot in history for text in (usr, bot)]
+    turns.append(message)
+    sequence = "".join(turn + eos for turn in turns)
+    encoded = tokenizer(sequence, return_tensors="pt")
+    input_ids = encoded["input_ids"]
+    attention_mask = encoded["attention_mask"]
+    # Reserve room for the reply: the prompt AND the generated tokens together
+    # must fit in the context window, so trim the prompt to what is left after
+    # setting aside max_new_tokens (always keep at least one prompt token).
+    budget = max(1, MAX_CONTEXT - GEN_KWARGS.get("max_new_tokens", 0))
+    if input_ids.shape[-1] > budget:
+        input_ids = input_ids[:, -budget:]
+        attention_mask = attention_mask[:, -budget:]
+    # Pass the mask explicitly: with pad_token_id == eos_token_id it cannot be
+    # inferred from the ids, because every turn ends in EOS.
+    output_ids = model.generate(
+        input_ids, attention_mask=attention_mask, **GEN_KWARGS
+    )
+    # Everything *after* the original input is the new reply
+    reply_ids = output_ids[0, input_ids.shape[-1]:]
+    reply = tokenizer.decode(reply_ids, skip_special_tokens=True).strip()
+    return reply or "…"
+# ---------------------------------------------------------------------
+# 3 · Launch UI
+# ---------------------------------------------------------------------
+# Build the chat UI object; respond() is registered as its callback and the
+# remaining arguments only configure how the page is presented.
 demo = gr.ChatInterface(
+    fn          = respond,               # called as respond(message, history)
+    title       = "🤖 Key-Free DialoGPT Chatbot",
+    description = (
+        "Runs entirely on open weights (no API keys). "
+        "Swap `MODEL_NAME` to try any other causal-LM that fits CPU RAM."
     ),
+    examples    = [                      # example prompts passed to the interface
         "Hi there!",
+        "Give me a fun fact about Jupiter.",
+        "Tell me a short robot joke.",
     ],
+    theme       = "soft",
 )
 if __name__ == "__main__":