ZENLLC committed on
Commit
99d702e
·
verified ·
1 Parent(s): fd49e19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -53
app.py CHANGED
@@ -1,78 +1,84 @@
1
- # app.py – Simple key-free chatbot for Hugging Face Spaces
 
 
 
 
 
 
 
2
  import gradio as gr
3
- from transformers import (
4
- AutoTokenizer,
5
- AutoModelForSeq2SeqLM,
6
- pipeline,
7
- Conversation,
8
- )
9
 
10
- # ---------------------------------------------------------------------------
11
- # 1. Model choice – swap this to any public chat model that fits in free CPU
12
- # ---------------------------------------------------------------------------
13
- MODEL_NAME = "facebook/blenderbot-400M-distill" # ~720 MB
14
 
15
- # ---------------------------------------------------------------------------
16
- # 2. Load model, tokenizer, and wrap in the conversational pipeline
17
- # ---------------------------------------------------------------------------
18
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
19
- model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
20
 
21
- chatbot = pipeline(
22
- task="conversational",
23
- model=model,
24
- tokenizer=tokenizer,
25
- max_new_tokens=128, # cut-off length for each reply
26
- temperature=0.7, # creativity vs. coherence
27
- top_p=0.95, # nucleus sampling
 
28
  )
29
 
30
- # ---------------------------------------------------------------------------
31
- # 3. Gradio callback – MUST return just the bot’s reply string
32
- # ---------------------------------------------------------------------------
33
- def respond(message, history):
34
  """
35
  Parameters
36
  ----------
37
  message : str
38
- The latest user message (single turn).
39
- history : list of (str, str)
40
- Gradio’s running list of (user, bot) pairs.
41
 
42
  Returns
43
  -------
44
  str
45
- The bot’s reply to display in ChatInterface.
46
  """
47
- # Rebuild a HF Conversation object from the running history
48
- conv = Conversation("")
49
- for user_msg, bot_msg in history:
50
- conv.add_user_input(user_msg)
51
- conv.append_response(bot_msg)
52
- conv.add_user_input(message)
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
- # Generate a reply
55
- chatbot(conv)
56
- reply = conv.generated_responses[-1]
57
- return reply
58
 
59
- # ---------------------------------------------------------------------------
60
- # 4. Launch the UI
61
- # ---------------------------------------------------------------------------
62
  demo = gr.ChatInterface(
63
- fn=respond,
64
- title="🗣️ Simple BlenderBot Chat",
65
- description=(
66
- "Runs entirely on free Hugging Face CPU "
67
- "No external API keys required "
68
- "Powered by facebook/blenderbot-400M-distill"
69
  ),
70
- examples=[
71
  "Hi there!",
72
- "Tell me an interesting fact about Mars.",
73
- "How do I build a kite?",
74
  ],
75
- theme="soft",
76
  )
77
 
78
  if __name__ == "__main__":
 
1
+ """
2
+ A key-free Hugging Face Space chatbot built with:
3
+ • microsoft/DialoGPT-small (356 MB causal-LM, perfect for free CPU)
4
+ • gradio.ChatInterface (simple two-arg callback)
5
+
6
+ Paste this file + requirements.txt into a new Gradio Space and press ⏵ Run.
7
+ """
8
+
9
  import gradio as gr
10
+ import torch
11
+ from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
 
 
12
 
13
+ MODEL_NAME = "microsoft/DialoGPT-small" # swap to any open-weights causal LM
 
 
 
14
 
15
+ # ---------------------------------------------------------------------
16
+ # 1 · Load model & tokenizer
17
+ # ---------------------------------------------------------------------
18
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
19
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
20
 
21
+ # ---------------------------------------------------------------------
22
+ # 2 · Chat callback — must return *only* the reply string
23
+ # ---------------------------------------------------------------------
24
+ MAX_CONTEXT = 1024 # DialoGPT’s context window
25
+ GEN_KWARGS = dict( # tweak to taste
26
+ max_new_tokens = 120,
27
+ do_sample = False, # deterministic ⇒ fewer “nonsense” tokens
28
+ pad_token_id = tokenizer.eos_token_id,
29
  )
30
 
31
+ def respond(message: str, history: list[list[str, str]]) -> str:
 
 
 
32
  """
33
  Parameters
34
  ----------
35
  message : str
36
+ Latest user message.
37
+ history : list[(user, bot), …]
38
+ Passed in by gr.ChatInterface.
39
 
40
  Returns
41
  -------
42
  str
43
+ Bot's reply (ChatInterface handles updating history UI).
44
  """
45
+ # --- Build a single token sequence using DialoGPT’s EOS delimiter
46
+ sequence = ""
47
+ for usr, bot in history:
48
+ sequence += usr + tokenizer.eos_token
49
+ sequence += bot + tokenizer.eos_token
50
+ sequence += message + tokenizer.eos_token
51
+
52
+ input_ids = tokenizer(sequence, return_tensors="pt").input_ids
53
+
54
+ # Keep only the last MAX_CONTEXT tokens so we never overflow
55
+ if input_ids.shape[-1] > MAX_CONTEXT:
56
+ input_ids = input_ids[:, -MAX_CONTEXT:]
57
+
58
+ output_ids = model.generate(input_ids, **GEN_KWARGS)
59
+
60
+ # Everything *after* the original input is the new reply
61
+ reply_ids = output_ids[0, input_ids.shape[-1]:]
62
+ reply = tokenizer.decode(reply_ids, skip_special_tokens=True).strip()
63
 
64
+ return reply or "…"
 
 
 
65
 
66
+ # ---------------------------------------------------------------------
67
+ # 3 · Launch UI
68
+ # ---------------------------------------------------------------------
69
  demo = gr.ChatInterface(
70
+ fn = respond,
71
+ title = "🤖 Key-Free DialoGPT Chatbot",
72
+ description = (
73
+ "Runs entirely on open weights (no API keys). "
74
+ "Swap `MODEL_NAME` to try any other causal-LM that fits CPU RAM."
 
75
  ),
76
+ examples = [
77
  "Hi there!",
78
+ "Give me a fun fact about Jupiter.",
79
+ "Tell me a short robot joke.",
80
  ],
81
+ theme = "soft",
82
  )
83
 
84
  if __name__ == "__main__":