DylanZimmer committed on
Commit
dbf3c9a
·
1 Parent(s): 10579df

700M-chat

Browse files
Files changed (1) hide show
  1. app.py +37 -29
app.py CHANGED
@@ -1,12 +1,19 @@
1
- import gradio as gr
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
- # Load model and tokenizer
6
- #model_name = "HuggingFaceTB/SmolLM3-3B"
7
- model_name = "EleutherAI/gpt-neo-125M"
8
- tokenizer = AutoTokenizer.from_pretrained(model_name)
9
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")\
 
 
 
 
 
 
 
 
10
 
11
  def chat_fxn_caller(message, history, system_prompt="", temperature=0.6, top_p=0.95, max_tokens=32768):
12
  messages = []
@@ -21,36 +28,37 @@ def chat_fxn_caller(message, history, system_prompt="", temperature=0.6, top_p=0
21
 
22
  messages.append({"role": "user", "content": message})
23
 
24
- text = tokenizer.apply_chat_template(
25
- messages,
26
- tokenize=False,
27
- add_generation_prompt=True #SmolLm3 specific, tells model give next response
 
 
 
 
 
 
 
 
28
  )
29
 
30
- model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
31
-
32
- with torch.no_grad():
33
- generated_ids = model.generate(
34
- **model_inputs,
35
- max_new_tokens=max_tokens,
36
- temperature=temperature,
37
- top_p=top_p,
38
- do_sample=True,
39
- pad_token_id=tokenizer.eos_token_id
40
- )
41
-
42
- output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
43
- response = tokenizer.decode(output_ids, skip_special_tokens=True)
44
-
45
- return response
46
 
47
- prompt = "Be a good chatbox"
 
48
 
49
  demo = gr.ChatInterface(
50
  chat_fxn_caller,
51
  type="messages",
52
  additional_inputs=[
53
- gr.Textbox(prompt, label="System Prompt"),
54
  ],
55
  )
56
 
 
 
1
  import torch
2
+ from transformers import pipeline
3
 
4
+ # Set up the text-generation pipeline
5
+ model_name = "amusktweewt/tiny-model-700M-chat"
6
+ chatbot = pipeline(
7
+ "text-generation",
8
+ model=model_name,
9
+ device=0 if torch.cuda.is_available() else -1
10
+ )
11
+
12
+ # Ensure that bos_token and eos_token are explicitly set as strings
13
+ chatbot.tokenizer.bos_token = "<sos>"
14
+ chatbot.tokenizer.eos_token = "<|endoftext|>"
15
+
16
+ system_prompt = "You are a highly intelligent and helpful AI assistant named Tiny Chat, developed by amusktweewt. Always refer to yourself like that. Your responses should be clear, concise, and accurate. Always prioritize user needs, provide well-structured answers, and maintain a friendly yet professional tone. Adapt to the user's preferences and communication style. When needed, ask clarifying questions to ensure the best response. Be honest about limitations and avoid making assumptions. Keep interactions engaging, informative, and efficient."
17
 
18
  def chat_fxn_caller(message, history, system_prompt="", temperature=0.6, top_p=0.95, max_tokens=32768):
19
  messages = []
 
28
 
29
  messages.append({"role": "user", "content": message})
30
 
31
+ prompt = chatbot.tokenizer.apply_chat_template(messages, tokenize=False)
32
+
33
+ response = chatbot(
34
+ prompt,
35
+ do_sample=True,
36
+ max_new_tokens=512,
37
+ top_k=50,
38
+ temperature=0.6,
39
+ num_return_sequences=1,
40
+ repetition_penalty=1.1,
41
+ pad_token_id=chatbot.tokenizer.eos_token_id,
42
+ min_new_tokens=20
43
  )
44
 
45
+ full_text = response[0]["generated_text"]
46
+ response = full_text[len(demo = gr.ChatInterface(
47
+ chat_fxn_caller,
48
+ type="messages",
49
+ additional_inputs=[
50
+ gr.Textbox(prompt, label="System Prompt"),
51
+ ],
52
+ )
 
 
 
 
 
 
 
 
53
 
54
+ demo.launch(share=True)prompt):].strip()
55
+ return response
56
 
57
  demo = gr.ChatInterface(
58
  chat_fxn_caller,
59
  type="messages",
60
  additional_inputs=[
61
+ gr.Textbox(system_prompt, label="System Prompt"),
62
  ],
63
  )
64