DylanZimmer committed on
Commit
dbf3c9a
·
1 Parent(s): 10579df

700M-chat

Browse files
Files changed (1) hide show
  1. app.py +37 -29
app.py CHANGED
@@ -1,12 +1,19 @@
1
- import gradio as gr
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForCausalLM
4
 
5
- # Load model and tokenizer
6
- #model_name = "HuggingFaceTB/SmolLM3-3B"
7
- model_name = "EleutherAI/gpt-neo-125M"
8
- tokenizer = AutoTokenizer.from_pretrained(model_name)
9
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")\
 
 
 
 
 
 
 
 
10
 
11
  def chat_fxn_caller(message, history, system_prompt="", temperature=0.6, top_p=0.95, max_tokens=32768):
12
  messages = []
@@ -21,36 +28,37 @@ def chat_fxn_caller(message, history, system_prompt="", temperature=0.6, top_p=0
21
 
22
  messages.append({"role": "user", "content": message})
23
 
24
- text = tokenizer.apply_chat_template(
25
- messages,
26
- tokenize=False,
27
- add_generation_prompt=True #SmolLm3 specific, tells model give next response
 
 
 
 
 
 
 
 
28
  )
29
 
30
- model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
31
-
32
- with torch.no_grad():
33
- generated_ids = model.generate(
34
- **model_inputs,
35
- max_new_tokens=max_tokens,
36
- temperature=temperature,
37
- top_p=top_p,
38
- do_sample=True,
39
- pad_token_id=tokenizer.eos_token_id
40
- )
41
-
42
- output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
43
- response = tokenizer.decode(output_ids, skip_special_tokens=True)
44
-
45
- return response
46
 
47
- prompt = "Be a good chatbox"
 
48
 
49
  demo = gr.ChatInterface(
50
  chat_fxn_caller,
51
  type="messages",
52
  additional_inputs=[
53
- gr.Textbox(prompt, label="System Prompt"),
54
  ],
55
  )
56
 
 
 
1
  import torch
2
+ from transformers import pipeline
3
 
4
+ # Set up the text-generation pipeline
5
+ model_name = "amusktweewt/tiny-model-700M-chat"
6
+ chatbot = pipeline(
7
+ "text-generation",
8
+ model=model_name,
9
+ device=0 if torch.cuda.is_available() else -1
10
+ )
11
+
12
+ # Ensure that bos_token and eos_token are explicitly set as strings
13
+ chatbot.tokenizer.bos_token = "<sos>"
14
+ chatbot.tokenizer.eos_token = "<|endoftext|>"
15
+
16
+ system_prompt = "You are a highly intelligent and helpful AI assistant named Tiny Chat, developed by amusktweewt. Always refer to yourself like that. Your responses should be clear, concise, and accurate. Always prioritize user needs, provide well-structured answers, and maintain a friendly yet professional tone. Adapt to the user's preferences and communication style. When needed, ask clarifying questions to ensure the best response. Be honest about limitations and avoid making assumptions. Keep interactions engaging, informative, and efficient."
17
 
18
  def chat_fxn_caller(message, history, system_prompt="", temperature=0.6, top_p=0.95, max_tokens=32768):
19
  messages = []
 
28
 
29
  messages.append({"role": "user", "content": message})
30
 
31
+ prompt = chatbot.tokenizer.apply_chat_template(messages, tokenize=False)
32
+
33
+ response = chatbot(
34
+ prompt,
35
+ do_sample=True,
36
+ max_new_tokens=512,
37
+ top_k=50,
38
+ temperature=0.6,
39
+ num_return_sequences=1,
40
+ repetition_penalty=1.1,
41
+ pad_token_id=chatbot.tokenizer.eos_token_id,
42
+ min_new_tokens=20
43
  )
44
 
45
+ full_text = response[0]["generated_text"]
46
+ response = full_text[len(demo = gr.ChatInterface(
47
+ chat_fxn_caller,
48
+ type="messages",
49
+ additional_inputs=[
50
+ gr.Textbox(prompt, label="System Prompt"),
51
+ ],
52
+ )
 
 
 
 
 
 
 
 
53
 
54
+ demo.launch(share=True)prompt):].strip()
55
+ return response
56
 
57
  demo = gr.ChatInterface(
58
  chat_fxn_caller,
59
  type="messages",
60
  additional_inputs=[
61
+ gr.Textbox(system_prompt, label="System Prompt"),
62
  ],
63
  )
64