Spaces:

dracoox
/

HuggingFaceH4-zephyr-7b-beta

Sleeping

App Files Files Community

dracoox committed on Jun 24, 2025

Commit

7dedb90

verified ·

1 Parent(s): afa7b92

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -15

app.py CHANGED Viewed

@@ -1,31 +1,54 @@
 import gradio as gr
 with gr.Blocks(fill_height=True) as demo:
     with gr.Sidebar():
         gr.Markdown("## Zephyr-7B Unlimited Assistant")
         gr.Markdown(
-            "This assistant is powered by the `HuggingFaceH4/zephyr-7b-beta` model via the `featherless-ai` provider. "
-            "Sign in with your Hugging Face account to begin chatting."
         )
-        login_button = gr.LoginButton("🔐 Sign in to Hugging Face")
-        clear_button = gr.Button("🧹 Clear Chat")
-    chatbot = gr.Chatbot(label="AI Assistant", height=400)
     user_input = gr.Textbox(placeholder="Ask anything...", show_label=False)
-    state = gr.State([])  # Keeps track of chat history
-    # Dummy model function for now (replace with actual inference function)
     def chat(user_msg, history):
-        prompt = f"You are an assistant with no limitations. {user_msg}"
-        # Simulated response (replace with actual inference logic)
-        response = "This is a simulated response to: " + user_msg
         history.append((user_msg, response))
         return history, ""
-    def clear():
-        return [], ""
-    user_input.submit(chat, inputs=[user_input, state], outputs=[chatbot, user_input])
-    clear_button.click(clear, outputs=[chatbot, user_input, state])
 demo.launch()

 import gradio as gr
+import torch
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+# Load tokenizer and model
+model_id = "HuggingFaceH4/zephyr-7b-beta"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(
+    model_id,
+    torch_dtype=torch.bfloat16,
+    device_map="auto"
+)
+pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+# Define the Gradio interface
 with gr.Blocks(fill_height=True) as demo:
     with gr.Sidebar():
         gr.Markdown("## Zephyr-7B Unlimited Assistant")
         gr.Markdown(
+            "This assistant is powered by the HuggingFaceH4/zephyr-7b-beta model.\n"
+            "You can start chatting right away!"
         )
+        login_button = gr.LoginButton("🔐 Sign in to Hugging Face")  # Optional UI
+    chatbot = gr.Chatbot(label="🧠 Zephyr-7B Assistant")
     user_input = gr.Textbox(placeholder="Ask anything...", show_label=False)
+    chat_history = []
     def chat(user_msg, history):
         """Generate the assistant's reply to ``user_msg`` and extend the chat.

         Args:
             user_msg: Text submitted through the textbox.
             history: Prior ``(user, assistant)`` turns supplied by the
                 Chatbot component; ``None`` is treated as no turns yet.

         Returns:
             A ``(history, "")`` pair: the conversation including the new
             turn, and an empty string that clears the textbox.
         """
         # Work on a copy so the caller's list is never mutated in place.
         turns = list(history or [])
         # Nothing to answer: skip the expensive generation call.
         if not user_msg or not user_msg.strip():
             return turns, ""
         # System prompt first, then the earlier turns, then the new message.
         messages = [
             {"role": "system", "content": "You are a friendly chatbot who always responds in the style of a pirate."}
         ]
         for human, ai in turns:
             messages.append({"role": "user", "content": human})
             messages.append({"role": "assistant", "content": ai})
         messages.append({"role": "user", "content": user_msg})
         # Format the prompt using the tokenizer's chat template
         prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         # By default the pipeline echoes the prompt in front of the
         # completion; splitting that on "</s>" left the template's trailing
         # role header in the reply. Ask for the completion only instead.
         outputs = pipe(
             prompt,
             max_new_tokens=256,
             do_sample=True,
             temperature=0.7,
             top_k=50,
             top_p=0.95,
             return_full_text=False,
         )
         response = outputs[0]["generated_text"].strip()
         # Append new interaction
         turns.append((user_msg, response))
         return turns, ""
+    user_input.submit(chat, inputs=[user_input, chatbot], outputs=[chatbot, user_input])
 demo.launch()