File size: 1,857 Bytes
045a686 9ad926d 082f600 b52f3ea 9ad926d 082f600 9ad926d 082f600 9ad926d 082f600 9ad926d 045a686 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import streamlit as st
from huggingface_hub import InferenceClient
import os
# Constants
# Dedicated HF Inference Endpoint URL that serves the chat model.
SPACE_URL = "https://z7svds7k42bwhhgm.us-east-1.aws.endpoints.huggingface.cloud"
# Auth token read from the environment; None if unset (requests will be unauthenticated).
HF_API_KEY = os.getenv("HF_API_KEY")
DUBS_PATH = "🐾" # Optional: Replace with an avatar path if needed
# Streamlit Configuration
st.set_page_config(page_title="Chatbot Test", page_icon="🤖", layout="centered")
# Single shared client for all requests made by this session.
client = InferenceClient(SPACE_URL, token=HF_API_KEY)
def fetch_response(prompt):
    """Stream generated text from the HF Inference Endpoint.

    Args:
        prompt: Fully formatted prompt string sent to the endpoint.

    Yields:
        str: Successive generated token texts. Special tokens are skipped,
        and generation stops at the first configured stop sequence.
    """
    gen_kwargs = {
        "max_new_tokens": 512,
        "top_k": 30,
        "top_p": 0.9,
        "temperature": 0.2,
        "repetition_penalty": 1.02,
        "stop_sequences": ["<|endoftext|>"],
    }
    # details=True is required so each streamed chunk carries token metadata
    # (response.token.special / response.token.text).
    stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)
    for response in stream:
        # Skip special tokens (e.g. end-of-text markers) entirely.
        if response.token.special:
            continue
        # Stop if we encounter stop_sequences
        if response.token.text in gen_kwargs["stop_sequences"]:
            break
        yield response.token.text
# Streamlit Chat Interface
st.title("Chatbot Testing Interface")

# User Input Section — walrus binds the submitted message (falsy when empty).
if prompt := st.chat_input("Enter your message..."):
    # Echo the user's message into the chat pane.
    st.chat_message("user").write(prompt)

    # Stateless prompt template: only the current turn is sent, no history.
    formatted_prompt = f"<|user|>{prompt}<|end|> \n <|assistant|> "

    # Stream the assistant's reply token-by-token into its chat bubble.
    with st.spinner("Dubs is thinking... Woof Woof! 🐾"):
        with st.chat_message("assistant", avatar=DUBS_PATH):
            token_stream = fetch_response(formatted_prompt)
            st.write_stream(token_stream)
|