File size: 1,857 Bytes
045a686
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ad926d
082f600
b52f3ea
9ad926d
082f600
9ad926d
 
082f600
 
 
 
 
 
 
 
9ad926d
082f600
9ad926d
 
 
 
 
 
 
 
045a686
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import streamlit as st
from huggingface_hub import InferenceClient
import os

# Constants
# Dedicated HF Inference Endpoint URL for the chat model (us-east-1).
SPACE_URL = "https://z7svds7k42bwhhgm.us-east-1.aws.endpoints.huggingface.cloud"
# API token read from the environment; None if unset (client calls will then fail auth).
HF_API_KEY = os.getenv("HF_API_KEY")
DUBS_PATH = "🐾"  # Optional: Replace with an avatar path if needed

# Streamlit Configuration
st.set_page_config(page_title="Chatbot Test", page_icon="🤖", layout="centered")

# Shared client for the inference endpoint; reused by fetch_response on every turn.
client = InferenceClient(SPACE_URL, token=HF_API_KEY)



    

def fetch_response(prompt):
    """
    Stream generated text from the HF Inference Endpoint.

    Args:
        prompt: Fully formatted prompt string (including any chat template
            markers) to send to the endpoint.

    Yields:
        str: Individual generated token texts, as they arrive. Special
        tokens are skipped, and generation halts early if a stop sequence
        is emitted as a token.
    """
    gen_kwargs = {
        "max_new_tokens": 512,
        "top_k": 30,
        "top_p": 0.9,
        "temperature": 0.2,
        "repetition_penalty": 1.02,
        "stop_sequences": ["<|endoftext|>"]
    }

    # details=True is required so each streamed chunk carries token metadata
    # (response.token.special / .text) used in the filtering below.
    stream = client.text_generation(prompt, stream=True, details=True, **gen_kwargs)

    for response in stream:
        # Skip special tokens (e.g. EOS markers) — they are not display text.
        if response.token.special:
            continue
        # Stop if we encounter stop_sequences
        if response.token.text in gen_kwargs["stop_sequences"]:
            break
        yield response.token.text





# Streamlit Chat Interface
st.title("Chatbot Testing Interface")

# Handle a single stateless turn: read input, echo it, then stream the reply.
if user_message := st.chat_input("Enter your message..."):
    # Echo the user's message into the chat transcript.
    st.chat_message("user").write(user_message)

    # Wrap the raw input in the chat template (stateless — no prior history).
    formatted_prompt = f"<|user|>{user_message}<|end|> \n <|assistant|> "

    # Stream the assistant's reply token-by-token into its chat bubble.
    with st.spinner("Dubs is thinking... Woof Woof! 🐾"):
        with st.chat_message("assistant", avatar=DUBS_PATH):
            st.write_stream(fetch_response(formatted_prompt))