Update app.py
app.py CHANGED
@@ -4,6 +4,7 @@ import torch
 import os
 from huggingface_hub import login
 from peft import PeftModel, PeftConfig
+import time
 
 # Login with HF_TOKEN (if available)
 hf_token = os.environ.get("HF_TOKEN")
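The first hunk simply moves import time up with the other imports instead of importing it inside the response loop (it is dropped from there in the last hunk). For context, a minimal sketch of the HF_TOKEN login these lines sit next to, assuming the usual huggingface_hub pattern (the actual if/else block is outside the hunk):

# Sketch only: the real login block is not shown in this hunk.
import os
from huggingface_hub import login

hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)  # authenticate, e.g. for gated or private model repos
else:
    print("HF_TOKEN not set; only public repos will be reachable.")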
@@ -18,7 +19,7 @@ else:
 
 # Model and Adapter Configuration
 model_id = "Prajjwalng/gemma_customer_care" # Base model
-adapter_id = "Prajjwalng/gemma_customercare_adapters"
+adapter_id = "Prajjwalng/gemma_customercare_adapters" # adapter model
 
 # Initialize model and tokenizer (load only once)
 @st.cache_resource
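This hunk only adds a comment to the adapter_id line; where the adapter is actually applied is outside the diff. Under the usual PEFT workflow it would look roughly like the sketch below (an assumption, not code from this commit):

# Hypothetical use of adapter_id; the corresponding app.py code is not part of the diff.
from transformers import AutoModelForCausalLM
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained(model_id)
model = PeftModel.from_pretrained(base_model, adapter_id)  # attach the LoRA adapter weights
model = model.merge_and_unload()  # optionally merge them into the base model for plain inference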
@@ -32,7 +33,7 @@ def load_model(model_id):
     )
 
     tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)
-    return base_model,tokenizer
+    return base_model, tokenizer
 
 merged_model, tokenizer = load_model(model_id)
 
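Besides fixing the spacing in the return statement, these lines sit inside the cached loader: @st.cache_resource makes Streamlit build the model and tokenizer once per server process rather than on every rerun. A minimal sketch of that loader, assuming standard transformers calls for the parts the hunk does not show:

# Sketch of load_model; only the tokenizer line and the return are visible in the hunk.
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer

@st.cache_resource
def load_model(model_id):
    base_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)
    return base_model, tokenizer

merged_model, tokenizer = load_model(model_id)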
@@ -69,6 +70,9 @@ st.title("Customer Care ChatBot")
 # Initialize chat history
 if "messages" not in st.session_state:
     st.session_state.messages = []
+    # Add initial welcome message
+    initial_message = {"role": "assistant", "content": "Hi, I am Sora, I am your customer support agent."}
+    st.session_state.messages.append(initial_message)
 
 # Display chat messages from history on app rerun
 for message in st.session_state.messages:
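The new lines seed the chat history with a greeting the first time a session starts, so the assistant speaks first. The replay loop referenced at the end of the hunk then renders it like any other message; a sketch of that loop (its body is not part of the diff):

# Sketch of the history replay; only the for-line appears in the hunk.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])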
@@ -86,11 +90,13 @@ if prompt := st.chat_input("How can I help you?"):
     # Generate and display chatbot response
     with st.chat_message("assistant"):
         message_placeholder = st.empty()
+        typing_placeholder = st.empty()
+        typing_placeholder.markdown("...")
         full_response = ""
         response = get_completion(prompt, merged_model, tokenizer)
+        typing_placeholder.empty()
 
         # Simulate stream of responses with milliseconds delay
-        import time
         for chunk in response.split():
             full_response += chunk + " "
             time.sleep(0.05)
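Taken together, the last hunk shows a "..." typing indicator while get_completion runs, clears it once the text is back, and then replays the answer word by word with a short delay. A self-contained sketch of that display logic, with fake_completion standing in for the app's real get_completion (hypothetical, only so the snippet runs on its own):

# Sketch of the typing indicator plus simulated streaming; fake_completion is a stand-in.
import time
import streamlit as st

def fake_completion(prompt: str) -> str:
    return "Thanks for reaching out! Let me check that order for you."

if prompt := st.chat_input("How can I help you?"):
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        typing_placeholder = st.empty()
        typing_placeholder.markdown("...")   # show a typing hint while "generating"
        response = fake_completion(prompt)
        typing_placeholder.empty()           # remove the hint once the text is ready

        full_response = ""
        for chunk in response.split():       # replay the answer word by word
            full_response += chunk + " "
            time.sleep(0.05)
            message_placeholder.markdown(full_response)  # assumed placeholder update, not shown in the hunk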