# customercare/app.py
import streamlit as st
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import os
from huggingface_hub import login
from peft import PeftModel
import time
# Login with HF_TOKEN (if available)
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    try:
        login(token=hf_token, add_to_git_credential=False)
        st.success("Hugging Face login successful!")
    except Exception as e:
        st.error(f"Hugging Face login failed: {e}")
else:
    st.warning("HF_TOKEN environment variable not set. Some features may be limited.")
# Model and Adapter Configuration
model_id = "Prajjwalng/gemma_customer_care"  # Base model
adapter_id = "Prajjwalng/gemma_customercare_adapters"  # Adapter model
# Initialize model and tokenizer (load only once per session)
@st.cache_resource
def load_model(model_id, adapter_id):
    base_model = AutoModelForCausalLM.from_pretrained(
        model_id,
        low_cpu_mem_usage=True,
        return_dict=True,
        torch_dtype=torch.float16,
        device_map={"": 0} if torch.cuda.is_available() else "cpu",
    )
    # Apply the fine-tuned adapter on top of the base model and merge the
    # weights into a single model for faster inference.
    model = PeftModel.from_pretrained(base_model, adapter_id)
    merged = model.merge_and_unload()
    tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)
    return merged, tokenizer

merged_model, tokenizer = load_model(model_id, adapter_id)
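# On memory-constrained hardware the base model could instead be loaded in
# 4-bit. A hedged sketch, not part of the original app (assumes the
# `bitsandbytes` package and a CUDA GPU, neither of which this app declares):
#
#     from transformers import BitsAndBytesConfig
#     bnb_config = BitsAndBytesConfig(
#         load_in_4bit=True,
#         bnb_4bit_quant_type="nf4",
#         bnb_4bit_compute_dtype=torch.float16,
#     )
#     base_model = AutoModelForCausalLM.from_pretrained(
#         model_id, quantization_config=bnb_config, device_map="auto"
#     )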
# Generate a chatbot response using the Gemma turn-based prompt template
def get_completion(query: str, model, tokenizer) -> str:
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    prompt = f"""
<start_of_turn>system You are a support chatbot who helps with user queries and always responds in the style of a professional.
<end_of_turn>
<start_of_turn>user
{query}
<end_of_turn>
<start_of_turn>model
"""
    model_inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=True).to(device)
    model.to(device)
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=1000,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    # Everything after the last "model" turn marker is the new response.
    model_response = decoded.split("model\n")[-1].strip()
    return model_response
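# The app below only simulates streaming by replaying the finished answer
# word by word. A hedged sketch of real token-by-token streaming with
# transformers' TextIteratorStreamer follows; get_completion_streaming is a
# hypothetical helper, not part of the original app, and is not called below.
def get_completion_streaming(query: str, model, tokenizer):
    from threading import Thread
    from transformers import TextIteratorStreamer

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    prompt = f"<start_of_turn>user\n{query}\n<end_of_turn>\n<start_of_turn>model\n"
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    # generate() blocks, so run it in a background thread and consume the
    # streamer on the main thread as decoded text chunks arrive.
    Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=1000, do_sample=True),
    ).start()
    for text_chunk in streamer:
        yield text_chunk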
# Streamlit app
st.title("Customer Care ChatBot")
# Initialize chat history
if "messages" not in st.session_state:
st.session_state.messages = []
# Add initial welcome message
initial_message = {"role": "assistant", "content": "Hi, I am Sora, I am your customer support agent."}
st.session_state.messages.append(initial_message)
# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# Accept user input
if prompt := st.chat_input("How can I help you?"):
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Display user message in chat message container
    with st.chat_message("user"):
        st.markdown(prompt)
    # Generate and display chatbot response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        typing_placeholder = st.empty()
        typing_dots = ""  # Builds up the "..." typing indicator
        # Animate typing dots
        for _ in range(3):
            typing_dots += "."
            typing_placeholder.markdown(typing_dots)
            time.sleep(0.3)  # Adjust speed as needed
        response = get_completion(prompt, merged_model, tokenizer)
        typing_placeholder.empty()  # Clear typing dots once the answer is ready
        # Simulate a streamed reply by revealing one word at a time
        full_response = ""
        for chunk in response.split():
            full_response += chunk + " "
            time.sleep(0.05)
            message_placeholder.markdown(full_response + "▌")
        message_placeholder.markdown(full_response)
        # Add assistant response to chat history
        st.session_state.messages.append({"role": "assistant", "content": full_response})
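# Note: Streamlit 1.31+ offers st.write_stream, which renders a generator
# directly and returns the accumulated text. A hedged alternative to the
# simulated word-by-word loop above, using the streaming sketch defined
# earlier:
#
#     with st.chat_message("assistant"):
#         full_response = st.write_stream(
#             get_completion_streaming(prompt, merged_model, tokenizer)
#         )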