# RAG-Chatbot / app.py
# (Hugging Face Space file header — commit 11d58e9, verified, by pratikshahp)
import streamlit as st
import torch
from transformers import BitsAndBytesConfig
from llama_index.llms.huggingface import HuggingFaceLLM
# Function to convert chat messages into Zephyr's prompt format.
def messages_to_prompt(messages):
    """Render chat messages into the Zephyr-7B prompt template.

    Each message is wrapped in its role tag (``<|system|>``, ``<|user|>``,
    ``<|assistant|>``) and terminated with ``</s>``.  A blank system block
    is prepended when none is present, and a trailing ``<|assistant|>`` tag
    cues the model to generate its reply.

    Args:
        messages: iterable of objects with ``.role`` and ``.content``
            attributes (llama_index ChatMessage-like).

    Returns:
        str: the fully formatted prompt string.
    """
    prompt = ""
    for message in messages:
        # The role tags had been stripped (all three branches were
        # identical); without them Zephyr cannot distinguish speakers.
        if message.role == 'system':
            prompt += f"<|system|>\n{message.content}</s>\n"
        elif message.role == 'user':
            prompt += f"<|user|>\n{message.content}</s>\n"
        elif message.role == 'assistant':
            prompt += f"<|assistant|>\n{message.content}</s>\n"
    # Ensure we start with a system prompt; insert a blank one if needed.
    if not prompt.startswith("<|system|>"):
        prompt = "<|system|>\n</s>\n" + prompt
    # Add the final assistant tag so the model generates the response.
    prompt = prompt + "<|assistant|>\n"
    return prompt
# Function to convert a bare completion string into Zephyr's prompt format.
def completion_to_prompt(completion):
    """Wrap a plain text-completion request in the Zephyr template.

    Emits a blank system block, the given text as the user turn, and a
    trailing ``<|assistant|>`` tag cueing the model to respond.  (The role
    tags had been stripped from the original f-string.)

    Args:
        completion: the raw user text to complete.

    Returns:
        str: the fully formatted prompt string.
    """
    return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"
# Build and cache the LLM (full precision on CPU — no quantization applied).
@st.cache_resource
def load_llm():
    """Construct the Zephyr-7B ``HuggingFaceLLM`` once per session.

    ``st.cache_resource`` keeps the loaded model alive across Streamlit
    reruns so the (expensive) weights are only loaded once.
    """
    model_id = "HuggingFaceH4/zephyr-7b-beta"
    llm_config = {
        "model_name": model_id,
        "tokenizer_name": model_id,
        "context_window": 3900,
        "max_new_tokens": 256,
        "generate_kwargs": {"temperature": 0.7, "top_k": 50, "top_p": 0.95},
        "messages_to_prompt": messages_to_prompt,
        "completion_to_prompt": completion_to_prompt,
        "device_map": "cpu",  # Use CPU
    }
    return HuggingFaceLLM(**llm_config)
llm = load_llm()

# --- Streamlit app interface ---
st.title("LLM Text Generation App")

# Prompt entry field.
user_input = st.text_area("Enter your prompt:", "")

# Generate a completion when the button is pressed.
if st.button("Generate Response"):
    if not user_input.strip():
        # Guard clause: reject blank / whitespace-only prompts.
        st.warning("Please enter a valid prompt.")
    else:
        with st.spinner("Generating response..."):
            response = llm.complete(user_input)
            # Display the generated response.
            st.write("Generated Response:")
            st.write(str(response))