import streamlit as st
from llama_index.llms.huggingface import HuggingFaceLLM
# Convert chat messages to Zephyr's expected prompt format
def messages_to_prompt(messages):
    prompt = ""
    for message in messages:
        if message.role == "system":
            prompt += f"<|system|>\n{message.content}</s>\n"
        elif message.role == "user":
            prompt += f"<|user|>\n{message.content}</s>\n"
        elif message.role == "assistant":
            prompt += f"<|assistant|>\n{message.content}</s>\n"

    # ensure we start with a system prompt, insert blank if needed
    if not prompt.startswith("<|system|>\n"):
        prompt = "<|system|>\n</s>\n" + prompt

    # add final assistant prompt
    prompt = prompt + "<|assistant|>\n"
    return prompt
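# For example, a system message "Be brief." followed by the user message
# "Hi" becomes:
#   "<|system|>\nBe brief.</s>\n<|user|>\nHi</s>\n<|assistant|>\n"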
# Convert a plain completion string to the Zephyr prompt format
def completion_to_prompt(completion):
    return f"<|system|>\n</s>\n<|user|>\n{completion}</s>\n<|assistant|>\n"
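# llm.complete() routes through completion_to_prompt, wrapping the bare
# string in an empty system turn, a user turn, and the final assistant
# tag where generation continues.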
# Load the LLM on CPU, without quantization
# (cached so Streamlit does not reload the model on every rerun)
@st.cache_resource
def load_llm():
    return HuggingFaceLLM(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        tokenizer_name="HuggingFaceH4/zephyr-7b-beta",
        context_window=3900,
        max_new_tokens=256,
        # do_sample is required for temperature/top_k/top_p to take effect
        generate_kwargs={"temperature": 0.7, "do_sample": True, "top_k": 50, "top_p": 0.95},
        messages_to_prompt=messages_to_prompt,
        completion_to_prompt=completion_to_prompt,
        device_map="cpu",  # use CPU
    )

llm = load_llm()
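# Note: zephyr-7b-beta has ~7B parameters, so loading it in full precision
# needs roughly 28 GB of RAM, and generation on CPU is slow; a host with
# less memory will fail at model load.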
# Streamlit app interface
st.title("LLM Text Generation App")

# Text input for the prompt
user_input = st.text_area("Enter your prompt:", "")
# Button to generate response
if st.button("Generate Response"):
    if user_input.strip():
        # Generate response based on the prompt
        with st.spinner("Generating response..."):
            response = llm.complete(user_input)
        # Display the generated response
        st.write("Generated Response:")
        st.write(str(response))
    else:
        st.warning("Please enter a valid prompt.")
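# To run locally (assuming the script is saved as app.py):
#   streamlit run app.py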