import os
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM
import chainlit as cl
from huggingface_hub import login
from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
# Load environment variables from .env file
load_dotenv()

# Retrieve the Hugging Face token from the environment
hugging_face_token = os.getenv("HUGGINGFACE_TOKEN")

DB_FAISS_PATH = 'vectorstore/db_faiss'

# Log in with the Hugging Face token (required for gated models like Llama-2)
login(token=hugging_face_token)
# Load the FAISS vector store built with a sentence-transformers embedding model
def load_vector_store():
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    db = FAISS.load_local(DB_FAISS_PATH, embeddings)
    return db
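
# load_vector_store() is defined but never called below. A minimal sketch of
# how the store could supply retrieved context for the prompt, assuming the
# index at DB_FAISS_PATH was built with the same embedding model
# (retrieve_context is an illustrative helper, not part of the original app):
def retrieve_context(db, query, k=2):
    # Join the top-k matching chunks into a single context string
    docs = db.similarity_search(query, k=k)
    return "\n".join(doc.page_content for doc in docs)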
# Load Llama-2 70B Chat over the public Petals swarm: the tokenizer and a
# small client-side shard run locally, while the transformer blocks are
# served by remote peers.
def load_llm():
    model_name = "meta-llama/Llama-2-70b-chat-hf"
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, add_bos_token=False)
    model = AutoDistributedModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
    model = model.to('cpu')
    return model, tokenizer
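
# Quick connectivity check (a sketch; run once outside Chainlit to confirm
# the Petals swarm is reachable before launching the full app):
#   model, tokenizer = load_llm()
#   ids = tokenizer.encode("Hello, world", return_tensors="pt")
#   print(tokenizer.decode(model.generate(ids, max_new_tokens=5)[0]))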
# QA model function
def qa_bot():
    model, tokenizer = load_llm()
    return model, tokenizer

# Initialize conversational history
conversational_history = []
# Chainlit handlers
@cl.on_chat_start
async def start():
    model, tokenizer = qa_bot()
    msg = cl.Message(content="Starting the bot...")
    await msg.send()
    msg.content = "Hi, welcome to HealsMindAI. What is your query?"
    await msg.update()
    cl.user_session.set("model", model)
    cl.user_session.set("tokenizer", tokenizer)
    cl.user_session.set("history", conversational_history)
@cl.on_message
async def main(message: cl.Message):
    model = cl.user_session.get("model")
    tokenizer = cl.user_session.get("tokenizer")
    history = cl.user_session.get("history")
    # Debug logging for the incoming message
    print("Incoming message:", message.content)
    # Use the running history plus the new message as context for the query.
    # NOTE: this only concatenates past turns; it does not query the FAISS store.
    query_with_history = " ".join(history + [message.content])
    custom_prompt = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {}
Question: {}

Only return the helpful answer below and nothing else.
Helpful answer:
""".format(query_with_history, message.content)
    # Generate text with the distributed model. generate() lengths are counted
    # in tokens, not words, so measure the prompt with the tokenizer; Llama-2's
    # context window is 4096 tokens, so the answer budget is kept well inside it.
    input_ids = tokenizer.encode(custom_prompt, return_tensors="pt")
    max_generated_length = 512  # token budget for the generated answer
    max_length = input_ids.shape[1] + max_generated_length
    generated_output = model.generate(input_ids, max_length=max_length, num_return_sequences=1)
    # Decode only the newly generated tokens, skipping the echoed prompt
    decoded_output = tokenizer.decode(generated_output[0][input_ids.shape[1]:], skip_special_tokens=True)
    # Update conversational history with the user turn and the model's reply
    history.append(message.content)
    history.append(decoded_output)
    cl.user_session.set("history", history)

    await cl.Message(content=decoded_output).send()
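
# To run locally (assuming this file is saved as app.py and a .env file
# defines HUGGINGFACE_TOKEN=<your token>):
#   pip install chainlit petals transformers langchain faiss-cpu sentence-transformers python-dotenv
#   chainlit run app.py -w
# Note: meta-llama/Llama-2-70b-chat-hf is a gated checkpoint, so the token
# must belong to an account that has been granted access.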