import os
import torch
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM
import chainlit as cl
from huggingface_hub import login
from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
# Load environment variables from .env file
load_dotenv()

# Retrieve the Hugging Face token from the environment
hugging_face_token = os.getenv("HUGGINGFACE_TOKEN")

DB_FAISS_PATH = 'vectorstore/db_faiss'

# Log in with the Hugging Face token (required for gated models like Llama-2)
login(token=hugging_face_token)
# Load the FAISS vector store built with a sentence-transformers embedding model
def load_vector_store():
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    db = FAISS.load_local(DB_FAISS_PATH, embeddings)
    return db
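
# load_vector_store() is defined but never called below. A minimal sketch of
# how the store could supply retrieved context for the prompt, assuming the
# index at DB_FAISS_PATH was built with the same embedding model
# (retrieve_context is an illustrative helper, not part of the original app):
def retrieve_context(db, query, k=2):
    # Join the top-k matching chunks into a single context string
    docs = db.similarity_search(query, k=k)
    return "\n".join(doc.page_content for doc in docs)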
# Load Llama-2 70B Chat over the public Petals swarm: the tokenizer and a
# small client-side shard run locally, while the transformer blocks are
# served by remote peers.
def load_llm():
    model_name = "meta-llama/Llama-2-70b-chat-hf"
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False, add_bos_token=False)
    model = AutoDistributedModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
    model = model.to('cpu')
    return model, tokenizer
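
# Quick connectivity check (a sketch; run once outside Chainlit to confirm
# the Petals swarm is reachable before launching the full app):
#   model, tokenizer = load_llm()
#   ids = tokenizer.encode("Hello, world", return_tensors="pt")
#   print(tokenizer.decode(model.generate(ids, max_new_tokens=5)[0]))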
# QA model function
def qa_bot():
    model, tokenizer = load_llm()
    return model, tokenizer

# Initialize conversational history
conversational_history = []
# Chainlit handlers
@cl.on_chat_start
async def start():
    model, tokenizer = qa_bot()
    msg = cl.Message(content="Starting the bot...")
    await msg.send()
    msg.content = "Hi, welcome to HealsMindAI. What is your query?"
    await msg.update()
    cl.user_session.set("model", model)
    cl.user_session.set("tokenizer", tokenizer)
    cl.user_session.set("history", conversational_history)
@cl.on_message
async def main(message: cl.Message):
    model = cl.user_session.get("model")
    tokenizer = cl.user_session.get("tokenizer")
    history = cl.user_session.get("history")
    # Debug logging for the incoming message
    print("Incoming message:", message.content)
    # Use the running history plus the new message as context for the query.
    # NOTE: this only concatenates past turns; it does not query the FAISS store.
    query_with_history = " ".join(history + [message.content])
    custom_prompt = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {}
Question: {}

Only return the helpful answer below and nothing else.
Helpful answer:
""".format(query_with_history, message.content)
    # Generate text with the distributed model. generate() lengths are counted
    # in tokens, not words, so measure the prompt with the tokenizer; Llama-2's
    # context window is 4096 tokens, so the answer budget is kept well inside it.
    input_ids = tokenizer.encode(custom_prompt, return_tensors="pt")
    max_generated_length = 512  # token budget for the generated answer
    max_length = input_ids.shape[1] + max_generated_length
    generated_output = model.generate(input_ids, max_length=max_length, num_return_sequences=1)
    # Decode only the newly generated tokens, skipping the echoed prompt
    decoded_output = tokenizer.decode(generated_output[0][input_ids.shape[1]:], skip_special_tokens=True)
    # Update conversational history with the user turn and the model's reply
    history.append(message.content)
    history.append(decoded_output)
    cl.user_session.set("history", history)

    await cl.Message(content=decoded_output).send()
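
# To run locally (assuming this file is saved as app.py and a .env file
# defines HUGGINGFACE_TOKEN=<your token>):
#   pip install chainlit petals transformers langchain faiss-cpu sentence-transformers python-dotenv
#   chainlit run app.py -w
# Note: meta-llama/Llama-2-70b-chat-hf is a gated checkpoint, so the token
# must belong to an account that has been granted access.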