Spaces:
Runtime error
Runtime error
| import os | |
| os.environ["CHAINLIT_CONFIG_DIR"] = "/tmp/.chainlit" | |
| os.environ["CHAINLIT_FILES_PATH"] = "/tmp/.chainlit/files" | |
| import chainlit as cl | |
| from langchain.vectorstores import FAISS | |
| from langchain.embeddings import HuggingFaceEmbeddings | |
| from langchain.document_loaders import TextLoader | |
| from langchain.text_splitter import CharacterTextSplitter | |
| from langchain.chains import RetrievalQA | |
| from langchain.prompts import PromptTemplate | |
| from langchain.llms import HuggingFacePipeline | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline | |
| import torch | |
| import os | |
| # === Step 1: Build or load Vectorstore === | |
# Module-level configuration for the retrieval index. Both values are relative
# paths, so they resolve against the process's current working directory.
# VECTORSTORE_DIR: folder build_vectorstore() saves the FAISS index into and
# load_vectorstore() reads it back from.
| VECTORSTORE_DIR = "vectorstore" | |
# DATA_PATH: the single text file handed to TextLoader when the index is built.
| DATA_PATH = "data/textile_notes.txt" # Your textile documents path | |
def build_vectorstore():
    """Build the FAISS index from the source text file and save it to disk.

    The file at ``DATA_PATH`` is loaded with ``TextLoader``, split with
    ``CharacterTextSplitter(chunk_size=500, chunk_overlap=50)``, embedded with
    the ``BAAI/bge-small-en-v1.5`` model, and the resulting index is written
    to ``VECTORSTORE_DIR``.

    Returns:
        The newly created FAISS vector store.
    """
    raw_docs = TextLoader(DATA_PATH).load()
    pieces = CharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    ).split_documents(raw_docs)

    embedder = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
    store = FAISS.from_documents(pieces, embedder)
    # Persist the index so later calls can load it instead of re-embedding.
    store.save_local(VECTORSTORE_DIR)
    return store
def load_vectorstore():
    """Load the FAISS index stored under ``VECTORSTORE_DIR``.

    Uses the same ``BAAI/bge-small-en-v1.5`` embedding model that
    ``build_vectorstore`` used to create the index, so query vectors are
    comparable with the stored ones.

    Returns:
        The FAISS vector store read from disk.
    """
    embedder = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
    # NOTE(review): newer LangChain releases reportedly refuse to unpickle the
    # docstore unless ``allow_dangerous_deserialization=True`` is passed --
    # confirm against the pinned LangChain version before changing this call.
    store = FAISS.load_local(VECTORSTORE_DIR, embedder)
    return store
# === Step 2: Load LLM and create QA chain ===
# Process-wide cache for the chain. Loading the 7B model is slow and
# memory-hungry, so it must happen once, not once per chat session.
_QA_CHAIN = None


def load_qa_chain():
    """Return the RetrievalQA chain, building it on the first call only.

    On the first call this loads (or builds) the FAISS vector store, loads the
    ``mistralai/Mistral-7B-Instruct-v0.1`` model into a text-generation
    pipeline, and wires both into a "stuff" RetrievalQA chain that retrieves
    4 chunks per query. Later calls return the same chain object; the chain
    is configured without conversation memory, so sharing it between chat
    sessions does not mix their state.

    Returns:
        A RetrievalQA chain. Because ``return_source_documents=True`` it
        produces two outputs (``"result"`` and ``"source_documents"``), so it
        must be invoked with a dict input (``chain({"query": ...})``) rather
        than ``chain.run(...)``.
    """
    global _QA_CHAIN
    if _QA_CHAIN is not None:
        return _QA_CHAIN

    # Load the saved vectorstore if its index file exists, else build it.
    # Checking for the index file rather than the bare directory means an
    # empty or half-written folder triggers a rebuild instead of a crash;
    # FAISS.save_local() writes "index.faiss" by default.
    index_file = os.path.join(VECTORSTORE_DIR, "index.faiss")
    if os.path.exists(index_file):
        vectorstore = load_vectorstore()
    else:
        vectorstore = build_vectorstore()

    # Load open-source Mistral 7B Instruct model.
    model_id = "mistralai/Mistral-7B-Instruct-v0.1"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
    )
    llm = HuggingFacePipeline(pipeline=pipe)

    # Prompt template for friendly, user-focused answers.
    prompt_template = """
Answer the question using ONLY the context below.
Be clear, helpful, and friendly.
Context:
{context}
Question:
{question}
"""
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],
    )

    _QA_CHAIN = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
        chain_type_kwargs={"prompt": prompt},
        return_source_documents=True,
    )
    return _QA_CHAIN
# === Chainlit event handlers ===
@cl.on_chat_start
async def on_chat_start():
    """Prepare a new chat session and greet the user.

    Registered with Chainlit through ``@cl.on_chat_start``; without the
    decorator Chainlit never calls this coroutine. The QA chain is obtained
    from ``load_qa_chain`` and stored in the user session under the key
    ``"qa_chain"`` for the message handler to pick up.
    """
    # load_qa_chain() is synchronous and may load model weights, so run it in
    # a worker thread to keep the event loop responsive.
    qa = await cl.make_async(load_qa_chain)()
    cl.user_session.set("qa_chain", qa)
    # Greeting restored from mis-decoded text (the waving-hand emoji and the
    # em dash had been turned into stray Greek letters).
    await cl.Message(
        "👋 Hi! Ask me anything about textile — I'll answer using our custom documents."
    ).send()
@cl.on_message
async def on_message(message: cl.Message):
    """Answer an incoming user message with the session's QA chain.

    Registered with Chainlit through ``@cl.on_message``; without the
    decorator Chainlit never calls this coroutine.

    Args:
        message: The incoming Chainlit message; its ``content`` is used as
            the query.
    """
    qa = cl.user_session.get("qa_chain")
    if qa is None:
        # The chain is stored by the chat-start handler; if it is missing,
        # tell the user instead of failing on a None object.
        await cl.Message(
            "The assistant is not ready yet. Please start a new chat and try again."
        ).send()
        return

    # The chain is created with return_source_documents=True, so it has two
    # output keys and Chain.run() would raise ValueError. Call the chain with
    # a dict input and pick the answer out of the returned mapping.
    # Generation is blocking, so it runs in a worker thread.
    outputs = await cl.make_async(qa)({"query": message.content})
    await cl.Message(outputs["result"]).send()