Spaces:

AgroVision25
/

Textile_Rag_Chatbot

Runtime error

App Files Files Community

Textile_Rag_Chatbot / app.py

navjotk

Update app.py

48caab1 verified 8 months ago

raw

history blame contribute delete

3.1 kB

	# Redirect Chainlit's config and files directories to /tmp. These variables
	# are set before `import chainlit` below; presumably Chainlit reads them at
	# import time and the app directory is not writable on the host -- TODO confirm.
	import os
	os.environ["CHAINLIT_CONFIG_DIR"] = "/tmp/.chainlit"
	os.environ["CHAINLIT_FILES_PATH"] = "/tmp/.chainlit/files"


	import chainlit as cl
	from langchain.vectorstores import FAISS
	from langchain.embeddings import HuggingFaceEmbeddings
	from langchain.document_loaders import TextLoader
	from langchain.text_splitter import CharacterTextSplitter
	from langchain.chains import RetrievalQA
	from langchain.prompts import PromptTemplate
	from langchain.llms import HuggingFacePipeline
	from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
	import torch
	import os

	# === Step 1: Build or load Vectorstore ===

	# Directory the FAISS index is saved to and loaded from (relative path).
	VECTORSTORE_DIR = "vectorstore"
	# Plain-text source document that is split, embedded and indexed.
	DATA_PATH = "data/textile_notes.txt" # Your textile documents path

	def build_vectorstore():
	    """Create the FAISS index from the text file at DATA_PATH and persist it.

	    The document is split into 500-character chunks with a 50-character
	    overlap, embedded with the BAAI/bge-small-en-v1.5 model, indexed with
	    FAISS and written to VECTORSTORE_DIR.

	    Returns:
	        The freshly built FAISS vector store.
	    """
	    raw_docs = TextLoader(DATA_PATH).load()
	    pieces = CharacterTextSplitter(
	        chunk_size=500,
	        chunk_overlap=50,
	    ).split_documents(raw_docs)

	    embedder = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
	    store = FAISS.from_documents(pieces, embedder)

	    # Persist the index so later start-ups can load it instead of re-embedding.
	    store.save_local(VECTORSTORE_DIR)
	    return store

	def load_vectorstore():
	    """Load the FAISS index previously saved in VECTORSTORE_DIR.

	    The same embedding model used to build the index
	    (BAAI/bge-small-en-v1.5) is instantiated so queries are embedded
	    consistently with the stored vectors.

	    Returns:
	        The FAISS vector store read from disk.
	    """
	    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
	    try:
	        # SECURITY: FAISS.load_local unpickles the docstore. Recent LangChain
	        # releases refuse to do so unless the caller opts in explicitly.
	        # Opting in is acceptable here only because the index is produced
	        # locally by build_vectorstore(); never point VECTORSTORE_DIR at
	        # files from an untrusted source.
	        return FAISS.load_local(
	            VECTORSTORE_DIR,
	            embeddings,
	            allow_dangerous_deserialization=True,
	        )
	    except TypeError:
	        # Older LangChain releases do not know the opt-in keyword and
	        # reject it; fall back to the original two-argument call.
	        return FAISS.load_local(VECTORSTORE_DIR, embeddings)

	# === Step 2: Load LLM and create QA chain ===

	# Process-wide cache: the chain holds the 7B model and the FAISS index, so it
	# must be built once and shared instead of being rebuilt for every chat.
	_QA_CHAIN = None


	def load_qa_chain():
	    """Return the RetrievalQA chain, building it on first use.

	    On the first call this loads (or builds) the vector store, loads the
	    Mistral-7B-Instruct model into a text-generation pipeline and wires both
	    into a "stuff" RetrievalQA chain that retrieves the 4 closest chunks.
	    Subsequent calls return the same chain object.

	    Returns:
	        A RetrievalQA chain. Because it is created with
	        ``return_source_documents=True`` it has two outputs
	        ("result" and "source_documents").
	    """
	    global _QA_CHAIN
	    if _QA_CHAIN is not None:
	        return _QA_CHAIN

	    # Load the local vectorstore if it exists, else build it. Check for the
	    # index file itself (FAISS.save_local writes "index.faiss"), because an
	    # empty or half-written directory would otherwise make loading fail.
	    if os.path.isfile(os.path.join(VECTORSTORE_DIR, "index.faiss")):
	        vectorstore = load_vectorstore()
	    else:
	        vectorstore = build_vectorstore()

	    # Load the open-source Mistral 7B Instruct model in half precision and
	    # let device_map="auto" decide where to place the weights.
	    tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
	    model = AutoModelForCausalLM.from_pretrained(
	        "mistralai/Mistral-7B-Instruct-v0.1",
	        torch_dtype=torch.float16,
	        device_map="auto"
	    )

	    pipe = pipeline(
	        "text-generation",
	        model=model,
	        tokenizer=tokenizer,
	        max_new_tokens=512,
	        do_sample=True,
	        temperature=0.7,
	    )
	    llm = HuggingFacePipeline(pipeline=pipe)

	    # Prompt template for friendly, user-focused answers
	    prompt_template = """
	Answer the question using ONLY the context below.
	Be clear, helpful, and friendly.

	Context:
	{context}

	Question:
	{question}
	"""

	    prompt = PromptTemplate(
	        template=prompt_template,
	        input_variables=["context", "question"]
	    )

	    qa_chain = RetrievalQA.from_chain_type(
	        llm=llm,
	        chain_type="stuff",
	        retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
	        chain_type_kwargs={"prompt": prompt},
	        return_source_documents=True,
	    )

	    _QA_CHAIN = qa_chain
	    return qa_chain

	# === Chainlit event handlers ===

	@cl.on_chat_start
	async def on_chat_start():
	    """Prepare the QA chain for a new chat session and greet the user.

	    The chain is stored in the Chainlit user session under "qa_chain" so
	    the message handler can retrieve it.
	    """
	    # load_qa_chain() is synchronous and may spend a long time loading the
	    # model; run it in a worker thread so the event loop is not blocked.
	    qa = await cl.make_async(load_qa_chain)()
	    cl.user_session.set("qa_chain", qa)
	    await cl.Message("👋 Hi! Ask me anything about textile — I'll answer using our custom documents.").send()

	@cl.on_message
	async def on_message(message: cl.Message):
	    """Answer an incoming user message with the session's QA chain.

	    Args:
	        message: The Chainlit message sent by the user; its ``content`` is
	            used as the query.
	    """
	    qa = cl.user_session.get("qa_chain")
	    if qa is None:
	        # The chain is stored by the chat-start handler; without it there is
	        # nothing to query, so tell the user instead of failing on None.
	        await cl.Message("The assistant is not ready yet. Please start a new chat and try again.").send()
	        return

	    # The chain is created with return_source_documents=True, so it has two
	    # output keys and Chain.run() would raise ValueError. Call the chain with
	    # its "query" input and read the "result" output instead. The call is
	    # blocking (model generation), so it is executed in a worker thread.
	    response = await cl.make_async(qa)({"query": message.content})
	    answer = response["result"]
	    await cl.Message(answer).send()