# src/model_processor.py
import torch
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.vectorstores import Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


class LlamaProcessor:
    def __init__(self, model_id, hf_token):
        self.model_id = model_id
        self.hf_token = hf_token

        # Hugging Face text-generation pipeline for the Llama model
        self.pipe = pipeline(
            "text-generation",
            model=model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            token=hf_token
        )

        # LangChain wrapper around the pipeline, plus the embedding model used
        # to index PDF chunks
        self.llm = HuggingFacePipeline(pipeline=self.pipe)
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    def process_pdf(self, file_path):
        # Load the PDF, split it into overlapping chunks, and index the chunks
        # in an in-memory Chroma vector store
        loader = PyPDFLoader(file_path)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
        splits = text_splitter.split_documents(docs)
        vectorstore = Chroma.from_documents(documents=splits, embedding=self.embeddings)
        return vectorstore
    def get_answer(self, query, vectorstore):
        # 1. Retrieve the most relevant chunks for the query
        relevant_docs = vectorstore.similarity_search(query, k=3)
        context = "\n".join([doc.page_content for doc in relevant_docs])

        # 2. Format as Llama 3.2 chat messages so the model's chat template
        #    can be applied
        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant. Use the provided context to answer the user's question."
            },
            {
                "role": "user",
                "content": f"Context: {context}\n\nQuestion: {query}"
            },
        ]

        # 3. Generate a response; calling the pipeline directly ensures the
        #    chat template is applied correctly
        outputs = self.pipe(
            messages,
            max_new_tokens=256,
            temperature=0.1,
            do_sample=True
        )

        # 4. The pipeline returns the full conversation; the last message is
        #    the assistant's reply
        return outputs[0]["generated_text"][-1]["content"]
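

# Minimal usage sketch (not part of the original module): the model id, the
# HF_TOKEN environment variable, the PDF path, and the question below are
# placeholder assumptions chosen for illustration only.
if __name__ == "__main__":
    import os

    processor = LlamaProcessor(
        model_id="meta-llama/Llama-3.2-1B-Instruct",  # hypothetical model id
        hf_token=os.environ.get("HF_TOKEN"),          # assumed env variable
    )
    vectorstore = processor.process_pdf("example.pdf")  # placeholder path
    answer = processor.get_answer("What is this document about?", vectorstore)
    print(answer)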