# src/model_processor.py
import torch
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFacePipeline
from langchain_community.vectorstores import Chroma
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


class LlamaProcessor:
    def __init__(self, model_id, hf_token):
        self.model_id = model_id
        self.hf_token = hf_token

        # Hugging Face text-generation pipeline for the Llama model
        self.pipe = pipeline(
            "text-generation",
            model=model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
            token=hf_token
        )

        # LangChain wrapper around the pipeline, plus the embedding model used
        # to index PDF chunks
        self.llm = HuggingFacePipeline(pipeline=self.pipe)
        self.embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    def process_pdf(self, file_path):
        # Load the PDF, split it into overlapping chunks, and index the chunks
        # in an in-memory Chroma vector store
        loader = PyPDFLoader(file_path)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=100)
        splits = text_splitter.split_documents(docs)
        vectorstore = Chroma.from_documents(documents=splits, embedding=self.embeddings)
        return vectorstore
    def get_answer(self, query, vectorstore):
        # 1. Retrieve the most relevant chunks for the query
        relevant_docs = vectorstore.similarity_search(query, k=3)
        context = "\n".join([doc.page_content for doc in relevant_docs])

        # 2. Format as Llama 3.2 chat messages so the model's chat template
        #    can be applied
        messages = [
            {
                "role": "system",
                "content": "You are a helpful assistant. Use the provided context to answer the user's question."
            },
            {
                "role": "user",
                "content": f"Context: {context}\n\nQuestion: {query}"
            },
        ]

        # 3. Generate a response; calling the pipeline directly ensures the
        #    chat template is applied correctly
        outputs = self.pipe(
            messages,
            max_new_tokens=256,
            temperature=0.1,
            do_sample=True
        )

        # 4. The pipeline returns the full conversation; the last message is
        #    the assistant's reply
        return outputs[0]["generated_text"][-1]["content"]
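

# Minimal usage sketch (not part of the original module): the model id, the
# HF_TOKEN environment variable, the PDF path, and the question below are
# placeholder assumptions chosen for illustration only.
if __name__ == "__main__":
    import os

    processor = LlamaProcessor(
        model_id="meta-llama/Llama-3.2-1B-Instruct",  # hypothetical model id
        hf_token=os.environ.get("HF_TOKEN"),          # assumed env variable
    )
    vectorstore = processor.process_pdf("example.pdf")  # placeholder path
    answer = processor.get_answer("What is this document about?", vectorstore)
    print(answer)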