RoofingRoadmap / Refiner.py
mfallahian's picture
feat: ver 2.0
29f53db
raw
history blame
2.51 kB
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableLambda
from langchain_core.prompts import PromptTemplate
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from pydantic import BaseModel, Field
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_text_splitters import RecursiveCharacterTextSplitter
from operator import itemgetter
from config import NEW_REFINE_SYSTEM_PROMPT_JSON
class Answer(BaseModel):
    """Schema for the refiner's structured JSON output.

    NOTE: the ``description`` strings below are surfaced to the LLM through
    ``JsonOutputParser.get_format_instructions()`` (see RefinementPipeline),
    so they are part of the runtime prompt, not mere documentation.
    """

    # Paraphrased/clarified version of the user's original question.
    enhanced_question: str = Field(description="Paraphrased question")
    # Improved answer, presumably grounded in the retrieved PDF context
    # — confirm against NEW_REFINE_SYSTEM_PROMPT_JSON in config.
    enhanced_answer: str = Field(description="Enhanced answer")
class RefinementPipeline:
    """RAG pipeline that refines a question/answer pair against a reference PDF.

    At construction time the PDF is loaded, split into chunks, embedded, and
    indexed in an in-memory vector store. ``invoke`` retrieves chunks relevant
    to the question and asks the LLM to emit JSON matching the ``Answer``
    schema, which the parser returns as a dict.
    """

    def __init__(
        self,
        model: str = "gpt-4.1",
        temperature: float = 0.1,
        pdf_path: str = "refine.pdf",
        chunk_size: int = 2000,
        chunk_overlap: int = 200,
    ):
        """Build the LLM, the PDF retriever, and the processing chain.

        Args:
            model: OpenAI chat model name.
            temperature: Sampling temperature (low for stable refinement).
            pdf_path: Path to the reference PDF (previously hard-coded to
                "refine.pdf"; the default preserves the old behavior).
            chunk_size: Character length of each document chunk.
            chunk_overlap: Character overlap between consecutive chunks.
        """
        self.llm = ChatOpenAI(model=model, temperature=temperature)
        self.parser = JsonOutputParser(pydantic_object=Answer)
        self.prompt = PromptTemplate(
            template=NEW_REFINE_SYSTEM_PROMPT_JSON,
            input_variables=["question", "answer", "context"],
            # Bake the parser's format instructions into the prompt so the
            # model emits JSON conforming to the Answer schema.
            partial_variables={"format_instructions": self.parser.get_format_instructions()},
        )

        # Load and index the reference PDF. Intermediate objects are kept as
        # attributes for backward compatibility with code that inspects them.
        self.pdf_loader = PyMuPDFLoader(pdf_path)
        self.pdf_docs = self.pdf_loader.load()
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        self.splits = self.text_splitter.split_documents(self.pdf_docs)
        self.pdf_vectorstore = InMemoryVectorStore.from_documents(
            documents=self.splits, embedding=OpenAIEmbeddings()
        )
        self.pdf_retriever = self.pdf_vectorstore.as_retriever()

        # Chain: retrieve context for the question -> flatten retrieved
        # documents into a single string -> prompt -> LLM -> JSON parse.
        self.chain = (
            {
                "context": itemgetter("question") | self.pdf_retriever,
                "question": itemgetter("question"),
                "answer": itemgetter("answer"),
            }
            | RunnableLambda(self._format_inputs)
            | self.prompt
            | self.llm
            | self.parser
        )

    @staticmethod
    def _format_inputs(inputs: dict) -> dict:
        """Collapse retrieved Documents into one newline-joined context string.

        ``inputs["context"]`` is the retriever's list of Document objects;
        question and answer pass through unchanged.
        """
        return {
            "context": "\n".join(doc.page_content for doc in inputs["context"]),
            "question": inputs["question"],
            "answer": inputs["answer"],
        }

    def invoke(self, question: str, answer: str):
        """Run the chain; returns the parsed JSON (dict with the Answer keys)."""
        return self.chain.invoke({"question": question, "answer": answer})