# NOTE(review): removed non-code residue from a scraped Hugging Face Spaces
# page ("Spaces: / Sleeping / Sleeping") — it was not Python and broke parsing.
from operator import itemgetter

from langchain_community.document_loaders import PyMuPDFLoader
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from pydantic import BaseModel, Field

from config import NEW_REFINE_SYSTEM_PROMPT_JSON
class Answer(BaseModel):
    """Schema for the refined Q&A pair the LLM must return as JSON."""

    # Reworded form of the user's original question.
    enhanced_question: str = Field(description="Paraphrased question")
    # Improved answer, grounded in the retrieved context.
    enhanced_answer: str = Field(description="Enhanced answer")
class RefinementPipeline:
    """RAG pipeline that refines a question/answer pair against a reference PDF.

    On construction it loads a PDF, splits it into overlapping chunks, and
    indexes them in an in-memory vector store. ``invoke`` retrieves chunks
    relevant to the question and prompts the LLM to return JSON matching the
    ``Answer`` schema (paraphrased question + enhanced answer).
    """

    def __init__(
        self,
        model: str = "gpt-4.1",
        temperature: float = 0.1,
        pdf_path: str = "refine.pdf",
        chunk_size: int = 2000,
        chunk_overlap: int = 200,
    ):
        """Build the retrieval + refinement chain.

        Args:
            model: OpenAI chat model name.
            temperature: Sampling temperature (kept low for stable output).
            pdf_path: Path to the reference PDF to ground answers in
                (previously hard-coded to ``"refine.pdf"``; default preserved).
            chunk_size: Character length of each document chunk.
            chunk_overlap: Character overlap between consecutive chunks.
        """
        self.llm = ChatOpenAI(model=model, temperature=temperature)
        self.parser = JsonOutputParser(pydantic_object=Answer)
        self.prompt = PromptTemplate(
            template=NEW_REFINE_SYSTEM_PROMPT_JSON,
            input_variables=["question", "answer", "context"],
            partial_variables={
                "format_instructions": self.parser.get_format_instructions()
            },
        )

        # Load the reference PDF and split it into overlapping chunks so the
        # retriever can return focused passages rather than whole pages.
        self.pdf_loader = PyMuPDFLoader(pdf_path)
        self.pdf_docs = self.pdf_loader.load()
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=chunk_size, chunk_overlap=chunk_overlap
        )
        self.splits = self.text_splitter.split_documents(self.pdf_docs)

        # Index the chunks in an in-memory vector store. NOTE: embedding the
        # chunks calls the OpenAI API (requires OPENAI_API_KEY).
        self.pdf_vectorstore = InMemoryVectorStore.from_documents(
            documents=self.splits, embedding=OpenAIEmbeddings()
        )
        self.pdf_retriever = self.pdf_vectorstore.as_retriever()

        # question -> retrieved docs -> flattened context string
        #          -> prompt -> LLM -> parsed JSON dict
        self.chain = (
            {
                "context": itemgetter("question") | self.pdf_retriever,
                "question": itemgetter("question"),
                "answer": itemgetter("answer"),
            }
            | RunnableLambda(
                lambda x: {
                    # Collapse the retrieved Documents into one context string.
                    "context": "\n".join(doc.page_content for doc in x["context"]),
                    "question": x["question"],
                    "answer": x["answer"],
                }
            )
            | self.prompt
            | self.llm
            | self.parser
        )

    def invoke(self, question: str, answer: str):
        """Run the chain on one Q&A pair; returns the parsed JSON output."""
        return self.chain.invoke({"question": question, "answer": answer})