|
|
|
|
|
""" |
|
|
Created on Mon Dec 30 22:20:13 2024 |
|
|
|
|
|
@author: BM109X32G-10GPU-02 |
|
|
""" |
|
|
from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain |
|
|
|
|
|
from langchain import PromptTemplate |
|
|
|
|
|
from langchain.tools import BaseTool |
|
|
import os |
|
|
from langchain_core.messages import HumanMessage, SystemMessage |
|
|
from langchain.base_language import BaseLanguageModel |
|
|
from langchain.text_splitter import CharacterTextSplitter |
|
|
|
|
|
|
|
|
from langchain_community.document_loaders import PyPDFLoader |
|
|
from langchain_community.vectorstores import FAISS |
|
|
from langchain_openai import ChatOpenAI |
|
|
from langchain_openai import OpenAIEmbeddings |
|
|
|
|
|
# Prompt for the "stuff" RetrievalQA chain built in pdfreader._run.
# The chain fills {context} with the retrieved PDF chunks and {question}
# with the user's query; the model is instructed to answer only from that
# context and to say "don't know" rather than fabricate.
template = """




You are an expert chemist and your task is to respond to the question or


solve the problem to the best of your ability. You need to answer in as much detail as possible.


You can only respond with a single "Final Answer" format.


Use the following pieces of context to answer the question at the end.


If you don't know the answer, just say that you don't know, don't try to make up an answer.


<context>


{context}


</context>




Question: {question}


Answer:




"""
|
|
|
|
|
class pdfreader(BaseTool):
    """LangChain tool that answers questions about a single PDF document.

    On each call, the PDF at ``self.path`` is loaded, split into chunks,
    embedded with OpenAI embeddings, indexed in an in-memory FAISS vector
    store, and queried through a "stuff" RetrievalQA chain using the
    module-level ``template`` prompt.
    """

    name: str = "pdfreader"
    description: str = (
        "Used to read papers, summarize papers, Q&A based on papers, literature or publication"
        "Input query , return the response"
    )

    # Pydantic-declared fields; populated in __init__ after BaseTool init.
    llm: BaseLanguageModel = None
    path: str = None
    # Return the tool output directly to the user instead of feeding it
    # back to the agent loop.
    return_direct: bool = True

    def __init__(self, path: str = None):
        """Initialize the tool.

        Args:
            path: Filesystem path to the PDF to answer questions about.
                May also be assigned later via ``self.path``.
        """
        super().__init__()
        # Credentials and endpoint come from the environment so no secrets
        # are hard-coded; model choice is pinned for reproducible answers.
        self.llm = ChatOpenAI(
            model="gpt-4o-2024-11-20",
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        self.path = path

    def _run(self, query: str) -> str:
        """Answer ``query`` using retrieval-augmented QA over the PDF.

        Args:
            query: The user's question about the document.

        Returns:
            The chain's final answer string.

        Note:
            The vector index is rebuilt on every call; for many queries
            against the same document, caching the FAISS store would be
            a worthwhile optimization.
        """
        loader = PyPDFLoader(self.path)
        documents = loader.load()

        # Large chunks with generous overlap keep related passages together
        # so the "stuff" chain sees coherent context.
        text_splitter = CharacterTextSplitter(chunk_size=6000, chunk_overlap=1000)
        docs = text_splitter.split_documents(documents)

        embeddings = OpenAIEmbeddings(
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        vectorstore = FAISS.from_documents(docs, embeddings)

        # FIX: the template references both {context} and {question}, but
        # the original declared only "question" — a mismatch that fails
        # PromptTemplate validation and misstates the prompt's contract.
        prompt = PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )

        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            # k=2 keeps the stuffed context within the model's window.
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
            return_source_documents=True,
            chain_type_kwargs={"prompt": prompt},
        )

        # Pass the documented input mapping explicitly instead of relying
        # on Chain's implicit single-string coercion.
        result = qa_chain.invoke({"query": query})
        return result["result"]

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError("this tool does not support async")
|
|
|
|
|
|