# TeLLAgent / tool/pdfreader.py
# Uploaded by jinysun — "Update tool/pdfreader.py" (commit 7854f0d, verified)
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 30 22:20:13 2024
@author: BM109X32G-10GPU-02
"""
from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain
from langchain import PromptTemplate
from langchain.tools import BaseTool
import os
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.base_language import BaseLanguageModel
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
# Prompt for the RetrievalQA "stuff" chain: the retrieved document chunks are
# injected as {context} and the user's query as {question}.  The chain fills
# both placeholders at runtime.
template: str = """
You are an expert chemist and your task is to respond to the question or
solve the problem to the best of your ability. You need to answer in as much detail as possible.
You can only respond with a single "Final Answer" format.
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
<context>
{context}
</context>
Question: {question}
Answer:
"""
class pdfreader(BaseTool):
    """Tool that answers questions about a PDF via retrieval-augmented QA.

    Loads the PDF at ``path``, splits it into chunks, embeds the chunks into
    an in-memory FAISS index, and answers the query with a "stuff"
    RetrievalQA chain over the top-k retrieved chunks.
    """

    name: str = "pdfreader"
    description: str = (
        "Used to read papers, summarize papers, Q&A based on papers, literature or publication"
        "Input query , return the response"
    )
    # Chat model used to generate the answer (set in __init__).
    llm: BaseLanguageModel = None
    # Filesystem path of the PDF this tool reads.
    path: str = None
    # Hand the answer straight back to the user instead of the agent loop.
    return_direct: bool = True

    def __init__(self, path: str = None):
        """Create the tool.

        Args:
            path: Path to the PDF file this tool will answer questions about.
        """
        super().__init__()
        # Credentials and endpoint come from the environment so no secrets
        # are hard-coded in the source.
        self.llm = ChatOpenAI(
            model="gpt-4o-2024-11-20",
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        self.path = path

    def _run(self, query) -> str:
        """Answer ``query`` with retrieval-augmented QA over the PDF.

        Args:
            query: The user's question about the document.

        Returns:
            The chain's final answer string.

        Raises:
            ValueError: If the tool was constructed without a PDF path.
        """
        if not self.path:
            raise ValueError("pdfreader requires a PDF path; none was provided.")
        loader = PyPDFLoader(self.path)
        documents = loader.load()
        # Large chunks with generous overlap keep related paragraphs together
        # for the "stuff" chain.
        text_splitter = CharacterTextSplitter(chunk_size=6000, chunk_overlap=1000)
        docs = text_splitter.split_documents(documents)
        embeddings = OpenAIEmbeddings(
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        vectorstore = FAISS.from_documents(docs, embeddings)
        # Fix: the template references both {context} and {question}; declaring
        # only "question" fails PromptTemplate validation (the declared
        # input_variables must match the template's placeholders).
        prompt = PromptTemplate(template=template, input_variables=["context", "question"])
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
            return_source_documents=True,
            chain_type_kwargs={"prompt": prompt},
        )
        result = qa_chain.invoke(query)
        return result['result']

    async def _arun(self, query) -> str:
        """Use the tool asynchronously (not supported)."""
        raise NotImplementedError("this tool does not support async")