# -*- coding: utf-8 -*-
"""
Created on Mon Dec 30 22:20:13 2024

@author: BM109X32G-10GPU-02
"""
import os
from typing import Optional

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.tools import BaseTool
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.language_models import BaseLanguageModel
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

template = """
You are an expert chemist and your task is to respond to the question or solve the problem to the best of your ability. Answer in as much detail as possible. You can only respond in a single "Final Answer" format.
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know; don't try to make up an answer.

{context}

Question: {question}
Answer:
"""


class pdfreader(BaseTool):
    """Tool that answers questions about a PDF via retrieval-augmented QA."""

    name: str = "pdfreader"
    description: str = (
        "Used to read papers, summarize papers, and answer questions based on "
        "papers, literature, or publications. Input a query; returns the response."
    )
    llm: Optional[BaseLanguageModel] = None
    path: Optional[str] = None
    return_direct: bool = True

    def __init__(self, path: Optional[str] = None):
        super().__init__()
        # The API key and base URL are read from the environment.
        self.llm = ChatOpenAI(
            model="gpt-4o-2024-11-20",
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        self.path = path  # path to the PDF file to query

    def _run(self, query) -> str:
        # Load the PDF and split it into overlapping chunks for retrieval.
        loader = PyPDFLoader(self.path)
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=6000, chunk_overlap=1000)
        docs = text_splitter.split_documents(documents)

        # Embed the chunks and index them in an in-memory FAISS vector store.
        embeddings = OpenAIEmbeddings(
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        vectorstore = FAISS.from_documents(docs, embeddings)

        # Build a "stuff" RetrievalQA chain: the top-2 retrieved chunks are
        # stuffed into the prompt's {context} slot. The prompt uses both
        # {context} and {question}, so both must be declared as input variables.
        prompt = PromptTemplate(
            template=template, input_variables=["context", "question"]
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
            return_source_documents=True,
            chain_type_kwargs={"prompt": prompt},
        )
        result = qa_chain.invoke({"query": query})
        return result["result"]

    async def _arun(self, query) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError("This tool does not support async.")
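

# --- Minimal usage sketch (not part of the tool itself) ---
# Assumes OPENAI_API_KEY (and optionally OPENAI_API_BASE) are set in the
# environment; "paper.pdf" and the question below are hypothetical examples.
if __name__ == "__main__":
    tool = pdfreader(path="paper.pdf")
    answer = tool.run("What is the main conclusion of this paper?")
    print(answer)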