# TeLLAgent / tool/pdfreader.py
# Uploaded by jinysun — "Update tool/pdfreader.py" (commit 7854f0d, verified)
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 30 22:20:13 2024
@author: BM109X32G-10GPU-02
"""
from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain
from langchain import PromptTemplate
from langchain.tools import BaseTool
import os
from langchain_core.messages import HumanMessage, SystemMessage
from langchain.base_language import BaseLanguageModel
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
# Prompt for the RetrievalQA "stuff" chain: the retrieved document chunks are
# injected as {context} and the user's query as {question}.  The chain fills
# both placeholders at runtime.
template: str = """
You are an expert chemist and your task is to respond to the question or
solve the problem to the best of your ability. You need to answer in as much detail as possible.
You can only respond with a single "Final Answer" format.
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
<context>
{context}
</context>
Question: {question}
Answer:
"""
class pdfreader(BaseTool):
    """Tool that answers questions about a PDF via retrieval-augmented QA.

    Loads the PDF at ``path``, splits it into chunks, embeds the chunks into
    an in-memory FAISS index, and answers the query with a "stuff"
    RetrievalQA chain over the top-k retrieved chunks.
    """

    name: str = "pdfreader"
    description: str = (
        "Used to read papers, summarize papers, Q&A based on papers, literature or publication"
        "Input query , return the response"
    )
    # Chat model used to generate the answer (set in __init__).
    llm: BaseLanguageModel = None
    # Filesystem path of the PDF this tool reads.
    path: str = None
    # Hand the answer straight back to the user instead of the agent loop.
    return_direct: bool = True

    def __init__(self, path: str = None):
        """Create the tool.

        Args:
            path: Path to the PDF file this tool will answer questions about.
        """
        super().__init__()
        # Credentials and endpoint come from the environment so no secrets
        # are hard-coded in the source.
        self.llm = ChatOpenAI(
            model="gpt-4o-2024-11-20",
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        self.path = path

    def _run(self, query) -> str:
        """Answer ``query`` with retrieval-augmented QA over the PDF.

        Args:
            query: The user's question about the document.

        Returns:
            The chain's final answer string.

        Raises:
            ValueError: If the tool was constructed without a PDF path.
        """
        if not self.path:
            raise ValueError("pdfreader requires a PDF path; none was provided.")
        loader = PyPDFLoader(self.path)
        documents = loader.load()
        # Large chunks with generous overlap keep related paragraphs together
        # for the "stuff" chain.
        text_splitter = CharacterTextSplitter(chunk_size=6000, chunk_overlap=1000)
        docs = text_splitter.split_documents(documents)
        embeddings = OpenAIEmbeddings(
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        vectorstore = FAISS.from_documents(docs, embeddings)
        # Fix: the template references both {context} and {question}; declaring
        # only "question" fails PromptTemplate validation (the declared
        # input_variables must match the template's placeholders).
        prompt = PromptTemplate(template=template, input_variables=["context", "question"])
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
            return_source_documents=True,
            chain_type_kwargs={"prompt": prompt},
        )
        result = qa_chain.invoke(query)
        return result['result']

    async def _arun(self, query) -> str:
        """Use the tool asynchronously (not supported)."""
        raise NotImplementedError("this tool does not support async")