jinysun committed on
Commit
7854f0d
·
verified ·
1 Parent(s): f51746a

Update tool/pdfreader.py

Browse files
Files changed (1) hide show
  1. tool/pdfreader.py +85 -85
tool/pdfreader.py CHANGED
@@ -1,86 +1,86 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- Created on Mon Dec 30 22:20:13 2024
4
-
5
- @author: BM109X32G-10GPU-02
6
- """
7
- from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain
8
-
9
- from langchain import PromptTemplate
10
-
11
- from langchain.tools import BaseTool
12
-
13
- from langchain_core.messages import HumanMessage, SystemMessage
14
- from langchain.base_language import BaseLanguageModel
15
- from langchain.text_splitter import CharacterTextSplitter
16
-
17
-
18
- from langchain_community.document_loaders import PyPDFLoader
19
- from langchain_community.vectorstores import FAISS
20
- from langchain_openai import ChatOpenAI
21
- from langchain_openai import OpenAIEmbeddings
22
-
23
- template = """
24
-
25
- You are an expert chemist and your task is to respond to the question or
26
- solve the problem to the best of your ability. You need to answer in as much detail as possible.
27
- You can only respond with a single "Final Answer" format.
28
- Use the following pieces of context to answer the question at the end.
29
- If you don't know the answer, just say that you don't know, don't try to make up an answer.
30
- <context>
31
- {context}
32
- </context>
33
-
34
- Question: {question}
35
- Answer:
36
-
37
- """
38
-
39
class pdfreader(BaseTool):
    """LangChain tool that answers a question about one PDF document.

    Each call loads the PDF at ``path``, splits it into overlapping text
    chunks, embeds the chunks into an in-memory FAISS index, retrieves the
    two best-matching chunks, and asks the chat model to answer the question
    using the module-level ``template`` prompt.

    Credentials are read from the ``OPENAI_API_KEY`` and ``OPENAI_API_BASE``
    environment variables; secrets must never be written into this file.
    """

    name: str = "pdfreader"
    description: str = (
        # The two literals are concatenated, so the first one must end with
        # a separator or the sentences run together in the agent prompt.
        "Used to read papers, summarize papers, Q&A based on papers, literature or publication. "
        "Input query , return the response"
    )

    # Chat model used to generate the answer; assigned in __init__.
    llm: BaseLanguageModel = None
    # Filesystem path of the PDF to read; assigned in __init__.
    path: str = None
    return_direct: bool = True

    def __init__(self, path: str = None):
        """Create the tool.

        Args:
            path: Location of the PDF that ``_run`` will load.
        """
        # Imported locally because this version of the module has no
        # top-level ``import os``.
        import os

        super().__init__()
        self.llm = ChatOpenAI(
            model="gpt-4o-2024-11-20",
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        self.path = path

    def _run(self, query: str) -> str:
        """Answer ``query`` from the content of the configured PDF.

        Args:
            query: The question to ask about the document.

        Returns:
            The ``"result"`` entry of the retrieval-QA chain output.

        Raises:
            ValueError: If no PDF path was configured, or if the PDF yields
                no text chunks to index.
        """
        import os

        if not self.path:
            raise ValueError("pdfreader requires a PDF path; none was provided")

        pages = PyPDFLoader(self.path).load()
        splitter = CharacterTextSplitter(chunk_size=6000, chunk_overlap=1000)
        chunks = splitter.split_documents(pages)
        if not chunks:
            # Building a FAISS index from zero documents fails with an
            # unhelpful low-level error, so report the real cause instead.
            raise ValueError(f"no text could be extracted from {self.path!r}")

        embeddings = OpenAIEmbeddings(
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        vectorstore = FAISS.from_documents(chunks, embeddings)

        # The template contains both {context} and {question}; declare both
        # so the "stuff" chain can find its document variable.
        prompt = PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
            return_source_documents=True,
            chain_type_kwargs={"prompt": prompt},
        )

        result = qa_chain.invoke({"query": query})
        return result["result"]

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously (not supported; always raises)."""
        raise NotImplementedError("this tool does not support async")
85
-
86
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Mon Dec 30 22:20:13 2024
4
+
5
+ @author: BM109X32G-10GPU-02
6
+ """
7
+ from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain
8
+
9
+ from langchain import PromptTemplate
10
+
11
+ from langchain.tools import BaseTool
12
+ import os
13
+ from langchain_core.messages import HumanMessage, SystemMessage
14
+ from langchain.base_language import BaseLanguageModel
15
+ from langchain.text_splitter import CharacterTextSplitter
16
+
17
+
18
+ from langchain_community.document_loaders import PyPDFLoader
19
+ from langchain_community.vectorstores import FAISS
20
+ from langchain_openai import ChatOpenAI
21
+ from langchain_openai import OpenAIEmbeddings
22
+
23
+ template = """
24
+
25
+ You are an expert chemist and your task is to respond to the question or
26
+ solve the problem to the best of your ability. You need to answer in as much detail as possible.
27
+ You can only respond with a single "Final Answer" format.
28
+ Use the following pieces of context to answer the question at the end.
29
+ If you don't know the answer, just say that you don't know, don't try to make up an answer.
30
+ <context>
31
+ {context}
32
+ </context>
33
+
34
+ Question: {question}
35
+ Answer:
36
+
37
+ """
38
+
39
class pdfreader(BaseTool):
    """LangChain tool that answers a question about one PDF document.

    Each call loads the PDF at ``path``, splits it into overlapping text
    chunks, embeds the chunks into an in-memory FAISS index, retrieves the
    two best-matching chunks, and asks the chat model to answer the question
    using the module-level ``template`` prompt.

    Credentials are read from the ``OPENAI_API_KEY`` and ``OPENAI_API_BASE``
    environment variables.
    """

    name: str = "pdfreader"
    description: str = (
        # The two literals are concatenated, so the first one must end with
        # a separator or the sentences run together in the agent prompt.
        "Used to read papers, summarize papers, Q&A based on papers, literature or publication. "
        "Input query , return the response"
    )

    # Chat model used to generate the answer; assigned in __init__.
    llm: BaseLanguageModel = None
    # Filesystem path of the PDF to read; assigned in __init__.
    path: str = None
    return_direct: bool = True

    def __init__(self, path: str = None):
        """Create the tool.

        Args:
            path: Location of the PDF that ``_run`` will load.
        """
        super().__init__()
        self.llm = ChatOpenAI(
            model="gpt-4o-2024-11-20",
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        self.path = path

    def _run(self, query: str) -> str:
        """Answer ``query`` from the content of the configured PDF.

        Args:
            query: The question to ask about the document.

        Returns:
            The ``"result"`` entry of the retrieval-QA chain output.

        Raises:
            ValueError: If no PDF path was configured, or if the PDF yields
                no text chunks to index.
        """
        if not self.path:
            raise ValueError("pdfreader requires a PDF path; none was provided")

        pages = PyPDFLoader(self.path).load()
        splitter = CharacterTextSplitter(chunk_size=6000, chunk_overlap=1000)
        chunks = splitter.split_documents(pages)
        if not chunks:
            # Building a FAISS index from zero documents fails with an
            # unhelpful low-level error, so report the real cause instead.
            raise ValueError(f"no text could be extracted from {self.path!r}")

        embeddings = OpenAIEmbeddings(
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        vectorstore = FAISS.from_documents(chunks, embeddings)

        # The template contains both {context} and {question}; declare both
        # so the "stuff" chain can find its document variable.
        prompt = PromptTemplate(
            template=template,
            input_variables=["context", "question"],
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
            return_source_documents=True,
            chain_type_kwargs={"prompt": prompt},
        )

        result = qa_chain.invoke({"query": query})
        return result["result"]

    async def _arun(self, query: str) -> str:
        """Use the tool asynchronously (not supported; always raises)."""
        raise NotImplementedError("this tool does not support async")
85
+
86