# -*- coding: utf-8 -*-
"""
Created on Mon Dec 30 22:20:13 2024

@author: BM109X32G-10GPU-02
"""
import os
from typing import Optional

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.tools import BaseTool
from langchain.text_splitter import CharacterTextSplitter
from langchain_core.language_models import BaseLanguageModel
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

template = """
You are an expert chemist and your task is to respond to the question or solve the problem to the best of your ability. Answer in as much detail as possible. You can only respond in a single "Final Answer" format.
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know; don't try to make up an answer.

{context}

Question: {question}
Answer:
"""


class pdfreader(BaseTool):
    """Tool that answers questions about a PDF via retrieval-augmented QA."""

    name: str = "pdfreader"
    description: str = (
        "Used to read papers, summarize papers, and answer questions based on "
        "papers, literature, or publications. Input a query; returns the response."
    )
    llm: Optional[BaseLanguageModel] = None
    path: Optional[str] = None
    return_direct: bool = True

    def __init__(self, path: Optional[str] = None):
        super().__init__()
        # The API key and base URL are read from the environment.
        self.llm = ChatOpenAI(
            model="gpt-4o-2024-11-20",
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        self.path = path  # path to the PDF file to query

    def _run(self, query) -> str:
        # Load the PDF and split it into overlapping chunks for retrieval.
        loader = PyPDFLoader(self.path)
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=6000, chunk_overlap=1000)
        docs = text_splitter.split_documents(documents)

        # Embed the chunks and index them in an in-memory FAISS vector store.
        embeddings = OpenAIEmbeddings(
            api_key=os.getenv("OPENAI_API_KEY"),
            base_url=os.getenv("OPENAI_API_BASE"),
        )
        vectorstore = FAISS.from_documents(docs, embeddings)

        # Build a "stuff" RetrievalQA chain: the top-2 retrieved chunks are
        # stuffed into the prompt's {context} slot. The prompt uses both
        # {context} and {question}, so both must be declared as input variables.
        prompt = PromptTemplate(
            template=template, input_variables=["context", "question"]
        )
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=vectorstore.as_retriever(search_kwargs={"k": 2}),
            return_source_documents=True,
            chain_type_kwargs={"prompt": prompt},
        )
        result = qa_chain.invoke({"query": query})
        return result["result"]

    async def _arun(self, query) -> str:
        """Use the tool asynchronously."""
        raise NotImplementedError("This tool does not support async.")
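

# --- Minimal usage sketch (not part of the tool itself) ---
# Assumes OPENAI_API_KEY (and optionally OPENAI_API_BASE) are set in the
# environment; "paper.pdf" and the question below are hypothetical examples.
if __name__ == "__main__":
    tool = pdfreader(path="paper.pdf")
    answer = tool.run("What is the main conclusion of this paper?")
    print(answer)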