File size: 3,940 Bytes
bd8906d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59ac75a
bd8906d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9de4707
bd8906d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
import tempfile
import gradio as gr
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.retrieval_qa.base import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import Chroma
from langchain.llms.base import LLM
from groq import Groq
from typing import Any, List, Optional

# Set up Groq client.
# SECURITY: an API key was previously hardcoded on this line. A secret
# committed to source control must be treated as compromised and rotated;
# read the key from the environment instead of embedding it in the file.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
if not GROQ_API_KEY:
    raise RuntimeError("Set the GROQ_API_KEY environment variable before starting the app.")
groq_client = Groq(api_key=GROQ_API_KEY)

# Custom LLM class for Groq
class GroqLLM(LLM):
    """LangChain-compatible wrapper around the Groq chat-completions API."""

    client: Any  # an initialized groq.Groq client
    model: str = "llama3-8b-8192"

    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
        """Send *prompt* as a single user message and return the reply text.

        ``stop`` sequences, when provided, are now forwarded to the API so
        generation halts at the first occurrence — previously the parameter
        was accepted but silently ignored. ``stop=None`` is accepted by the
        Groq API and means "no stop sequences".
        """
        chat_completion = self.client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=self.model,
            stop=stop,
        )
        return chat_completion.choices[0].message.content

    @property
    def _llm_type(self) -> str:
        """Identifier LangChain uses for this LLM type (logging/serialization)."""
        return "groq"

# Initialize GroqLLM
llm = GroqLLM(client=groq_client)

# Custom prompt template.
# {context} is filled with the retrieved document chunks by the RetrievalQA
# "stuff" chain below; {question} is the user's query. The wording instructs
# the model to answer strictly from the supplied context.
template = """You are ACReSAL AI Chatbot, a direct and concise assistant. A large language model trained by Brilliant Esystems Limited. Answer the question using only the information provided in the context. Give only the specific answer requested.

Context: {context}

Question: {question}

Answer:"""

# PromptTemplate validates that both placeholders appear in the template.
PROMPT = PromptTemplate(
    template=template, input_variables=["context", "question"]
)

class PDFQuestionAnswering:
    """Builds and holds a RetrievalQA pipeline over one uploaded PDF."""

    def __init__(self):
        # Populated by setup_qa_system(); stays None until a PDF is processed.
        self.qa_system = None

    def setup_qa_system(self, pdf_file):
        """Index *pdf_file* (raw PDF bytes) and build the QA chain.

        Returns a status string for display in the UI.
        """
        # Persist the upload to disk because PyPDFLoader requires a file path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(pdf_file)
            temp_file_path = temp_file.name

        try:
            # Load each PDF page as a LangChain document.
            loader = PyPDFLoader(temp_file_path)
            documents = loader.load()
        finally:
            # Always remove the temporary file — previously a parse failure
            # in load() skipped the unlink and leaked the file on disk.
            os.unlink(temp_file_path)

        # Split pages into ~1000-character chunks for embedding.
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        texts = text_splitter.split_documents(documents)

        # Default sentence-transformers embedding model.
        embeddings = HuggingFaceEmbeddings()

        # In-memory Chroma vector store over the chunks.
        docsearch = Chroma.from_documents(texts, embeddings)

        # "stuff" chain: retrieved chunks are stuffed into PROMPT's {context}.
        self.qa_system = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=docsearch.as_retriever(),
            chain_type_kwargs={"prompt": PROMPT}
        )

        return "PDF processed successfully. You can now ask questions."

    def get_answer(self, question):
        """Answer *question* against the indexed PDF.

        Returns an instructional message when no PDF has been processed yet.
        """
        if self.qa_system is None:
            return "Please upload a PDF file first."

        raw_answer = self.qa_system.run(question)
        return raw_answer.strip()

# Single shared QA backend used by both Gradio callbacks below.
pdf_qa = PDFQuestionAnswering()

def process_pdf(pdf_file):
    """Gradio callback: index the uploaded PDF bytes; return a status string."""
    if pdf_file is None:
        return "Please upload a PDF file."
    return pdf_qa.setup_qa_system(pdf_file)

def answer_question(question):
    """Gradio callback: answer *question* against the currently indexed PDF."""
    result = pdf_qa.get_answer(question)
    return result

# Gradio interface: an upload/process row on top, a question/answer row below.
with gr.Blocks() as demo:
    gr.Markdown("# Agro-Climatic Resilience in Semi-Arid Landscapes - ACReSAL AI Powered Chatbot ")

    # Row 1: PDF upload widget plus a status read-out.
    with gr.Row():
        upload_box = gr.File(label="Upload PDF", type="binary", file_types=[".pdf"])
        status_box = gr.Textbox(label="PDF Processing Status")
    process_btn = gr.Button("Process PDF")

    # Row 2: free-text question entry plus the model's answer.
    with gr.Row():
        query_box = gr.Textbox(label="Enter your question")
        reply_box = gr.Textbox(label="Answer")
    ask_btn = gr.Button("Get Answer")

    # Wire each button to its module-level callback.
    process_btn.click(process_pdf, inputs=[upload_box], outputs=[status_box])
    ask_btn.click(answer_question, inputs=[query_box], outputs=[reply_box])

if __name__ == "__main__":
    demo.launch()