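"""Gradio app: upload a PDF, index it in a session-scoped Chroma store, and chat with it via a Gemini RAG chain."""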
import os
import shutil
import tempfile
import uuid

import gradio as gr
import fitz  # PyMuPDF

from langchain_community.vectorstores import Chroma
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
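# Assumed dependency set for this Space: gradio, pymupdf, chromadb,
# langchain, langchain-community, and langchain-google-genai.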
# Constants
LLM_MODEL = "gemini-1.5-flash"
# GoogleGenerativeAIEmbeddings expects a Google embedding model name, not a Hugging Face one.
EMBEDDING_MODEL = "models/embedding-001"
CHROMA_DB_PATH = os.path.join(tempfile.gettempdir(), "chroma_db")
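# Each processed PDF gets its own SessionState, so vector stores are persisted
# under <tmp>/chroma_db/<session uuid> and can be cleaned up independently.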
class PDFChatbot:
    def __init__(self):
        self.state = SessionState()
    def process_pdf(self, pdf_file):
        if self.state.is_db_ready():
            print("Database is already ready.")
            return
        file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
        if file_size_mb >= 75:
            print("File size exceeds the 75 MB limit.")
            raise gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
        self.state = SessionState()
        try:
            # Extract the raw text from every page with PyMuPDF.
            doc = fitz.open(pdf_file.name)
            text = ""
            for page in doc:
                text += page.get_text()
            doc.close()
            # Chunk the text and build a session-scoped Chroma vector store.
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            docs = text_splitter.create_documents([text])
            embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
            self.state.db = Chroma.from_documents(
                documents=docs,
                embedding=embeddings,
                persist_directory=self.state.vector_store_path
            )
            print("PDF processed successfully! Database is ready.")
        except Exception as e:
            # Remove any partially written vector store for this session.
            if os.path.exists(self.state.vector_store_path):
                shutil.rmtree(self.state.vector_store_path)
            print(f"An error occurred: {str(e)}")
    def chat_with_pdf(self, message, history):
        print("Chat interface called. Checking if database is ready...")
        if not self.state.is_db_ready():
            print("Database is not ready.")
            yield "Error: Database not ready."
            return
        print("Database is ready. Retrieving relevant documents...")
        retriever = self.state.db.as_retriever()
        llm = ChatGoogleGenerativeAI(model=LLM_MODEL, temperature=0.7)
        prompt_template = PromptTemplate(
            template="""
            You are a helpful assistant for a PDF document.
            Answer the user's question based on the following context.
            If you don't know the answer, just say that you don't know, don't try to make up an answer.
            ----------------
            Context: {context}
            Question: {question}
            """,
            input_variables=["context", "question"],
        )
        # Retrieval-augmented generation: retrieved chunks fill {context}, the user message fills {question}.
        rag_chain = (
            {"context": retriever, "question": RunnablePassthrough()}
            | prompt_template
            | llm
            | StrOutputParser()
        )
        response = rag_chain.invoke(message)
        yield response
    def is_db_ready(self):
        return self.state.db is not None
class SessionState:
    def __init__(self):
        self.session_id = str(uuid.uuid4())
        self.db = None
        self.vector_store_path = os.path.join(CHROMA_DB_PATH, self.session_id)

    def is_db_ready(self):
        return self.db is not None
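# A minimal sketch (hypothetical, outside Gradio) of how the two classes fit together;
# "paper.pdf" is a placeholder path, and any object with a .name attribute works as the upload:
#
#     bot = PDFChatbot()
#     with open("paper.pdf", "rb") as f:
#         bot.process_pdf(f)
#     print(next(bot.chat_with_pdf("What is the main topic of the document?", [])))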
# The Google API key must already be set in the environment before the app starts.
if "GOOGLE_API_KEY" not in os.environ:
    raise Exception("Please set the GOOGLE_API_KEY environment variable.")
with gr.Blocks(title="PDF Chatbot") as demo:
    # Note: a single PDFChatbot instance is shared by all visitors of this app.
    chatbot = PDFChatbot()
    gr.Markdown(
        """
        # PDF Chatbot
        Upload a PDF to start a conversation with your document.
        """
    )
    with gr.Row():
        file_upload_input = gr.File(
            file_types=[".pdf"],
            label="Upload your PDF document",
            interactive=True
        )
    with gr.Row(visible=False) as chat_row:
        chat_interface = gr.ChatInterface(
            fn=chatbot.chat_with_pdf,
            chatbot=gr.Chatbot(type="messages"),
            textbox=gr.Textbox(placeholder="Type your question here...", scale=7),
            examples=[
                ["What is the main topic of the document?"],
                ["Summarize the key findings."],
                ["Who are the authors?"],
            ],
            title="Chat Interface",
            theme="soft"
        )
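    # Once the PDF has been indexed, reveal the chat row and lock further uploads.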
    def process_and_show_chat(file):
        chatbot.process_pdf(file)
        return gr.update(visible=True), gr.update(interactive=False)

    file_upload_input.upload(
        fn=process_and_show_chat,
        inputs=[file_upload_input],
        outputs=[chat_row, file_upload_input]
    )
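# Launch the Gradio app.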
demo.launch()