# Hugging Face Spaces scrape residue removed (page status header: "Spaces: Sleeping").
| import gradio as gr | |
| import os | |
| import uuid | |
| import shutil | |
| import fitz | |
| from langchain_community.vectorstores import Chroma | |
| from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.prompts import PromptTemplate | |
| from langchain_core.runnables import RunnablePassthrough | |
| from langchain_core.output_parsers import StrOutputParser | |
| import tempfile | |
# Constants
LLM_MODEL = "gemini-1.5-flash"
# BUG FIX: the previous value "BAAI/bge-large-en-v1.5" is a HuggingFace
# sentence-transformers checkpoint, but this constant is passed to
# GoogleGenerativeAIEmbeddings, which only accepts Google model names —
# the embedding API call would fail at runtime with an unknown-model error.
EMBEDDING_MODEL = "models/embedding-001"
# Per-process scratch root for the per-session Chroma vector stores.
CHROMA_DB_PATH = os.path.join(tempfile.gettempdir(), "chroma_db")
class PDFChatbot:
    """Gradio-facing controller: indexes one uploaded PDF into a per-session
    Chroma vector store and answers questions about it via a RAG chain."""

    def __init__(self):
        # Each controller starts with a fresh (empty) session.
        self.state = SessionState()

    def process_pdf(self, pdf_file):
        """Extract text from *pdf_file*, chunk it, and build the vector store.

        Raises:
            gr.Error: for user-visible failures (oversized file, or any
                error while reading / embedding the document).
        """
        if self.state.is_db_ready():
            print("Database is already ready.")
            return
        file_size_mb = os.path.getsize(pdf_file.name) / (1024 * 1024)
        if file_size_mb >= 75:
            print("File size exceeds the 75 MB limit.")
            # BUG FIX: the original constructed gr.Error without raising it,
            # so the user never saw the message and processing silently stopped.
            raise gr.Error("File size exceeds the 75 MB limit. Please upload a smaller PDF.")
        # Fresh session per document so stale stores are never reused.
        self.state = SessionState()
        try:
            # Context manager guarantees the document is closed even on error
            # (the original leaked the handle if get_text() raised).
            with fitz.open(pdf_file.name) as doc:
                text = "".join(page.get_text() for page in doc)
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            docs = text_splitter.create_documents([text])
            embeddings = GoogleGenerativeAIEmbeddings(model=EMBEDDING_MODEL)
            self.state.db = Chroma.from_documents(
                documents=docs,
                embedding=embeddings,
                persist_directory=self.state.vector_store_path,
            )
            print("PDF processed successfully! Database is ready.")
        except Exception as e:
            # Remove the partially written store before surfacing the error.
            if os.path.exists(self.state.vector_store_path):
                shutil.rmtree(self.state.vector_store_path)
            print(f"An error occurred: {str(e)}")
            # BUG FIX: the original swallowed the exception, leaving the user
            # with no feedback; surface it in the UI instead.
            raise gr.Error(f"Failed to process the PDF: {e}") from e

    def chat_with_pdf(self, message, history):
        """Answer *message* with RAG over the indexed PDF.

        *history* is unused but required by the gr.ChatInterface callback
        signature. Yields a single response string (generator form keeps the
        Gradio streaming interface happy).
        """
        print("Chat interface called. Checking if database is ready...")
        if not self.state.is_db_ready():
            print("Database is not ready.")
            yield "Error: Database not ready."
            return
        print("Database is ready. Retrieving relevant documents...")
        retriever = self.state.db.as_retriever()
        llm = ChatGoogleGenerativeAI(model=LLM_MODEL, temperature=0.7)
        prompt_template = PromptTemplate(
            template="""
You are a helpful assistant for a PDF document.
Answer the user's question based on the following context.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
----------------
Context: {context}
Question: {question}
""",
            input_variables=["context", "question"],
        )
        rag_chain = (
            {"context": retriever, "question": RunnablePassthrough()}
            | prompt_template
            | llm
            | StrOutputParser()
        )
        yield rag_chain.invoke(message)

    def is_db_ready(self):
        """Return True once the current session's vector store exists."""
        return self.state.db is not None
class SessionState:
    """Holds one upload session: a unique id, the on-disk location of its
    Chroma vector store, and the store handle itself."""

    def __init__(self):
        # Set by PDFChatbot.process_pdf once indexing succeeds; None until then.
        self.db = None
        # A fresh UUID per session keeps concurrent uploads from colliding
        # in the shared CHROMA_DB_PATH directory.
        fresh_id = str(uuid.uuid4())
        self.session_id = fresh_id
        self.vector_store_path = os.path.join(CHROMA_DB_PATH, fresh_id)

    def is_db_ready(self):
        """Report whether a vector store has been attached to this session."""
        return self.db is not None
# Set the Google API key from environment variables — fail fast before
# building any UI so the misconfiguration is obvious at startup.
if "GOOGLE_API_KEY" not in os.environ:
    # RuntimeError (an Exception subclass, so existing handlers still match)
    # instead of the bare Exception the original raised.
    raise RuntimeError("Please set the GOOGLE_API_KEY environment variable.")

with gr.Blocks(title="PDF Chatbot") as demo:
    chatbot = PDFChatbot()
    gr.Markdown(
        """
        # PDF Chatbot
        Upload a PDF to start a conversation with your document.
        """
    )
    with gr.Row():
        file_upload_input = gr.File(
            file_types=[".pdf"],
            label="Upload your PDF document",
            interactive=True,
        )
    # Chat stays hidden until a document has been indexed successfully.
    with gr.Row(visible=False) as chat_row:
        chat_interface = gr.ChatInterface(
            fn=chatbot.chat_with_pdf,
            chatbot=gr.Chatbot(type="messages"),
            textbox=gr.Textbox(placeholder="Type your question here...", scale=7),
            examples=[["What is the main topic of the document?"], ["Summarize the key findings."], ["Who are the authors?"]],
            title="Chat Interface",
            theme="soft",
        )

    def process_and_show_chat(file):
        """Index the uploaded file, then reveal the chat only on success.

        BUG FIX: the original unconditionally revealed the chat row and locked
        the upload widget even when process_pdf failed (e.g. the >= 75 MB early
        return), stranding the user with a non-functional chat and a disabled
        uploader. Gate both updates on the database actually being ready.
        """
        chatbot.process_pdf(file)
        ready = chatbot.is_db_ready()
        return gr.update(visible=ready), gr.update(interactive=not ready)

    file_upload_input.upload(
        fn=process_and_show_chat,
        inputs=[file_upload_input],
        outputs=[chat_row, file_upload_input],
    )

demo.launch()