|
|
import gradio as gr |
|
|
import os |
|
|
from langchain_community.document_loaders import PyPDFLoader, UnstructuredPDFLoader |
|
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
|
from langchain_community.vectorstores import Chroma |
|
|
from langchain.chains import ConversationalRetrievalChain |
|
|
from langchain.memory import ConversationBufferMemory |
|
|
from langchain_community.chat_models import ChatOpenAI |
|
|
|
|
|
def process_pdf(file_path):
    """Build a Chroma vector store from the PDF at ``file_path``.

    The file is first read with ``PyPDFLoader``. If that loader raises, the
    file is read again with ``UnstructuredPDFLoader`` using the
    ``"ocr_only"`` strategy.

    Args:
        file_path: Path of the PDF file to load.

    Returns:
        The object returned by ``Chroma.from_documents`` for the loaded
        documents, embedded with the ``all-MiniLM-L6-v2`` model.

    Raises:
        gr.Error: If loading, embedding or indexing fails, or if no documents
            could be extracted. The underlying exception is attached as
            ``__cause__``.
    """
    try:
        try:
            documents = PyPDFLoader(file_path).load()
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed by the fallback.
            documents = UnstructuredPDFLoader(
                file_path, strategy="ocr_only"
            ).load()

        if not documents:
            # Fail with a clear message instead of handing an empty list to
            # the vector store.
            raise ValueError("No content could be extracted from the PDF")

        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        return Chroma.from_documents(documents, embeddings)
    except Exception as e:
        raise gr.Error(f"Error processing PDF: {str(e)}") from e
|
|
|
|
|
def setup_conversation_chain(vector_store, api_key):
    """Create a conversational retrieval chain with chat memory.

    Args:
        vector_store: Object exposing ``as_retriever``; it is queried for the
            top 3 matches (``search_kwargs={"k": 3}``).
        api_key: OpenAI API key used by the chat model.

    Returns:
        The chain built by ``ConversationalRetrievalChain.from_llm`` around a
        ``ChatOpenAI`` model (temperature 0.1), the retriever, and a
        ``ConversationBufferMemory`` stored under ``"chat_history"``.

    Raises:
        gr.Error: If any component fails to initialize. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        memory = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
        )
        # Hand the key to the model directly instead of writing it to
        # os.environ: the environment is process-wide, so one session's key
        # would otherwise be visible to (and overwritten by) other sessions.
        llm = ChatOpenAI(temperature=0.1, openai_api_key=api_key)
        retriever = vector_store.as_retriever(search_kwargs={"k": 3})
        return ConversationalRetrievalChain.from_llm(
            llm,
            retriever,
            memory=memory,
        )
    except Exception as e:
        raise gr.Error(f"Error initializing chat: {str(e)}") from e
|
|
|
|
|
def upload_file(file, api_key, chat_history):
    """Validate the upload, index the PDF and create the conversation chain.

    Args:
        file: The uploaded file. Either an object with a ``name`` attribute
            holding the file path, or the path itself.
        api_key: OpenAI API key; must start with ``"sk-"`` after surrounding
            whitespace is removed.
        chat_history: Current chat history. It is accepted because the event
            wiring passes it in, but it is not read: a fresh history is
            returned.

    Returns:
        A ``(conversation_chain, chat_history)`` tuple, where the history
        holds a single confirmation entry.

    Raises:
        gr.Error: If the API key is malformed, no file was supplied, the file
            is not a PDF, or processing fails.
    """
    # Tolerate a missing value and stray whitespace from copy/paste.
    api_key = (api_key or "").strip()
    if not api_key.startswith("sk-"):
        raise gr.Error("Invalid OpenAI API key format")

    if file is None:
        raise gr.Error("No file was uploaded")

    # Accept both file-like objects exposing ``.name`` and plain path strings.
    file_path = str(getattr(file, "name", file))
    # Compare case-insensitively so that e.g. "REPORT.PDF" is accepted.
    if not file_path.lower().endswith(".pdf"):
        raise gr.Error("Only PDF files are supported")

    vector_store = process_pdf(file_path)
    if not vector_store:
        raise gr.Error("Failed to process PDF")

    conversation_chain = setup_conversation_chain(vector_store, api_key)
    return conversation_chain, [
        ("System", "PDF processed successfully! Ask me anything about the document.")
    ]
|
|
|
|
|
def respond(query, chat_history, conversation_chain):
    """Answer a user question and append the exchange to the chat history.

    Args:
        query: The question typed by the user.
        chat_history: List of ``(question, answer)`` tuples shown in the chat
            widget; ``None`` is treated as an empty history.
        conversation_chain: Callable that takes ``{"question": query}`` and
            returns a mapping with an ``"answer"`` key.

    Returns:
        A ``("", chat_history)`` tuple: the empty string clears the input box
        and the history includes the new exchange. A blank query returns the
        history unchanged without calling the chain.

    Raises:
        gr.Error: If no conversation chain exists yet, or if the chain fails.
            The underlying exception is attached as ``__cause__``.
    """
    if not conversation_chain:
        raise gr.Error("Please upload a PDF first")

    # The history may arrive as None (e.g. after it has been cleared).
    if chat_history is None:
        chat_history = []

    # Do not spend a model call on an empty or whitespace-only question.
    if not query or not query.strip():
        return "", chat_history

    try:
        result = conversation_chain({"question": query})
        chat_history.append((query, result["answer"]))
        return "", chat_history
    except Exception as e:
        raise gr.Error(f"Error processing query: {str(e)}") from e
|
|
|
|
|
# UI definition: an API-key box and upload button, a chat window, a question
# box and a clear button, wired to upload_file() and respond().
# NOTE(review): the original indentation was lost; the nesting of the chat
# widgets relative to the Row/Column below is reconstructed - confirm layout.
# NOTE(review): the leading "π" in the title and button label looks like a
# mis-decoded emoji - confirm the intended glyph.
with gr.Blocks(title="PDF Chatbot", theme=gr.themes.Soft()) as app:
    gr.Markdown("# π DocuBuddy - Ask Me Questions About Your Document")

    # Per-session holder for the chain produced by upload_file().
    conversation_chain = gr.State(value=None)

    with gr.Row():
        with gr.Column(scale=1):
            api_key = gr.Textbox(
                placeholder="Enter your OpenAI API key (sk-...)",
                type="password",
                label="OpenAI API Key",
            )
            upload_btn = gr.UploadButton(
                label="π Upload PDF",
                file_count="single",
                file_types=[".pdf"],
            )

    chatbot = gr.Chatbot(height=500, label="Conversation")
    query = gr.Textbox(
        placeholder="Type your question here...",
        label="Your Question",
    )
    clear_btn = gr.ClearButton(components=[query, chatbot])

    # Uploading a file builds the chain and resets the visible conversation.
    upload_btn.upload(
        fn=upload_file,
        inputs=[upload_btn, api_key, chatbot],
        outputs=[conversation_chain, chatbot],
    )

    # Submitting a question clears the input box and updates the conversation.
    query.submit(
        fn=respond,
        inputs=[query, chatbot, conversation_chain],
        outputs=[query, chatbot],
    )


if __name__ == "__main__":
    # Start the app only when run as a script; share=True is forwarded to
    # Gradio's launch().
    app.launch(share=True)