# chat-with-PDF / app.py
# ravindravala's Space — chat with an uploaded PDF
# commit: 77a0f40
import os
import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# Diagnostic: log which Gradio version the Space is actually running.
print(f"Gradio version: {gr.__version__}")

# Load environment variables from a local .env file (no-op if the file is absent).
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
# Log only whether the token is present — never its value.
print("HF_TOKEN loaded:", bool(HF_TOKEN))

# Hosted inference client for a small, free-tier-friendly instruct model.
client = InferenceClient(
    model="meta-llama/Llama-3.2-1B-Instruct",
    token=HF_TOKEN
)

# Global FAISS vector store; stays None until a PDF has been processed.
vector_store = None
def process_pdf(pdf_file):
    """Load an uploaded PDF, chunk it, and build the global FAISS vector store.

    Args:
        pdf_file: Value delivered by ``gr.File`` — a plain filepath string on
            newer Gradio versions, or a tempfile-like object exposing ``.name``
            on older ones. ``None`` when nothing was uploaded.

    Returns:
        A human-readable status string for the UI status box.
    """
    global vector_store

    if pdf_file is None:
        return "Please upload a PDF file."

    # Robustness fix: the original assumed a tempfile wrapper and crashed with
    # AttributeError when gr.File returns a filepath string; accept both forms.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    # Load every page of the PDF as a langchain Document.
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Split into overlapping chunks so each piece fits the embedding model
    # and retrieval returns focused passages.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50
    )
    chunks = text_splitter.split_documents(documents)

    # Embed each chunk and index them in an in-memory FAISS store.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vector_store = FAISS.from_documents(chunks, embeddings)

    return f"✅ PDF processed! {len(chunks)} chunks created. You can now ask questions."
def get_bot_response(message, history):
    """Generate a reply via the HF Inference API, grounded in the PDF when loaded.

    Args:
        message: The latest user message.
        history: Prior chat turns as dicts or ChatMessage-like objects with
            ``role``/``content`` fields (may be empty or None).

    Returns:
        The assistant's reply text.
    """
    global vector_store

    # If a PDF has been processed, retrieve the 3 most relevant chunks.
    context = ""
    if vector_store is not None:
        docs = vector_store.similarity_search(message, k=3)
        context = "\n\n".join(doc.page_content for doc in docs)

    # Ground the system prompt in the retrieved context when we have any.
    if context:
        system_prompt = f"""You are a helpful AI assistant. Answer questions based on the provided context.
If the answer is not in the context, say so.
Context:
{context}"""
    else:
        system_prompt = "You are a helpful AI assistant."

    messages = [{"role": "system", "content": system_prompt}]

    # Bug fix: the original accepted `history` but never used it, so the model
    # had no conversational memory. Replay prior turns before the new message.
    # Turns may be plain dicts or gr.ChatMessage objects — handle both.
    for turn in history or []:
        if isinstance(turn, dict):
            role, content = turn.get("role"), turn.get("content")
        else:
            role, content = getattr(turn, "role", None), getattr(turn, "content", None)
        if role in ("user", "assistant") and isinstance(content, str) and content:
            messages.append({"role": role, "content": content})

    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        messages=messages,
        max_tokens=500,
        temperature=0.7,
    )
    return response.choices[0].message.content
def clear_pdf():
    """Drop the global vector store so the chat falls back to general mode.

    Returns:
        A status string confirming the reset.
    """
    global vector_store
    # Discard the index; get_bot_response treats None as "no PDF loaded".
    vector_store = None
    return "PDF cleared. Chat is now in general mode."
# Gradio UI: PDF controls on the left, chat transcript on the right.
with gr.Blocks(title="🤖 AI Chatbot with PDF") as demo:
    gr.Markdown("# 🤖 AI Chatbot with PDF Support")
    gr.Markdown("Upload a PDF to chat about its contents, or just chat normally.")

    with gr.Row():
        with gr.Column(scale=1):
            # Left column: upload control, process/clear buttons, status readout.
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            upload_btn = gr.Button("Process PDF", variant="primary")
            clear_btn = gr.Button("Clear PDF")
            status = gr.Textbox(label="Status", interactive=False)
        with gr.Column(scale=2):
            # Right column: transcript, message entry, and chat reset.
            chatbot_display = gr.Chatbot(label="Chat", height=400)
            msg_input = gr.Textbox(label="Your message", placeholder="Type your message here...")
            clear_chat_btn = gr.Button("Clear Chat")

    # Server-side chat history, mirrored into the Chatbot component on update.
    # Chat history state - using messages format for Gradio 6.x
    chat_history = gr.State([])

    def respond(message, history):
        """Handle a submitted message: get a reply, append both turns.

        Returns a 3-tuple matching the submit outputs: cleared textbox,
        updated Chatbot value, updated State value.
        """
        if not message:
            return "", history, history
        bot_response = get_bot_response(message, history)
        # Gradio 6.x requires messages format with role and content
        # NOTE(review): gr.Chatbot above is constructed without type="messages";
        # confirm the installed Gradio version renders ChatMessage entries here.
        history = history + [
            gr.ChatMessage(role="user", content=message),
            gr.ChatMessage(role="assistant", content=bot_response)
        ]
        return "", history, history

    def clear_chat():
        """Reset both the visible transcript and the stored history."""
        return [], []

    # Wire events: Enter submits a message; buttons manage PDF and chat state.
    msg_input.submit(respond, [msg_input, chat_history], [msg_input, chatbot_display, chat_history])
    clear_chat_btn.click(clear_chat, outputs=[chatbot_display, chat_history])
    upload_btn.click(process_pdf, inputs=[pdf_input], outputs=[status])
    clear_btn.click(clear_pdf, outputs=[status])

demo.launch()