# chat-with-PDF / app.py
# ravindravala's Space — chat with an uploaded PDF
# commit: 77a0f40
import os
import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# Diagnostic: log which Gradio version the Space is actually running.
print(f"Gradio version: {gr.__version__}")

# Load environment variables from a local .env file (no-op if the file is absent).
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
# Log only whether the token is present — never its value.
print("HF_TOKEN loaded:", bool(HF_TOKEN))

# Hosted inference client for a small, free-tier-friendly instruct model.
client = InferenceClient(
    model="meta-llama/Llama-3.2-1B-Instruct",
    token=HF_TOKEN
)

# Global FAISS vector store; stays None until a PDF has been processed.
vector_store = None
def process_pdf(pdf_file):
    """Load an uploaded PDF, chunk it, and build the global FAISS vector store.

    Args:
        pdf_file: Value delivered by ``gr.File`` — a plain filepath string on
            newer Gradio versions, or a tempfile-like object exposing ``.name``
            on older ones. ``None`` when nothing was uploaded.

    Returns:
        A human-readable status string for the UI status box.
    """
    global vector_store

    if pdf_file is None:
        return "Please upload a PDF file."

    # Robustness fix: the original assumed a tempfile wrapper and crashed with
    # AttributeError when gr.File returns a filepath string; accept both forms.
    pdf_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    # Load every page of the PDF as a langchain Document.
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Split into overlapping chunks so each piece fits the embedding model
    # and retrieval returns focused passages.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50
    )
    chunks = text_splitter.split_documents(documents)

    # Embed each chunk and index them in an in-memory FAISS store.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vector_store = FAISS.from_documents(chunks, embeddings)

    return f"✅ PDF processed! {len(chunks)} chunks created. You can now ask questions."
def get_bot_response(message, history):
    """Generate a reply via the HF Inference API, grounded in the PDF when loaded.

    Args:
        message: The latest user message.
        history: Prior chat turns as dicts or ChatMessage-like objects with
            ``role``/``content`` fields (may be empty or None).

    Returns:
        The assistant's reply text.
    """
    global vector_store

    # If a PDF has been processed, retrieve the 3 most relevant chunks.
    context = ""
    if vector_store is not None:
        docs = vector_store.similarity_search(message, k=3)
        context = "\n\n".join(doc.page_content for doc in docs)

    # Ground the system prompt in the retrieved context when we have any.
    if context:
        system_prompt = f"""You are a helpful AI assistant. Answer questions based on the provided context.
If the answer is not in the context, say so.
Context:
{context}"""
    else:
        system_prompt = "You are a helpful AI assistant."

    messages = [{"role": "system", "content": system_prompt}]

    # Bug fix: the original accepted `history` but never used it, so the model
    # had no conversational memory. Replay prior turns before the new message.
    # Turns may be plain dicts or gr.ChatMessage objects — handle both.
    for turn in history or []:
        if isinstance(turn, dict):
            role, content = turn.get("role"), turn.get("content")
        else:
            role, content = getattr(turn, "role", None), getattr(turn, "content", None)
        if role in ("user", "assistant") and isinstance(content, str) and content:
            messages.append({"role": role, "content": content})

    messages.append({"role": "user", "content": message})

    response = client.chat.completions.create(
        messages=messages,
        max_tokens=500,
        temperature=0.7,
    )
    return response.choices[0].message.content
def clear_pdf():
    """Drop the global vector store so the chat falls back to general mode.

    Returns:
        A status string confirming the reset.
    """
    global vector_store
    # Discard the index; get_bot_response treats None as "no PDF loaded".
    vector_store = None
    return "PDF cleared. Chat is now in general mode."
# Gradio UI: PDF controls on the left, chat transcript on the right.
with gr.Blocks(title="🤖 AI Chatbot with PDF") as demo:
    gr.Markdown("# 🤖 AI Chatbot with PDF Support")
    gr.Markdown("Upload a PDF to chat about its contents, or just chat normally.")

    with gr.Row():
        with gr.Column(scale=1):
            # Left column: upload control, process/clear buttons, status readout.
            pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
            upload_btn = gr.Button("Process PDF", variant="primary")
            clear_btn = gr.Button("Clear PDF")
            status = gr.Textbox(label="Status", interactive=False)
        with gr.Column(scale=2):
            # Right column: transcript, message entry, and chat reset.
            chatbot_display = gr.Chatbot(label="Chat", height=400)
            msg_input = gr.Textbox(label="Your message", placeholder="Type your message here...")
            clear_chat_btn = gr.Button("Clear Chat")

    # Server-side chat history, mirrored into the Chatbot component on update.
    # Chat history state - using messages format for Gradio 6.x
    chat_history = gr.State([])

    def respond(message, history):
        """Handle a submitted message: get a reply, append both turns.

        Returns a 3-tuple matching the submit outputs: cleared textbox,
        updated Chatbot value, updated State value.
        """
        if not message:
            return "", history, history
        bot_response = get_bot_response(message, history)
        # Gradio 6.x requires messages format with role and content
        # NOTE(review): gr.Chatbot above is constructed without type="messages";
        # confirm the installed Gradio version renders ChatMessage entries here.
        history = history + [
            gr.ChatMessage(role="user", content=message),
            gr.ChatMessage(role="assistant", content=bot_response)
        ]
        return "", history, history

    def clear_chat():
        """Reset both the visible transcript and the stored history."""
        return [], []

    # Wire events: Enter submits a message; buttons manage PDF and chat state.
    msg_input.submit(respond, [msg_input, chat_history], [msg_input, chatbot_display, chat_history])
    clear_chat_btn.click(clear_chat, outputs=[chatbot_display, chat_history])
    upload_btn.click(process_pdf, inputs=[pdf_input], outputs=[status])
    clear_btn.click(clear_pdf, outputs=[status])

demo.launch()