Spaces:

himanshukumar378
/

Mutliple_chat_pdf

Sleeping

App Files Files Community

Mutliple_chat_pdf / app.py

himanshukumar378

Update app.py

cb6ff7e verified 6 months ago

raw

history blame contribute delete

4.88 kB

	import gradio as gr
	from PyPDF2 import PdfReader

	# LangChain components
	from langchain.text_splitter import CharacterTextSplitter
	from langchain_community.vectorstores import FAISS
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_core.prompts import PromptTemplate

	# Hugging Face Transformers
	from transformers import pipeline


	# ---------------- Load LLM ----------------
	def load_llm():
	    """Create the text2text-generation pipeline used to answer questions.

	    Returns:
	        The ``transformers`` pipeline for ``google/flan-t5-base``, or ``None``
	        when loading raised an exception. The failure is printed rather than
	        re-raised, so callers must check for ``None`` before using the result.
	    """
	    model_name = "google/flan-t5-base"
	    try:
	        # Decoding is left at the greedy default. ``temperature`` is only
	        # consulted when sampling is enabled, so it is deliberately not
	        # passed: without ``do_sample=True`` it changes nothing and merely
	        # makes transformers emit a generation-config warning.
	        pipe = pipeline(
	            "text2text-generation",
	            model=model_name,
	            max_length=512,
	        )
	        print(f"✅ Successfully loaded model: {model_name}")
	        return pipe
	    except Exception as e:
	        # Best-effort by design: report the problem and signal it with None.
	        print(f"⚠️ Failed to load model: {e}")
	        return None


	# Load the model once when the module is executed; ``llm`` is None when
	# loading failed (load_llm swallows the exception and returns None).
	llm = load_llm()


	# ---------------- Process PDF ----------------
	_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
	_embeddings_cache = None


	def _get_embeddings():
	    """Return the shared embedding model, instantiating it on first use."""
	    global _embeddings_cache
	    if _embeddings_cache is None:
	        _embeddings_cache = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
	    return _embeddings_cache


	def process_pdf(pdf_files):
	    """Build a FAISS vector store from the text of the given PDF files.

	    Args:
	        pdf_files: Iterable of PDF sources, each passed as-is to
	            ``PdfReader``. ``None`` is treated like an empty iterable.

	    Returns:
	        A ``FAISS`` store built from the extracted text chunks, or ``None``
	        when no text could be extracted from any page.
	    """
	    page_texts = []
	    for pdf in pdf_files or []:
	        reader = PdfReader(pdf)
	        for page in reader.pages:
	            extracted = page.extract_text()
	            if extracted:
	                page_texts.append(extracted)

	    text = "\n".join(page_texts)
	    if not text.strip():
	        return None

	    # Pages are joined with single newlines, so split on "\n". The splitter's
	    # default separator is a blank line ("\n\n"), which this text rarely
	    # contains; with it the whole document could end up as one oversized
	    # chunk regardless of chunk_size.
	    splitter = CharacterTextSplitter(
	        separator="\n",
	        chunk_size=800,
	        chunk_overlap=100,
	    )
	    texts = splitter.split_text(text)
	    if not texts:
	        return None

	    # Reuse one embedding model across calls instead of reloading it for
	    # every question.
	    return FAISS.from_texts(texts, _get_embeddings())


	# ---------------- Ask Questions ----------------
	def ask_question(pdf_files, question):
	    """Answer a question from the content of the uploaded PDF files.

	    Args:
	        pdf_files: Uploaded PDF files, passed through to ``process_pdf``.
	        question: The user's question as free text.

	    Returns:
	        The generated answer, or a user-facing message explaining why no
	        answer could be produced. Exceptions are not propagated; they are
	        reported in the returned string.
	    """
	    no_answer_message = (
	        "I couldn't find a clear answer to your question in the provided documents. "
	        "Please try rephrasing your question or check if the relevant information is in the uploaded PDFs."
	    )
	    answer_prefix = "Answer:"

	    try:
	        if not pdf_files:
	            return "⚠️ Please upload at least one PDF file."

	        # A blank query would otherwise still be embedded and "answered".
	        question = (question or "").strip()
	        if not question:
	            return "⚠️ Please enter a question."

	        if llm is None:
	            return "⚠️ Language model failed to load. Please try again later."

	        db = process_pdf(pdf_files)
	        if db is None:
	            return "⚠️ No text found in the uploaded PDF(s)."

	        # Plain similarity search for the 4 closest chunks; this is what the
	        # default retriever does, without the deprecated
	        # ``get_relevant_documents`` entry point.
	        docs = db.similarity_search(question, k=4)

	        # Combine the retrieved chunks and collapse runs of whitespace.
	        context = " ".join("\n".join(doc.page_content for doc in docs).split())

	        prompt = (
	            "Based on the following information, answer the question clearly and concisely.\n"
	            "\n"
	            "Information:\n"
	            f"{context}\n"
	            "\n"
	            f"Question: {question}\n"
	            "\n"
	            "Answer:"
	        )

	        # Greedy decoding; ``temperature`` is omitted because it is ignored
	        # (and warned about) when ``do_sample`` is False.
	        result = llm(
	            prompt,
	            max_length=300,
	            num_return_sequences=1,
	            do_sample=False,
	        )
	        response = result[0]["generated_text"].strip()

	        # Drop only a leading "Answer:" label; later occurrences are content.
	        if response.startswith(answer_prefix):
	            response = response[len(answer_prefix):].strip()

	        # Short answers ("Yes", "2019", a name) are valid, so only an empty
	        # generation triggers the fallback.
	        if not response:
	            return no_answer_message

	        return response

	    except Exception as e:
	        # UI boundary: surface the failure to the user instead of crashing.
	        return f"⚠️ Error: {e}"


	# ---------------- Gradio UI ----------------
	# Questions offered as one-click examples for the question box.
	example_questions = [
	    ["What is the main topic of this document?"],
	    ["Can you summarize the key points?"],
	    ["What are the main findings or conclusions?"],
	    ["Who are the authors and what are their credentials?"],
	]

	with gr.Blocks() as demo:
	    gr.Markdown("## 📚 PDF Question Answering System")
	    gr.Markdown("Upload PDF files and ask questions about their content.")

	    # Inputs: PDF upload in one column, question box and button in the other.
	    with gr.Row():
	        with gr.Column():
	            pdf_input = gr.File(
	                label="Upload PDF Files",
	                file_types=[".pdf"],
	                file_count="multiple",
	            )
	        with gr.Column():
	            question_input = gr.Textbox(
	                label="Your Question",
	                placeholder="What would you like to know about the document?",
	                lines=2,
	            )
	            submit_btn = gr.Button("Ask Question", variant="primary")

	    # Read-only box that receives the answer (or an error message).
	    with gr.Row():
	        output = gr.Textbox(label="Answer", lines=4, interactive=False)

	    gr.Examples(
	        examples=example_questions,
	        inputs=question_input,
	        label="Example Questions",
	    )

	    # The button click and pressing Enter in the question box run the same
	    # handler with the same inputs and output.
	    qa_inputs = [pdf_input, question_input]
	    for register in (submit_btn.click, question_input.submit):
	        register(ask_question, inputs=qa_inputs, outputs=output)

	demo.launch()