| import gradio as gr |
| from langchain_community.document_loaders import PyPDFLoader |
| from langchain_community.embeddings import HuggingFaceEmbeddings |
| from langchain_community.vectorstores import FAISS |
| from langchain.text_splitter import RecursiveCharacterTextSplitter |
| from langchain.chains import RetrievalQA |
| from langchain.prompts import PromptTemplate |
| from langchain_community.chat_models import ChatOpenAI |
| import os |
| from tempfile import NamedTemporaryFile |
|
|
| |
def load_api_key():
    """Return the OpenRouter API key from the environment.

    Returns:
        str: the value of the ``OPENROUTER_API_KEY`` environment variable.

    Raises:
        ValueError: if the variable is not set.
    """
    # EAFP: one dict lookup instead of a membership test followed by getenv
    # (the original checked `in os.environ` and then looked the key up again).
    try:
        return os.environ["OPENROUTER_API_KEY"]
    except KeyError:
        raise ValueError("API key not found in environment variables") from None
| OPENROUTER_API_KEY=load_api_key() |
| |
def process_pdfs(files):
    """Build a FAISS retriever over the text of the uploaded PDF files.

    Args:
        files: iterable of uploaded file objects; each must expose a ``.name``
            attribute holding a readable path (Gradio upload objects do).

    Returns:
        A retriever over the chunked documents (top-3 similarity search).

    Raises:
        ValueError: if no text could be extracted from any of the files.
    """
    # The splitter is stateless configuration — build it once, not per file
    # as the original did inside the loop.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )

    all_chunks = []
    for file_info in files:
        # Copy the upload to a named temp file so PyPDFLoader gets a stable path.
        with NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
            with open(file_info.name, "rb") as f:
                tmp_file.write(f.read())
            tmp_file_path = tmp_file.name

        try:
            loader = PyPDFLoader(tmp_file_path)
            pages = loader.load()
            all_chunks.extend(text_splitter.split_documents(pages))
        finally:
            # Always remove the temp copy, even if PDF parsing fails.
            os.unlink(tmp_file_path)

    if not all_chunks:
        raise ValueError("No content was loaded from the files")

    # Multilingual embedding model so Arabic and English questions both work.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
    )
    vectorstore = FAISS.from_documents(all_chunks, embeddings)
    return vectorstore.as_retriever(search_kwargs={"k": 3})
|
|
| |
def load_model():
    """Construct the OpenRouter-backed chat model used to answer questions."""
    settings = {
        "base_url": "https://openrouter.ai/api/v1",
        "api_key": OPENROUTER_API_KEY,
        "model": "mistralai/mistral-7b-instruct",
        "temperature": 0.3,
    }
    return ChatOpenAI(**settings)
|
|
| |
# Prompt for the "stuff" QA chain: {context} receives the retrieved document
# chunks and {question} the user's query; the model is asked to answer in the
# language of the question (Arabic or English).
template = """
You are an intelligent assistant specialized in document analysis.
Use the following information from PDF files to answer the question:
answer dependent on the language question arabic or english
{context}
Question: {question}
Answer (in detail and in clear language):
"""


# Reusable PromptTemplate wrapping the raw template string above.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template
)
|
|
| |
# Module-level QA chain; stays None until PDFs are uploaded (see handle_upload).
qa_chain = None
# NOTE(review): this global is never read — respond() shadows it with its
# chat_history parameter. Candidate for removal; kept as-is here.
chat_history = []
|
|
| |
def respond(message, chat_history):
    """Answer *message* via the global QA chain and extend the chat history.

    Returns the history with one (message, reply) pair appended. Never
    raises: errors are reported back to the user as the reply text.
    """
    global qa_chain

    if qa_chain is None:
        reply = "Please upload PDF files first"
    else:
        try:
            reply = qa_chain({"query": message})["result"]
        except Exception as e:
            reply = f"An error occurred: {str(e)}"

    return chat_history + [(message, reply)]
|
|
| |
def handle_upload(files):
    """Build the global QA chain from newly uploaded PDF files.

    Args:
        files: uploaded file objects from the Gradio Files component.

    Returns:
        str: a human-readable success or error status for the UI; never
        raises — any failure is folded into the returned message.
    """
    global qa_chain
    try:
        retriever = process_pdfs(files)
        llm = load_model()

        # Reuse the module-level `prompt` instead of rebuilding an identical
        # PromptTemplate inline (the original duplicated the construction).
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            retriever=retriever,
            chain_type="stuff",
            chain_type_kwargs={"prompt": prompt},
            return_source_documents=False
        )
        return "Files uploaded successfully!"
    except Exception as e:
        return f"Error uploading files: {str(e)}"
|
|
| |
# --- Gradio UI -------------------------------------------------------------
# Layout: chat display on top, question input + Send below, then the PDF
# upload row with its status box, and finally a Clear button.
with gr.Blocks(title="Smart Document Assistant", theme=gr.themes.Default()) as demo:
    gr.Markdown("# 📄 Smart Document Assistant")
    gr.Markdown("Upload PDF files then start chatting")

    # Conversation display.
    chatbot = gr.Chatbot(height=500)

    # Question input plus send button on a single row.
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your question here...",
            show_label=False,
            scale=4
        )
        submit_btn = gr.Button("Send", scale=1)

    # PDF upload controls with a read-only status box.
    with gr.Row():
        file_upload = gr.Files(
            label="Upload PDF files",
            file_types=[".pdf"],
            file_count="multiple"
        )
        upload_status = gr.Textbox(label="Upload Status", interactive=False)

    clear_btn = gr.Button("Clear Chat")

    # Rebuild the QA chain whenever the upload selection changes.
    file_upload.change(
        handle_upload,
        inputs=file_upload,
        outputs=upload_status
    )

    # Send button: answer, then clear the input textbox.
    submit_btn.click(
        respond,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        None,
        [msg]
    )

    # Pressing Enter in the textbox behaves the same as clicking Send.
    msg.submit(
        respond,
        inputs=[msg, chatbot],
        outputs=[chatbot]
    ).then(
        lambda: "",
        None,
        [msg]
    )

    # Reset the conversation display.
    clear_btn.click(
        lambda: [],
        None,
        [chatbot]
    )
|
|
if __name__ == "__main__":
    # Bind on all interfaces so the app is reachable from outside a container.
    demo.launch(server_name="0.0.0.0", server_port=7860)