Spaces:

pratikshahp
/

Chat_with_HR_Assistant

Sleeping

App Files Files Community

Chat_with_HR_Assistant / app.py

pratikshahp

Update app.py

bde4f3f verified about 1 year ago

raw

history blame contribute delete

3.54 kB

	import fitz # PyMuPDF
	from langchain_community.vectorstores import Chroma
	from langchain_community.embeddings import HuggingFaceEmbeddings
	from langchain_text_splitters import RecursiveCharacterTextSplitter
	from openai import OpenAI
	from dotenv import load_dotenv
	import os
	import gradio as gr

	# Load variables from a .env file so the API key can be read from the environment.
	load_dotenv()
	api_key = os.environ.get("OPENAI_API_KEY")

	# Shared OpenAI client used for every chat completion request.
	client = OpenAI(api_key=api_key)

	# PDF that questions are answered from. The path is relative, so the file
	# must be in the same directory as app.py.
	PDF_FILE = "Resume_Pratiksha.pdf"  # alternative document: "Company_HR_Policy.pdf"

	# Utility Functions
	def load_pdf(file_path):
	    """Return the concatenated text of every page of the PDF at ``file_path``.

	    Failures are not raised: any exception while opening or reading the
	    file is returned as a string of the form "Error reading PDF: <details>".
	    """
	    try:
	        with fitz.open(file_path) as pdf:
	            page_texts = (page.get_text() for page in pdf)
	            return "".join(page_texts)
	    except Exception as exc:
	        return f"Error reading PDF: {exc}"

	def split_text(text, chunk_size=1000, chunk_overlap=20):
	    """Break ``text`` into documents using a recursive character splitter.

	    ``chunk_size`` and ``chunk_overlap`` are passed straight to the splitter,
	    with lengths measured by ``len``.
	    """
	    splitter_options = {
	        "chunk_size": chunk_size,
	        "chunk_overlap": chunk_overlap,
	        "length_function": len,
	        "is_separator_regex": False,
	    }
	    splitter = RecursiveCharacterTextSplitter(**splitter_options)
	    documents = splitter.create_documents([text])
	    return documents

	def create_and_load_db(chunks, persist_directory="pdf_embeddings"):
	    """Embed ``chunks`` into a Chroma store and return a store reopened from disk.

	    The store is written under ``persist_directory`` using default
	    HuggingFace embeddings, persisted, and then a new Chroma instance
	    pointing at the same directory and embeddings is returned.
	    """
	    embedding_model = HuggingFaceEmbeddings()
	    store = Chroma.from_documents(
	        documents=chunks,
	        embedding=embedding_model,
	        persist_directory=persist_directory,
	    )
	    store.persist()
	    reopened_store = Chroma(
	        persist_directory=persist_directory,
	        embedding_function=embedding_model,
	    )
	    return reopened_store

	def generate_response(context, question):
	    """Ask the OpenAI chat model to answer ``question`` using ``context``.

	    Returns the stripped text of the first completion choice. Failures are
	    not raised: any exception is returned as a string of the form
	    "Error generating response: <details>".
	    """
	    try:
	        system_message = {
	            "role": "system",
	            "content": "You are an assistant that answers questions based on PDF content.",
	        }
	        user_message = {
	            "role": "user",
	            "content": f"Context: {context}\n\nQuestion: {question}\n\nAnswer:",
	        }
	        completion = client.chat.completions.create(
	            model="gpt-3.5-turbo",  # Replace with preferred model
	            messages=[system_message, user_message],
	            max_tokens=150,
	        )
	        first_choice = completion.choices[0]
	        return first_choice.message.content.strip()
	    except Exception as exc:
	        return f"Error generating response: {exc}"

	# Vector stores built so far, keyed by PDF path. Without this cache every
	# question would re-embed the whole PDF and add the same chunks to the
	# persisted Chroma directory again.
	_VECTORDB_CACHE = {}


	def _get_vectordb(pdf_path):
	    """Return ``(vectordb, None)`` for ``pdf_path``, or ``(None, message)`` on failure.

	    The store is built on first use (load PDF -> split -> embed) and reused
	    for later calls in the same process. Failed builds are not cached, so a
	    later call retries. ``message`` is a user-facing string.
	    """
	    cached = _VECTORDB_CACHE.get(pdf_path)
	    if cached is not None:
	        return cached, None

	    # Step 1: Load and extract text from the PDF
	    pdf_text = load_pdf(pdf_path)
	    # load_pdf reports failures as a string with this exact prefix; matching
	    # the full prefix avoids rejecting a document whose text merely begins
	    # with the word "Error".
	    if pdf_text.startswith("Error reading PDF:"):
	        return None, pdf_text
	    if not pdf_text.strip():
	        return None, "No text could be extracted from the PDF."

	    # Step 2: Split the text into chunks
	    chunks = split_text(pdf_text)
	    if not chunks:
	        return None, "No text could be extracted from the PDF."

	    # Step 3: Create and load ChromaDB
	    vectordb = create_and_load_db(chunks)
	    _VECTORDB_CACHE[pdf_path] = vectordb
	    return vectordb, None


	def process_question(question):
	    """Answer ``question`` from the content of ``PDF_FILE``.

	    Args:
	        question: The user's question. ``None``, empty, or whitespace-only
	            input is rejected with a prompt to provide a question.

	    Returns:
	        The generated answer, or a human-readable message describing why no
	        answer could be produced. This function does not raise; every
	        failure is converted to a message so the UI always has text to show.
	    """
	    if not question or not question.strip():
	        return "Please provide a question."

	    # Building the index can fail (embedding model, disk, ...); report it
	    # instead of letting the exception escape to the UI.
	    try:
	        vectordb, error_message = _get_vectordb(PDF_FILE)
	    except Exception as e:
	        return f"Error preparing the PDF index: {str(e)}"
	    if error_message is not None:
	        return error_message

	    # Step 4: Perform similarity search
	    try:
	        docs = vectordb.similarity_search(question)
	        if not docs:
	            return "No relevant information found."

	        # Step 5: Generate a response using the best-matching chunk as context
	        context = docs[0].page_content
	        return generate_response(context, question)
	    except Exception as e:
	        return f"Error during similarity search or response generation: {str(e)}"

	# Gradio UI: a question box and a read-only answer box side by side, with a
	# submit button that runs process_question on the entered text.
	with gr.Blocks() as demo:
	    gr.Markdown("# PDF Chatbot")

	    with gr.Row():
	        question_input = gr.Textbox(
	            label="Ask a Question",
	            placeholder="Enter your question here...",
	        )
	        output = gr.Textbox(
	            label="Answer",
	            lines=5,
	            interactive=False,
	        )

	    submit_button = gr.Button("Submit")
	    submit_button.click(
	        fn=process_question,
	        inputs=[question_input],
	        outputs=output,
	    )

	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()