Spaces:

ssahal
/

Projects

Sleeping

App Files Files Community

Projects / app.py

ssahal

Upload 3 files

a701a47 verified 6 months ago

raw

history blame contribute delete

5.44 kB


	import json
	import os
	import re
	import tempfile
	import uuid

	import pandas as pd
	import requests
	import streamlit as st
	import tiktoken
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain_community.document_loaders import PyMuPDFLoader
	from langchain_community.embeddings.openai import OpenAIEmbeddings
	from langchain_community.vectorstores import Chroma
	from openai import OpenAI


	# Credentials and endpoint are read from the environment variables
	# API_KEY and API_BASE; either value is None when the variable is unset.
	OPENAI_API_KEY = os.getenv("API_KEY")
	OPENAI_API_BASE = os.getenv("API_BASE")

	# Module-level OpenAI client used for the chat completion requests below.
	client = OpenAI(api_key=OPENAI_API_KEY, base_url=OPENAI_API_BASE)

	# System prompt sent as the "system" message of every chat completion request.
	# It limits the assistant to the text supplied after the ###Context token,
	# prescribes the "Answer:" / "Source:" reply layout, and fixes the fallback
	# sentence to use when the context does not contain an answer.
	qna_system_message = """
	# You are an AI assistant designed to support the HR team at Flykite Airlines. Your task is to provide evidence-based, concise, and relevant answers to employee queries based on the context provided.

	User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context. The context contains references to specific portions of the official Flykite Airlines HR Policy Handbook and related documentation.

	When crafting your response:
	1. Use only the provided context to answer the question.
	2. If the answer is found in the context, respond with concise and actionable HR policy information.
	3. Include the source reference with the section name, heading, or clause number, as provided in the context.
	4. If the question is unrelated to the context or the context is empty, clearly respond with: "Sorry, this is out of my knowledge base."

	Please adhere to the following response guidelines:
	- Provide clear, direct answers using only the given context.
	- Do not include any additional information outside of the context.
	- Avoid rephrasing or summarizing the context unless explicitly relevant to the question.
	- If no relevant answer exists in the context, respond with: "Sorry, this is out of my knowledge base."
	- If the context is not provided, your response should also be: "Sorry, this is out of my knowledge base."

	Here is an example of how to structure your response:

	Answer:
	[Answer based on context]

	Source:
	[Source details with section, clause, or heading]
	"""

	# Template for the "user" message of each chat completion request.
	# The literal placeholders {context} and {question} are filled by textual
	# substitution (not str.format) in generate_rag_response: {context} receives
	# the joined retrieved chunks and {question} receives the user's question.
	qna_user_message_template = """
	# ###Context
	Here are some excerpts from the Flykite Airlines HR Policy Handbook and their sources that are relevant to the employee's question mentioned below:
	{context}

	###Question
	{question}
	"""

	# Processing PDF files
	@st.cache_resource
	def load_and_process_pdfs(uploaded_files):
	    """Build a similarity retriever over the text of the uploaded PDFs.

	    Each upload is written to a temporary ``.pdf`` file so ``PyMuPDFLoader``
	    can read it from a path. The loaded documents are split into token-based
	    chunks, embedded with ``OpenAIEmbeddings`` and indexed in a Chroma
	    vector store.

	    The function is decorated with ``st.cache_resource``, so Streamlit can
	    reuse the returned retriever instead of re-running this pipeline.

	    Args:
	        uploaded_files: Iterable of uploaded-file objects whose
	            ``getvalue()`` returns the raw PDF bytes.

	    Returns:
	        The vector store's retriever, configured for similarity search
	        returning the 3 closest chunks per query.
	    """
	    all_documents = []
	    for uploaded_file in uploaded_files:
	        # delete=False keeps the file on disk after the handle is closed so
	        # the loader can reopen it by path.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp_file:
	            tmp_file.write(uploaded_file.getvalue())
	            tmp_file_path = tmp_file.name
	        try:
	            all_documents.extend(PyMuPDFLoader(tmp_file_path).load())
	        finally:
	            # Clean up the temporary file even when the PDF fails to load.
	            os.remove(tmp_file_path)

	    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
	        encoding_name='cl100k_base',
	        chunk_size=1000,
	    )
	    document_chunks = text_splitter.split_documents(all_documents)

	    embedding_model = OpenAIEmbeddings(
	        openai_api_key=OPENAI_API_KEY,
	        openai_api_base=OPENAI_API_BASE,
	    )

	    # Create an in-memory vector store (or use a persistent one if needed).
	    # NOTE(review): a fresh collection name per call keeps the chunks of one
	    # upload set out of retrievers built for another. Chroma's default
	    # in-memory client is believed to share the default collection within a
	    # process -- confirm against the installed chromadb version.
	    vectorstore = Chroma.from_documents(
	        document_chunks,
	        embedding_model,
	        collection_name=f"uploads-{uuid.uuid4().hex}",
	    )
	    return vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 3})

	def generate_rag_response(user_input, retriever, max_tokens=500, temperature=0, top_p=0.95):
	    """Answer ``user_input`` with the chat model, grounded in retrieved chunks.

	    Args:
	        user_input: The question text; used both as the retrieval query and
	            as the ``{question}`` value of the user message.
	        retriever: Object whose ``get_relevant_documents(query=...)`` returns
	            documents exposing a ``page_content`` attribute.
	        max_tokens: Forwarded to the chat completion request.
	        temperature: Forwarded to the chat completion request.
	        top_p: Forwarded to the chat completion request.

	    Returns:
	        The model's reply with surrounding whitespace stripped, or a
	        "Sorry, I encountered the following error" message containing the
	        exception text when the completion request fails.
	    """
	    # Retrieve relevant document chunks and combine them into one context
	    relevant_document_chunks = retriever.get_relevant_documents(query=user_input)
	    context_for_query = ". ".join(d.page_content for d in relevant_document_chunks)

	    # Fill both placeholders in a single pass over the template. Chained
	    # str.replace calls would also rewrite a literal "{question}" that
	    # happens to appear inside the retrieved context. A callable replacement
	    # inserts the values verbatim (no backslash escape processing).
	    substitutions = {'{context}': context_for_query, '{question}': user_input}
	    user_message = re.sub(
	        r'\{context\}|\{question\}',
	        lambda match: substitutions[match.group(0)],
	        qna_user_message_template,
	    )

	    # Generate the response; any failure is reported back as text so the
	    # caller always receives a string to display.
	    try:
	        completion = client.chat.completions.create(
	            model="gpt-4o-mini",
	            messages=[
	                {"role": "system", "content": qna_system_message},
	                {"role": "user", "content": user_message},
	            ],
	            max_tokens=max_tokens,
	            temperature=temperature,
	            top_p=top_p,
	        )
	        response = completion.choices[0].message.content.strip()
	    except Exception as e:
	        response = f'Sorry, I encountered the following error: \n {e}'

	    return response

	# Streamlit App: upload PDFs, index them, then answer questions about them.
	st.title("LLM-Powered Support bot")

	uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

	retriever = None
	if uploaded_files:
	    # A spinner goes away once processing ends, so the page is not left
	    # showing a "Processing..." notice next to the success message.
	    with st.spinner("Processing uploaded PDFs..."):
	        try:
	            retriever = load_and_process_pdfs(uploaded_files)
	        except Exception as e:
	            # Top-level UI boundary: report the failure on the page instead
	            # of aborting the script run with a traceback. retriever stays
	            # None, so the question box below is not rendered.
	            st.error(f"Sorry, the uploaded PDFs could not be processed: \n {e}")
	    if retriever is not None:
	        st.success("PDFs processed and ready for questioning!")


	if retriever is not None:
	    user_question = st.text_input("Ask a question about the uploaded documents:")
	    if user_question:
	        with st.spinner("Generating response..."):
	            rag_response = generate_rag_response(user_question, retriever)
	        st.write(rag_response)