"""Flykite Airlines HR Q&A Bot — Streamlit RAG app (app.py)."""
import streamlit as st
import os
import time
import pdfplumber
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
# Page Configuration — must be the first Streamlit call in the script.
st.set_page_config(
    page_title="Flykite Airlines HR Q&A Bot",
    page_icon="✈️",
    layout="wide"
)

# Custom CSS classes used by st.markdown(..., unsafe_allow_html=True) blocks below:
# .answer-box (LLM answer), .source-box (retrieved chunks), .metric-box (stats).
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
color: #1E88E5;
text-align: center;
margin-bottom: 1rem;
}
.sub-header {
font-size: 1.2rem;
color: #666;
text-align: center;
margin-bottom: 2rem;
}
.answer-box {
background-color: #E3F2FD;
padding: 20px;
border-radius: 10px;
border-left: 5px solid #1E88E5;
margin: 10px 0;
}
.source-box {
background-color: #FFF3E0;
padding: 15px;
border-radius: 10px;
border-left: 5px solid #FF9800;
margin: 10px 0;
font-size: 0.9rem;
}
.metric-box {
background-color: #E8F5E9;
padding: 10px;
border-radius: 5px;
text-align: center;
}
</style>
""", unsafe_allow_html=True)

# Header — rendered as raw HTML so the CSS classes above apply.
st.markdown('<h1 class="main-header">✈️ Flykite Airlines HR Q&A Bot</h1>', unsafe_allow_html=True)
st.markdown('<p class="sub-header">Ask questions about HR policies and get instant answers powered by RAG</p>', unsafe_allow_html=True)
# Sidebar Configuration — collects all user-tunable inputs.
with st.sidebar:
    st.header("⚙️ Configuration")
    # API Key (masked input; read per-rerun, not persisted anywhere)
    groq_api_key = st.text_input("🔑 Groq API Key", type="password", help="Get free key from console.groq.com")
    st.divider()
    # PDF Upload
    st.header("📄 Upload HR Policy")
    uploaded_file = st.file_uploader("Upload PDF", type=['pdf'])
    st.divider()
    # RAG Parameters — slider args are (label, min, max, default[, step])
    st.header("🎛️ RAG Parameters")
    chunk_size = st.slider("Chunk Size", 500, 1500, 1000, 100)
    chunk_overlap = st.slider("Chunk Overlap", 50, 300, 200, 50)
    k_value = st.slider("Top-K Documents", 2, 8, 4)
    temperature = st.slider("Temperature", 0.0, 1.0, 0.3, 0.1)
    st.divider()
    # Process Button — True only on the rerun in which it is clicked
    process_btn = st.button("🚀 Process Document", type="primary", use_container_width=True)

# Initialize session state so these values survive Streamlit's script reruns.
if 'vector_store' not in st.session_state:
    st.session_state.vector_store = None
if 'raw_text' not in st.session_state:
    st.session_state.raw_text = None
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
# Functions
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of all pages in the uploaded PDF.

    Each page's text is followed by a newline. Pages with no extractable
    text (e.g. scanned images) are skipped. Returns "" for an empty PDF.
    """
    parts = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            content = page.extract_text()
            if content:
                parts.append(content + "\n")
    return "".join(parts)
def create_vector_store(text, chunk_size, chunk_overlap):
    """Chunk *text* and index the chunks in an in-memory FAISS store.

    Splits on paragraph/sentence/word boundaries, wraps each chunk in a
    Document tagged with its `chunk_id`, and embeds with a CPU MiniLM model.

    Returns:
        (vector_store, num_chunks) — the FAISS index and the chunk count.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", ". ", " ", ""],
    )
    pieces = text_splitter.split_text(text)

    docs = []
    for idx, piece in enumerate(pieces):
        docs.append(Document(page_content=piece, metadata={'chunk_id': idx}))

    # Small, fast sentence-transformer; forced to CPU for Spaces hardware.
    embedder = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )
    store = FAISS.from_documents(docs, embedder)
    return store, len(pieces)
def get_answer(question, vector_store, api_key, k_value, temperature):
    """Answer *question* via a stuff-chain RAG pipeline over *vector_store*.

    Args:
        question: the user's question text.
        vector_store: FAISS store built by create_vector_store().
        api_key: Groq API key for the chat model.
        k_value: number of chunks to retrieve.
        temperature: sampling temperature for the LLM.

    Returns:
        (answer_text, source_documents, response_time_seconds).
    """
    # Pass the key directly rather than mutating os.environ: setting the
    # env var leaked the secret process-wide (and to any subprocess) as a
    # hidden side effect of a question-answering call.
    llm = ChatGroq(
        api_key=api_key,
        model="llama-3.3-70b-versatile",
        temperature=temperature,
        max_tokens=2048
    )
    retriever = vector_store.as_retriever(search_kwargs={'k': k_value})

    # Prompt constrains the model to the retrieved context only.
    prompt_template = """You are an expert HR Policy Assistant for Flykite Airlines.
Use ONLY the following context to answer the question. Be specific and cite policy sections.
CONTEXT:
{context}
QUESTION: {question}
Provide a helpful, accurate answer with policy references.
ANSWER:"""
    prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

    # 'stuff' chain: all retrieved chunks are concatenated into one prompt.
    rag_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt}
    )

    start_time = time.time()
    result = rag_chain.invoke({'query': question})
    response_time = time.time() - start_time
    return result['result'], result['source_documents'], response_time
# Process Document — triggered by the sidebar button; validates inputs,
# then extracts, chunks, and indexes the uploaded PDF into session state.
if process_btn:
    if not groq_api_key:
        st.sidebar.error("❌ Please enter Groq API Key")
    elif not uploaded_file:
        st.sidebar.error("❌ Please upload a PDF file")
    else:
        with st.spinner("Processing document..."):
            try:
                # Extract text
                st.session_state.raw_text = extract_text_from_pdf(uploaded_file)
                # Create vector store
                st.session_state.vector_store, num_chunks = create_vector_store(
                    st.session_state.raw_text, chunk_size, chunk_overlap
                )
            except Exception as e:
                # Previously a corrupt/unreadable PDF raised here and crashed
                # the entire app run; surface the failure in the sidebar instead.
                st.sidebar.error(f"❌ Failed to process document: {e}")
            else:
                st.sidebar.success(f"✅ Document processed! ({num_chunks} chunks created)")
# Main Content — two columns: Q&A panel (left, wider) and chat history (right).
col1, col2 = st.columns([2, 1])

with col1:
    st.header("💬 Ask a Question")

    # Sample Questions — quick-start picks for first-time users
    st.markdown("**Sample Questions:**")
    sample_questions = [
        "What are the effects on benefits if my probation is extended?",
        "How do I apply for leave due to a family demise?",
        "What should I do if I notice harassment of a colleague?"
    ]
    selected_sample = st.selectbox("Select a sample question:", ["-- Select --"] + sample_questions)

    # Custom Question
    question = st.text_area("Or type your own question:", height=100)

    # Use sample if selected — typed text takes priority over the dropdown
    if selected_sample != "-- Select --" and not question:
        question = selected_sample

    # Ask Button
    ask_btn = st.button("🔍 Get Answer", type="primary", use_container_width=True)

    if ask_btn:
        # Validate prerequisites before calling the LLM
        if not groq_api_key:
            st.error("❌ Please enter Groq API Key in sidebar")
        elif st.session_state.vector_store is None:
            st.error("❌ Please upload and process a document first")
        elif not question:
            st.error("❌ Please enter a question")
        else:
            with st.spinner("🤔 Thinking..."):
                try:
                    answer, sources, response_time = get_answer(
                        question,
                        st.session_state.vector_store,
                        groq_api_key,
                        k_value,
                        temperature
                    )
                    # Display Answer (HTML so the .answer-box CSS applies)
                    st.markdown("### 📝 Answer")
                    st.markdown(f'<div class="answer-box">{answer}</div>', unsafe_allow_html=True)
                    # Metrics
                    col_a, col_b, col_c = st.columns(3)
                    col_a.metric("⏱️ Response Time", f"{response_time:.2f}s")
                    col_b.metric("📚 Sources Used", len(sources))
                    col_c.metric("🎯 Top-K", k_value)
                    # Source Documents — first 500 chars of each retrieved chunk
                    with st.expander("📄 View Source Documents"):
                        for i, doc in enumerate(sources, 1):
                            st.markdown(f'<div class="source-box"><strong>Source {i}:</strong><br>{doc.page_content[:500]}...</div>', unsafe_allow_html=True)
                    # Add to chat history (persists across reruns via session state)
                    st.session_state.chat_history.append({
                        'question': question,
                        'answer': answer,
                        'time': response_time
                    })
                except Exception as e:
                    # Broad catch at the UI boundary: surface API/retrieval
                    # failures without crashing the app run.
                    st.error(f"❌ Error: {str(e)}")
with col2:
    st.header("📜 Chat History")
    if st.session_state.chat_history:
        # Show the 5 most recent Q&As, newest first; the expander label
        # keeps each entry's absolute question number (Q<N> is the latest).
        for i, chat in enumerate(reversed(st.session_state.chat_history[-5:]), 1):
            with st.expander(f"Q{len(st.session_state.chat_history) - i + 1}: {chat['question'][:50]}..."):
                st.write(f"**Answer:** {chat['answer'][:300]}...")
                st.write(f"**Time:** {chat['time']:.2f}s")
    else:
        st.info("No questions asked yet. Start by asking a question!")
    # Clear history and rerun so the panel refreshes immediately
    if st.button("🗑️ Clear History"):
        st.session_state.chat_history = []
        st.rerun()
# Footer — static credits rendered as raw HTML
st.divider()
st.markdown("""
<div style="text-align: center; color: #666; font-size: 0.9rem;">
<p>🛫 Flykite Airlines HR Q&A Bot | Powered by RAG + Groq LLama 3.3 70B</p>
<p>Built with Streamlit | Deployed on Hugging Face Spaces</p>
</div>
""", unsafe_allow_html=True)