Spaces:
Sleeping
Sleeping
import html
import os
import time

import pdfplumber
import streamlit as st
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter
# Page setup: browser-tab metadata, the stylesheet for the custom HTML
# snippets, and the title/subtitle banner.

# Stylesheet injected through st.markdown; kept as one constant so the
# rendering calls below stay short.
CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
color: #1E88E5;
text-align: center;
margin-bottom: 1rem;
}
.sub-header {
font-size: 1.2rem;
color: #666;
text-align: center;
margin-bottom: 2rem;
}
.answer-box {
background-color: #E3F2FD;
padding: 20px;
border-radius: 10px;
border-left: 5px solid #1E88E5;
margin: 10px 0;
}
.source-box {
background-color: #FFF3E0;
padding: 15px;
border-radius: 10px;
border-left: 5px solid #FF9800;
margin: 10px 0;
font-size: 0.9rem;
}
.metric-box {
background-color: #E8F5E9;
padding: 10px;
border-radius: 5px;
text-align: center;
}
</style>
"""
PAGE_TITLE_HTML = '<h1 class="main-header">✈️ Flykite Airlines HR Q&A Bot</h1>'
PAGE_SUBTITLE_HTML = '<p class="sub-header">Ask questions about HR policies and get instant answers powered by RAG</p>'

# The page configuration is issued before any other Streamlit command.
st.set_page_config(
    layout="wide",
    page_icon="✈️",
    page_title="Flykite Airlines HR Q&A Bot",
)

# Emit the stylesheet first, then the banner that relies on its classes.
for html_snippet in (CUSTOM_CSS, PAGE_TITLE_HTML, PAGE_SUBTITLE_HTML):
    st.markdown(html_snippet, unsafe_allow_html=True)
# Sidebar: API credentials, PDF upload, RAG tuning knobs and the button that
# triggers document processing.
with st.sidebar:
    st.header("⚙️ Configuration")

    # Masked input so the key is not shown on screen.
    groq_api_key = st.text_input(
        label="🔑 Groq API Key",
        type="password",
        help="Get free key from console.groq.com",
    )
    st.divider()

    st.header("📄 Upload HR Policy")
    uploaded_file = st.file_uploader(label="Upload PDF", type=['pdf'])
    st.divider()

    # Chunking parameters feed the text splitter; Top-K and temperature are
    # used when answering a question.
    st.header("🎛️ RAG Parameters")
    chunk_size = st.slider(
        label="Chunk Size", min_value=500, max_value=1500, value=1000, step=100
    )
    chunk_overlap = st.slider(
        label="Chunk Overlap", min_value=50, max_value=300, value=200, step=50
    )
    k_value = st.slider(label="Top-K Documents", min_value=2, max_value=8, value=4)
    temperature = st.slider(
        label="Temperature", min_value=0.0, max_value=1.0, value=0.3, step=0.1
    )
    st.divider()

    process_btn = st.button(
        label="🚀 Process Document",
        type="primary",
        use_container_width=True,
    )
# Session-state defaults: only keys that are missing get a value, so anything
# already stored in the session is left untouched.
for state_key in ("vector_store", "raw_text"):
    if state_key not in st.session_state:
        st.session_state[state_key] = None
if "chat_history" not in st.session_state:
    st.session_state["chat_history"] = []
| # Functions | |
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page in *pdf_file*.

    Each page's text is followed by a newline. Pages for which
    ``extract_text()`` yields nothing (``None`` or an empty string) are
    skipped entirely.
    """
    with pdfplumber.open(pdf_file) as document:
        # The generator is consumed inside the ``with`` block, while the PDF
        # is still open.
        page_texts = (page.extract_text() for page in document.pages)
        return "".join(f"{content}\n" for content in page_texts if content)
@st.cache_resource(show_spinner=False)
def _load_embedding_model():
    """Build the CPU sentence-embedding model once and reuse it afterwards."""
    return HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
    )


def create_vector_store(text, chunk_size, chunk_overlap):
    """Split *text* into chunks and index them in a FAISS vector store.

    Args:
        text: Raw document text to index.
        chunk_size: Maximum chunk length passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks passed to the
            text splitter.

    Returns:
        A ``(vector_store, chunk_count)`` tuple.

    Raises:
        ValueError: If *text* is empty/whitespace-only or yields no chunks
            (for example a scanned PDF without a text layer).
    """
    # Fail early with an actionable message: an empty document list would
    # otherwise fail deep inside the index build with an unhelpful error.
    if not text or not text.strip():
        raise ValueError(
            "Cannot build a vector store: the document contains no extractable text."
        )

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", ". ", " ", ""],
    )
    chunks = splitter.split_text(text)
    if not chunks:
        raise ValueError(
            "Cannot build a vector store: the document produced no text chunks."
        )

    # Each chunk keeps its position so a source can be traced back later.
    documents = [
        Document(page_content=chunk, metadata={'chunk_id': i})
        for i, chunk in enumerate(chunks)
    ]
    # The embedding model is cached, so re-processing with different chunk
    # settings does not reload it.
    vector_store = FAISS.from_documents(documents, _load_embedding_model())
    return vector_store, len(chunks)
def get_answer(question, vector_store, api_key, k_value, temperature):
    """Answer *question* with retrieval-augmented generation.

    Args:
        question: The user's question.
        vector_store: Vector store used to retrieve context chunks.
        api_key: Groq API key used for this request only.
        k_value: Number of chunks to retrieve.
        temperature: Sampling temperature for the LLM.

    Returns:
        A ``(answer_text, source_documents, elapsed_seconds)`` tuple.
    """
    # Pass the key directly to the client instead of exporting it through
    # os.environ: the environment is process-wide, so in a multi-user
    # deployment one visitor's key could be picked up by another session.
    llm = ChatGroq(
        model="llama-3.3-70b-versatile",
        temperature=temperature,
        max_tokens=2048,
        groq_api_key=api_key,
    )
    retriever = vector_store.as_retriever(search_kwargs={'k': k_value})
    prompt_template = """You are an expert HR Policy Assistant for Flykite Airlines.
Use ONLY the following context to answer the question. Be specific and cite policy sections.
CONTEXT:
{context}
QUESTION: {question}
Provide a helpful, accurate answer with policy references.
ANSWER:"""
    prompt = PromptTemplate(
        template=prompt_template,
        input_variables=['context', 'question'],
    )
    # 'stuff' places all retrieved chunks into the single {context} slot.
    rag_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    start_time = time.perf_counter()
    result = rag_chain.invoke({'query': question})
    response_time = time.perf_counter() - start_time
    return result['result'], result['source_documents'], response_time
# Process Document: extract the PDF text and build the vector store when the
# sidebar button is pressed.
if process_btn:
    if not groq_api_key:
        st.sidebar.error("❌ Please enter Groq API Key")
    elif not uploaded_file:
        st.sidebar.error("❌ Please upload a PDF file")
    else:
        try:
            with st.spinner("Processing document..."):
                extracted_text = extract_text_from_pdf(uploaded_file)
                if extracted_text.strip():
                    new_store, num_chunks = create_vector_store(
                        extracted_text, chunk_size, chunk_overlap
                    )
                else:
                    # Nothing to index (e.g. a scanned PDF with no text layer).
                    new_store, num_chunks = None, 0
        except Exception as exc:
            # UI boundary: report unreadable PDFs or indexing failures in the
            # sidebar instead of surfacing a raw traceback.
            st.sidebar.error(f"❌ Error: {exc}")
        else:
            if new_store is None:
                st.sidebar.error("❌ No extractable text found in the uploaded PDF")
            else:
                # Commit to session state only after everything succeeded, so
                # a failed upload never leaves raw_text and vector_store
                # describing different documents.
                st.session_state.raw_text = extracted_text
                st.session_state.vector_store = new_store
                st.sidebar.success(f"✅ Document processed! ({num_chunks} chunks created)")
# Main Content: a wide question/answer column and a narrow history column.
col1, col2 = st.columns([2, 1])
with col1:
    st.header("💬 Ask a Question")
    # Sample Questions
    st.markdown("**Sample Questions:**")
    sample_questions = [
        "What are the effects on benefits if my probation is extended?",
        "How do I apply for leave due to a family demise?",
        "What should I do if I notice harassment of a colleague?"
    ]
    selected_sample = st.selectbox("Select a sample question:", ["-- Select --"] + sample_questions)
    # Custom Question; stripped so whitespace-only input counts as empty.
    question = st.text_area("Or type your own question:", height=100).strip()
    # A typed question takes precedence; the sample is only a fallback.
    if selected_sample != "-- Select --" and not question:
        question = selected_sample
    # Ask Button
    ask_btn = st.button("🔍 Get Answer", type="primary", use_container_width=True)
    if ask_btn:
        if not groq_api_key:
            st.error("❌ Please enter Groq API Key in sidebar")
        elif st.session_state.vector_store is None:
            st.error("❌ Please upload and process a document first")
        elif not question:
            st.error("❌ Please enter a question")
        else:
            with st.spinner("🤔 Thinking..."):
                try:
                    answer, sources, response_time = get_answer(
                        question,
                        st.session_state.vector_store,
                        groq_api_key,
                        k_value,
                        temperature
                    )
                    # Display Answer. The text comes from the LLM and is
                    # rendered with unsafe_allow_html, so escape markup
                    # characters to prevent HTML injection and to keep
                    # literal '<' / '&' from breaking the layout.
                    safe_answer = html.escape(answer, quote=False)
                    st.markdown("### 📝 Answer")
                    st.markdown(f'<div class="answer-box">{safe_answer}</div>', unsafe_allow_html=True)
                    # Metrics
                    col_a, col_b, col_c = st.columns(3)
                    col_a.metric("⏱️ Response Time", f"{response_time:.2f}s")
                    col_b.metric("📚 Sources Used", len(sources))
                    col_c.metric("🎯 Top-K", k_value)
                    # Source Documents
                    with st.expander("📄 View Source Documents"):
                        for i, doc in enumerate(sources, 1):
                            # Truncate first, then escape, so an HTML entity
                            # is never cut in half. The text originates from
                            # the uploaded PDF and is untrusted.
                            excerpt = html.escape(doc.page_content[:500], quote=False)
                            st.markdown(f'<div class="source-box"><strong>Source {i}:</strong><br>{excerpt}...</div>', unsafe_allow_html=True)
                    # Add to chat history (the raw, unescaped answer is stored).
                    st.session_state.chat_history.append({
                        'question': question,
                        'answer': answer,
                        'time': response_time
                    })
                except Exception as e:
                    # UI boundary: show the failure to the user instead of a traceback.
                    st.error(f"❌ Error: {str(e)}")
with col2:
    st.header("📜 Chat History")
    history = st.session_state.chat_history
    if not history:
        st.info("No questions asked yet. Start by asking a question!")
    else:
        # Show the five most recent entries, newest first, numbered by their
        # position in the full history.
        total_entries = len(history)
        for offset, entry in enumerate(reversed(history[-5:])):
            entry_title = f"Q{total_entries - offset}: {entry['question'][:50]}..."
            with st.expander(entry_title):
                st.write(f"**Answer:** {entry['answer'][:300]}...")
                st.write(f"**Time:** {entry['time']:.2f}s")
    # Clearing replaces the list and reruns the script so the column redraws.
    if st.button("🗑️ Clear History"):
        st.session_state.chat_history = []
        st.rerun()
# Footer: a divider followed by a centered attribution block.
FOOTER_HTML = """
<div style="text-align: center; color: #666; font-size: 0.9rem;">
<p>🛫 Flykite Airlines HR Q&A Bot | Powered by RAG + Groq LLama 3.3 70B</p>
<p>Built with Streamlit | Deployed on Hugging Face Spaces</p>
</div>
"""
st.divider()
st.markdown(FOOTER_HTML, unsafe_allow_html=True)