Supreeth15 committed on
Commit
4198540
·
verified ·
1 Parent(s): cdb26ad

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +39 -13
  2. app.py +276 -0
  3. requirements.txt +12 -3
README.md CHANGED
@@ -1,19 +1,45 @@
1
  ---
2
- title: Flykite Hr Bot
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
  pinned: false
11
- short_description: flykite-hr-bot
12
  ---
13
 
14
- # Welcome to Streamlit!
15
 
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
 
18
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Flykite Airlines HR Q&A Bot
3
+ emoji: ✈️
4
+ colorFrom: blue
5
+ colorTo: indigo
6
+ sdk: streamlit
7
+ sdk_version: 1.40.0
8
+ app_file: app.py
 
9
  pinned: false
10
+ license: mit
11
  ---
12
 
13
+ # ✈️ Flykite Airlines HR Q&A Bot
14
 
15
+ An intelligent HR Policy Q&A Bot powered by RAG (Retrieval-Augmented Generation) and the Llama 3.3 70B model served by Groq.
16
 
17
+ ## Features
18
+
19
+ - 📄 Upload HR Policy PDF documents
20
+ - 💬 Ask questions in natural language
21
+ - 🔍 RAG-based retrieval for accurate answers
22
+ - 📚 View source documents for transparency
23
+ - ⚙️ Configurable parameters (chunk size, k-value, temperature)
24
+
25
+ ## How to Use
26
+
27
+ 1. Get a FREE Groq API key from [console.groq.com](https://console.groq.com/keys)
28
+ 2. Enter your API key in the sidebar
29
+ 3. Upload the HR Policy PDF
30
+ 4. Click "Process Document"
31
+ 5. Ask questions!
32
+
33
+ ## Technology Stack
34
+
35
+ - **LLM**: Llama 3.3 70B via Groq (FREE)
36
+ - **Embeddings**: all-MiniLM-L6-v2
37
+ - **Vector Store**: FAISS
38
+ - **Framework**: LangChain + Streamlit
39
+ - **Hosting**: Hugging Face Spaces
40
+
41
+ ## Sample Questions
42
+
43
+ - What are the effects on benefits if my probation is extended?
44
+ - How do I apply for leave due to a family demise?
45
+ - What should I do if I notice harassment of a colleague?
app.py ADDED
@@ -0,0 +1,276 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import time
4
+ import pdfplumber
5
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
6
+ from langchain_core.documents import Document
7
+ from langchain_community.embeddings import HuggingFaceEmbeddings
8
+ from langchain_community.vectorstores import FAISS
9
+ from langchain.chains import RetrievalQA
10
+ from langchain_core.prompts import PromptTemplate
11
+ from langchain_groq import ChatGroq
12
+
13
+ # Page Configuration
14
+ st.set_page_config(
15
+ page_title="Flykite Airlines HR Q&A Bot",
16
+ page_icon="✈️",
17
+ layout="wide"
18
+ )
19
+
20
+ # Custom CSS
21
+ st.markdown("""
22
+ <style>
23
+ .main-header {
24
+ font-size: 2.5rem;
25
+ color: #1E88E5;
26
+ text-align: center;
27
+ margin-bottom: 1rem;
28
+ }
29
+ .sub-header {
30
+ font-size: 1.2rem;
31
+ color: #666;
32
+ text-align: center;
33
+ margin-bottom: 2rem;
34
+ }
35
+ .answer-box {
36
+ background-color: #E3F2FD;
37
+ padding: 20px;
38
+ border-radius: 10px;
39
+ border-left: 5px solid #1E88E5;
40
+ margin: 10px 0;
41
+ }
42
+ .source-box {
43
+ background-color: #FFF3E0;
44
+ padding: 15px;
45
+ border-radius: 10px;
46
+ border-left: 5px solid #FF9800;
47
+ margin: 10px 0;
48
+ font-size: 0.9rem;
49
+ }
50
+ .metric-box {
51
+ background-color: #E8F5E9;
52
+ padding: 10px;
53
+ border-radius: 5px;
54
+ text-align: center;
55
+ }
56
+ </style>
57
+ """, unsafe_allow_html=True)
58
+
59
+ # Header
60
+ st.markdown('<h1 class="main-header">✈️ Flykite Airlines HR Q&A Bot</h1>', unsafe_allow_html=True)
61
+ st.markdown('<p class="sub-header">Ask questions about HR policies and get instant answers powered by RAG</p>', unsafe_allow_html=True)
62
+
63
+ # Sidebar Configuration
64
+ with st.sidebar:
65
+ st.header("⚙️ Configuration")
66
+
67
+ # API Key
68
+ groq_api_key = st.text_input("🔑 Groq API Key", type="password", help="Get free key from console.groq.com")
69
+
70
+ st.divider()
71
+
72
+ # PDF Upload
73
+ st.header("📄 Upload HR Policy")
74
+ uploaded_file = st.file_uploader("Upload PDF", type=['pdf'])
75
+
76
+ st.divider()
77
+
78
+ # RAG Parameters
79
+ st.header("🎛️ RAG Parameters")
80
+ chunk_size = st.slider("Chunk Size", 500, 1500, 1000, 100)
81
+ chunk_overlap = st.slider("Chunk Overlap", 50, 300, 200, 50)
82
+ k_value = st.slider("Top-K Documents", 2, 8, 4)
83
+ temperature = st.slider("Temperature", 0.0, 1.0, 0.3, 0.1)
84
+
85
+ st.divider()
86
+
87
+ # Process Button
88
+ process_btn = st.button("🚀 Process Document", type="primary", use_container_width=True)
89
+
90
+ # Initialize session state
91
# Seed each per-session slot on the first script run only; values already
# present in the session are left untouched. Factories are used so that the
# list default is a fresh object rather than a shared one.
for state_key, make_default in (
    ('vector_store', lambda: None),
    ('raw_text', lambda: None),
    ('chat_history', list),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = make_default()
97
+
98
+ # Functions
99
def extract_text_from_pdf(pdf_file):
    """Return the text of all pages of a PDF as a single string.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts (here, the object
            returned by the Streamlit file uploader).

    Returns:
        The text of each page followed by a newline, concatenated in page
        order. Pages for which ``extract_text()`` returns nothing are
        skipped, so the result is ``""`` when no page yields any text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        page_texts = (page.extract_text() for page in pdf.pages)
        # The generator is lazy, so it must be consumed while the PDF is open.
        return "".join(f"{content}\n" for content in page_texts if content)
108
+
109
def create_vector_store(text, chunk_size, chunk_overlap):
    """Split text into overlapping chunks and index them in a FAISS store.

    Args:
        text: The full document text to index.
        chunk_size: Maximum size of each chunk, passed to the splitter.
        chunk_overlap: Overlap between consecutive chunks, passed to the
            splitter.

    Returns:
        A ``(vector_store, num_chunks)`` tuple: the FAISS store built from
        the chunks and the number of chunks that were indexed.

    Raises:
        ValueError: If ``text`` is empty or whitespace-only, or if splitting
            yields no chunks. Without this check an empty document list would
            reach ``FAISS.from_documents`` and fail with an opaque error.
    """
    # Fail early with an actionable message (e.g. scanned PDFs with no text layer).
    if not text or not text.strip():
        raise ValueError(
            "No extractable text found in the document; cannot build the vector store."
        )

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        separators=["\n\n", "\n", ". ", " ", ""]
    )
    chunks = splitter.split_text(text)
    if not chunks:
        raise ValueError(
            "The document produced no text chunks; cannot build the vector store."
        )

    # chunk_id records each chunk's position in the original split order.
    documents = [
        Document(page_content=chunk, metadata={'chunk_id': i})
        for i, chunk in enumerate(chunks)
    ]

    embeddings = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'}
    )

    vector_store = FAISS.from_documents(documents, embeddings)
    return vector_store, len(chunks)
126
+
127
def get_answer(question, vector_store, api_key, k_value, temperature):
    """Answer a question with retrieval-augmented generation.

    Args:
        question: The user's question text.
        vector_store: The vector store to retrieve context from; it must
            provide ``as_retriever``.
        api_key: Groq API key used for this call.
        k_value: Number of documents the retriever returns per query.
        temperature: Sampling temperature passed to the chat model.

    Returns:
        A ``(answer, source_documents, response_time)`` tuple, where
        ``response_time`` is the wall-clock duration of the chain call in
        seconds.
    """
    # Pass the key to the client directly instead of writing it to
    # os.environ: the environment is process-wide, so a key stored there
    # would outlive this call and be visible to every other caller in the
    # same process.
    llm = ChatGroq(
        model="llama-3.3-70b-versatile",
        temperature=temperature,
        max_tokens=2048,
        groq_api_key=api_key
    )

    retriever = vector_store.as_retriever(search_kwargs={'k': k_value})

    prompt_template = """You are an expert HR Policy Assistant for Flykite Airlines.
Use ONLY the following context to answer the question. Be specific and cite policy sections.

CONTEXT:
{context}

QUESTION: {question}

Provide a helpful, accurate answer with policy references.

ANSWER:"""

    prompt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])

    rag_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt}
    )

    # Time only the chain invocation (retrieval + generation), not the setup.
    start_time = time.time()
    result = rag_chain.invoke({'query': question})
    response_time = time.time() - start_time

    return result['result'], result['source_documents'], response_time
166
+
167
+ # Process Document
168
# Handle the "Process Document" button: validate inputs, extract the PDF text
# and build the vector store. Failures are reported in the sidebar (the same
# "❌ Error: ..." style the question handler uses) instead of surfacing as an
# unhandled traceback.
if process_btn:
    if not groq_api_key:
        st.sidebar.error("❌ Please enter Groq API Key")
    elif not uploaded_file:
        st.sidebar.error("❌ Please upload a PDF file")
    else:
        try:
            with st.spinner("Processing document..."):
                # Extract text
                extracted_text = extract_text_from_pdf(uploaded_file)

                if not extracted_text.strip():
                    # Nothing to index, e.g. a scanned PDF without a text layer.
                    raise ValueError("No extractable text found in the uploaded PDF")

                # Create vector store
                new_vector_store, num_chunks = create_vector_store(
                    extracted_text, chunk_size, chunk_overlap
                )
        except Exception as e:
            st.sidebar.error(f"❌ Error: {str(e)}")
        else:
            # Commit to session state only after every step succeeded, so a
            # failed run never leaves new text paired with a stale index.
            st.session_state.raw_text = extracted_text
            st.session_state.vector_store = new_vector_store
            st.sidebar.success(f"✅ Document processed! ({num_chunks} chunks created)")
184
+
185
+ # Main Content
186
+ col1, col2 = st.columns([2, 1])
187
+
188
import html  # local import: used only to escape text embedded in raw HTML below

# Left column: question input, answer display, metrics and source excerpts.
with col1:
    st.header("💬 Ask a Question")

    # Sample Questions
    st.markdown("**Sample Questions:**")
    sample_questions = [
        "What are the effects on benefits if my probation is extended?",
        "How do I apply for leave due to a family demise?",
        "What should I do if I notice harassment of a colleague?"
    ]

    selected_sample = st.selectbox("Select a sample question:", ["-- Select --"] + sample_questions)

    # Custom Question; stripped so whitespace-only input counts as empty.
    question = st.text_area("Or type your own question:", height=100).strip()

    # Use sample if selected and nothing was typed
    if selected_sample != "-- Select --" and not question:
        question = selected_sample

    # Ask Button
    ask_btn = st.button("🔍 Get Answer", type="primary", use_container_width=True)

    if ask_btn:
        if not groq_api_key:
            st.error("❌ Please enter Groq API Key in sidebar")
        elif st.session_state.vector_store is None:
            st.error("❌ Please upload and process a document first")
        elif not question:
            st.error("❌ Please enter a question")
        else:
            with st.spinner("🤔 Thinking..."):
                try:
                    answer, sources, response_time = get_answer(
                        question,
                        st.session_state.vector_store,
                        groq_api_key,
                        k_value,
                        temperature
                    )

                    # Display Answer. The model output is escaped because it
                    # is interpolated into markup rendered with
                    # unsafe_allow_html=True.
                    st.markdown("### 📝 Answer")
                    st.markdown(
                        f'<div class="answer-box">{html.escape(answer)}</div>',
                        unsafe_allow_html=True
                    )

                    # Metrics
                    col_a, col_b, col_c = st.columns(3)
                    col_a.metric("⏱️ Response Time", f"{response_time:.2f}s")
                    col_b.metric("📚 Sources Used", len(sources))
                    col_c.metric("🎯 Top-K", k_value)

                    # Source Documents
                    with st.expander("📄 View Source Documents"):
                        for i, doc in enumerate(sources, 1):
                            # Truncate first, then escape, so an entity is
                            # never cut in half; the text comes from the
                            # uploaded PDF and must not be treated as markup.
                            excerpt = html.escape(doc.page_content[:500])
                            st.markdown(
                                f'<div class="source-box"><strong>Source {i}:</strong><br>{excerpt}...</div>',
                                unsafe_allow_html=True
                            )

                    # Add to chat history (raw, unescaped text)
                    st.session_state.chat_history.append({
                        'question': question,
                        'answer': answer,
                        'time': response_time
                    })

                except Exception as e:
                    st.error(f"❌ Error: {str(e)}")
253
+
254
# Right column: the five most recent questions, newest first, plus a reset.
with col2:
    st.header("📜 Chat History")

    history = st.session_state.chat_history
    if not history:
        st.info("No questions asked yet. Start by asking a question!")
    else:
        total = len(history)
        # offset 0 is the newest entry, so its label is the total count.
        for offset, entry in enumerate(reversed(history[-5:])):
            label = f"Q{total - offset}: {entry['question'][:50]}..."
            with st.expander(label):
                st.write(f"**Answer:** {entry['answer'][:300]}...")
                st.write(f"**Time:** {entry['time']:.2f}s")

    if st.button("🗑️ Clear History"):
        st.session_state.chat_history = []
        st.rerun()
268
+
269
+ # Footer
270
+ st.divider()
271
+ st.markdown("""
272
+ <div style="text-align: center; color: #666; font-size: 0.9rem;">
273
+ <p>🛫 Flykite Airlines HR Q&A Bot | Powered by RAG + Groq LLama 3.3 70B</p>
274
+ <p>Built with Streamlit | Deployed on Hugging Face Spaces</p>
275
+ </div>
276
+ """, unsafe_allow_html=True)
requirements.txt CHANGED
@@ -1,3 +1,12 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
1
+ streamlit==1.40.0
2
+ langchain==0.2.16
3
+ langchain-core==0.2.40
4
+ langchain-community==0.2.16
5
+ langchain-text-splitters==0.2.4
6
+ langchain-groq==0.1.9
7
+ groq
8
+ faiss-cpu
9
+ sentence-transformers
10
+ pdfplumber
11
+ tiktoken
12
+ huggingface_hub