saibsund committed on
Commit
8d25d65
·
verified ·
1 Parent(s): 49dfc12

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +33 -19
  2. app.py +78 -0
  3. requirements.txt +6 -3
  4. utils.py +55 -0
README.md CHANGED
@@ -1,19 +1,33 @@
1
- ---
2
- title: Flykite HR RAG Bot
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: Streamlit template space
12
- ---
13
-
14
- # Welcome to Streamlit!
15
-
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
-
18
- If you have any questions, check out our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 🛫 Flykite Airlines — HR Policy Assistant (RAG + LLM)
2
+
3
+ This Streamlit application provides grounded, citation-based HR policy answers
4
+ using Retrieval-Augmented Generation (RAG). The system is powered by:
5
+
6
+ - Groq LLM (Llama 3.3 70B Versatile)
7
+ - FAISS Vector Index
8
+ - Cleaned and chunked employee policy handbook
9
+ - SentenceTransformer embeddings
10
+
11
+ ## 🔧 How It Works
12
+ 1. User enters an HR-related question
13
+ 2. App retrieves top policy chunks using FAISS
14
+ 3. LLM answers using ONLY the retrieved context
15
+ 4. Response includes:
16
+ - Summary
17
+ - Steps (if applicable)
18
+ - Citations (page + chunk)
19
+ - Policy-grounded content
20
+
21
+ ## 🚀 Deployment
22
+ This app runs on Hugging Face Spaces using Streamlit.
23
+
24
+ ## πŸ” API Keys
25
+ Set the environment variable `GROQ_API_KEY` in your Space Settings.
26
+
27
+ ## 📂 Project Structure
28
+
29
+
30
+ ## 👀 Author
31
+ Saibala Sundarajan
32
+ Tiger Analytics
33
+
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from utils import load_index_and_meta, retrieve_top_k
4
+ from langchain_groq import ChatGroq
5
+
6
# ---------------------------
# Paths
# ---------------------------
META_PATH = "resources/flyk_chunks_meta.jsonl"
CHUNKS_PATH = "resources/flyk_chunks_clean.jsonl"
INDEX_PATH = "resources/flyk_faiss_clean.index"

# ---------------------------
# Retrieval settings
# ---------------------------
TOP_K = 5          # maximum number of chunks requested from the index
MIN_SCORE = 0.25   # hits scoring below this are discarded by retrieve_top_k

# ---------------------------
# Streamlit page setup
# ---------------------------
# Configure the page before anything else touches Streamlit (cached loaders
# below may display a spinner while they run).
st.set_page_config(page_title="Flykite HR Policy Assistant", layout="wide")

# ChatGroq is constructed without an explicit key, so it relies on the
# GROQ_API_KEY environment variable. Fail with a readable message instead of
# a stack trace when it is missing.
if not os.environ.get("GROQ_API_KEY"):
    st.error("GROQ_API_KEY is not set. Add it in your Space settings and restart the app.")
    st.stop()


# ---------------------------
# Load LLM
# ---------------------------
@st.cache_resource
def _load_llm():
    """Build the Groq chat client once and reuse it across script reruns."""
    return ChatGroq(model="llama-3.3-70b-versatile", temperature=0)


# ---------------------------
# Load FAISS + metadata
# ---------------------------
@st.cache_resource
def _load_retrieval_resources():
    """Load metadata, chunk texts, FAISS index and embedder once.

    Streamlit re-executes this script on every interaction; without caching
    the index and the embedding model would be reloaded for each question.
    """
    return load_index_and_meta(META_PATH, CHUNKS_PATH, INDEX_PATH)


def _build_prompt(question, context):
    """Return the grounded-answer prompt for *question* using *context*."""
    return f"""
You are an HR expert assistant for Flykite Airlines.
Use ONLY the context below to answer the question.

Question: {question}

Context:
{context}

Provide answer in:
1. Summary
2. Steps (if applicable)
3. Citations (page + chunk)
"""


llm = _load_llm()
meta_list, mapping, index, embed_model = _load_retrieval_resources()

# ---------------------------
# Streamlit UI
# ---------------------------
st.title("🛫 Flykite Airlines — HR Policy Assistant (RAG)")
st.write("Ask any HR policy question. Responses are grounded in the official HR Policy Handbook.")

question = st.text_input("Enter your question:")

# Ignore input that is empty or only whitespace.
if question and question.strip():

    with st.spinner("Retrieving information..."):
        retrieved = retrieve_top_k(
            query=question,
            top_k=TOP_K,
            min_score=MIN_SCORE,
            index=index,
            embed_model=embed_model,
            meta_list=meta_list,
            mapping=mapping,
        )

    if not retrieved:
        # With no supporting context the model could only answer from its own
        # knowledge, which defeats the "grounded" promise, so do not call it.
        st.warning("No relevant policy content was found for this question. Try rephrasing it.")
    else:
        # Build context: one labelled section per retrieved chunk.
        context = "".join(
            f"(Page {r['page']} • Chunk {r['chunk_id']}):\n{r['text']}\n\n"
            for r in retrieved
        )

        prompt = _build_prompt(question, context)

        with st.spinner("Generating grounded answer..."):
            response = llm.invoke(prompt).content

        st.subheader("📘 Answer")
        st.write(response)

        with st.expander("📂 Retrieved Policy Context"):
            for r in retrieved:
                st.markdown(f"**Page {r['page']} | Chunk {r['chunk_id']} | Score {r['score']:.3f}**")
                st.write(r['text'])
                st.markdown("---")
requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
1
+ streamlit
2
+ sentence-transformers
3
+ faiss-cpu
4
+ langchain
5
+ langchain-groq
6
+ python-dotenv
utils.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import faiss
3
+ import numpy as np
4
+ import re
5
+ from sentence_transformers import SentenceTransformer
6
+
7
# Matches e-mail addresses; used to replace them with a "[REDACTED_EMAIL]"
# placeholder in handbook text.
EMAIL_PATTERN = re.compile(
    r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}",
    re.IGNORECASE,
)
8
+
9
def _read_jsonl(path):
    """Return the JSON objects stored one per line in *path*, skipping blank lines."""
    records = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # A stray empty line would otherwise make json.loads raise.
            if line:
                records.append(json.loads(line))
    return records


def load_index_and_meta(meta_path, chunks_path, index_path):
    """Load everything needed to answer retrieval queries.

    Args:
        meta_path: JSON Lines file with one metadata object per line.
        chunks_path: JSON Lines file whose objects carry ``page``,
            ``chunk_id`` and ``text`` keys.
        index_path: Path of the serialized FAISS index.

    Returns:
        A ``(meta_list, mapping, index, embed_model)`` tuple where
        ``meta_list`` is the list of metadata objects in file order,
        ``mapping`` maps ``(page, chunk_id)`` to the chunk text with e-mail
        addresses replaced by ``[REDACTED_EMAIL]``, ``index`` is the FAISS
        index read from ``index_path`` and ``embed_model`` is the
        ``all-MiniLM-L6-v2`` SentenceTransformer.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a chunk object lacks ``page``, ``chunk_id`` or ``text``.
    """
    meta_list = _read_jsonl(meta_path)

    # Redact once at load time so raw addresses are never kept in memory.
    mapping = {
        (obj["page"], obj["chunk_id"]): EMAIL_PATTERN.sub("[REDACTED_EMAIL]", obj["text"])
        for obj in _read_jsonl(chunks_path)
    }

    index = faiss.read_index(index_path)
    embed_model = SentenceTransformer("all-MiniLM-L6-v2")

    return meta_list, mapping, index, embed_model
26
+
27
+
28
def retrieve_top_k(query, top_k, min_score, index, embed_model, meta_list, mapping):
    """Return the best-matching handbook chunks for *query*.

    Args:
        query: The user's question.
        top_k: Maximum number of neighbours to request from the index.
        min_score: Hits whose score is below this value are dropped.
        index: FAISS index to search.
        embed_model: Model whose ``encode`` turns the query into a vector.
        meta_list: Metadata objects (with ``page`` and ``chunk_id`` keys),
            indexed by the ids the FAISS index returns.
        mapping: ``(page, chunk_id)`` -> chunk text.

    Returns:
        A list of dicts with ``score``, ``page``, ``chunk_id`` and ``text``
        keys, in the order returned by the index. The list is empty when the
        query is blank, ``top_k`` is not positive, or nothing passes the
        filters.
    """
    # Nothing meaningful to search for; also avoids asking the index for
    # zero (or a negative number of) neighbours.
    if not query or not query.strip() or top_k <= 0:
        return []

    qvec = embed_model.encode([query], convert_to_numpy=True).astype("float32")
    # Unit-length query vector.
    # NOTE(review): together with the "score < min_score" filter below this
    # presumes an inner-product index over normalized vectors, so scores are
    # cosine similarities (higher is better) - confirm against the index build.
    faiss.normalize_L2(qvec)

    scores, ids = index.search(qvec, top_k)

    results = []
    for score, idx in zip(scores[0], ids[0]):
        if score < min_score:
            continue
        # Skip ids that have no corresponding metadata entry (negative or
        # beyond the end of meta_list).
        if idx < 0 or idx >= len(meta_list):
            continue

        m = meta_list[int(idx)]
        page = m["page"]
        chunk = m["chunk_id"]
        text = mapping.get((page, chunk), "")
        # Redact again in case the mapping was built without redaction.
        text = EMAIL_PATTERN.sub("[REDACTED_EMAIL]", text)

        results.append({
            "score": float(score),
            "page": page,
            "chunk_id": chunk,
            "text": text,
        })

    return results