Upload 4 files
README.md
CHANGED
@@ -1,19 +1,33 @@
# Flykite Airlines – HR Policy Assistant (RAG + LLM)

This Streamlit application provides grounded, citation-based HR policy answers
using Retrieval-Augmented Generation (RAG). The system is powered by:

- Groq LLM (Llama 3.3 70B Versatile)
- FAISS vector index
- Cleaned and chunked employee policy handbook
- SentenceTransformer embeddings

## How It Works

1. User enters an HR-related question
2. App retrieves top policy chunks using FAISS
3. LLM answers using ONLY the retrieved context
4. Response includes:
   - Summary
   - Steps (if applicable)
   - Citations (page + chunk)
   - Policy-grounded content

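In code, this flow reduces to retrieve-then-prompt. The sketch below is condensed from `app.py` in this repository (Streamlit UI and error handling omitted); the example question is purely illustrative.

```python
from utils import load_index_and_meta, retrieve_top_k
from langchain_groq import ChatGroq

# Load the FAISS index, chunk metadata, and embedding model once at startup.
meta_list, mapping, index, embed_model = load_index_and_meta(
    "resources/flyk_chunks_meta.jsonl",
    "resources/flyk_chunks_clean.jsonl",
    "resources/flyk_faiss_clean.index",
)
llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)

question = "How much notice is required before resigning?"  # illustrative question

# Steps 1-2: retrieve the most relevant handbook chunks for the question.
retrieved = retrieve_top_k(
    query=question, top_k=5, min_score=0.25,
    index=index, embed_model=embed_model,
    meta_list=meta_list, mapping=mapping,
)

# Steps 3-4: answer strictly from the retrieved context, citing page and chunk.
context = "\n\n".join(
    f"(Page {r['page']} • Chunk {r['chunk_id']}):\n{r['text']}" for r in retrieved
)
prompt = (
    "Use ONLY the context below to answer the question.\n\n"
    f"Question: {question}\n\nContext:\n{context}\n\n"
    "Provide answer in: 1. Summary 2. Steps (if applicable) 3. Citations (page + chunk)"
)
answer = llm.invoke(prompt).content
```
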
## Deployment

This app runs on HuggingFace Spaces using Streamlit.

## API Keys

Set the environment variable `GROQ_API_KEY` in your Space Settings.

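`ChatGroq` picks the key up from the environment. On Spaces, add it as a secret in Settings; for a local run, a minimal sketch assuming a `.env` file next to the app (loaded with `python-dotenv`, already listed in `requirements.txt`):

```python
import os
from dotenv import load_dotenv  # python-dotenv, listed in requirements.txt

load_dotenv()  # reads GROQ_API_KEY from a local .env file; Spaces injects secrets directly
if not os.getenv("GROQ_API_KEY"):
    raise RuntimeError("GROQ_API_KEY is not set")

# Then start the app locally with: streamlit run app.py
```
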
## Project Structure

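- `app.py`: Streamlit UI that wires retrieval and the Groq LLM together
- `utils.py`: FAISS index loading, chunk lookup, and the `retrieve_top_k` helper
- `requirements.txt`: Python dependencies
- `resources/`: chunked handbook text, chunk metadata, and the FAISS index, expected at the paths referenced in `app.py`
- `README.md`: this file
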
## Author

Saibala Sundarajan
Tiger Analytics
app.py
ADDED
@@ -0,0 +1,78 @@
import streamlit as st
import os
from utils import load_index_and_meta, retrieve_top_k
from langchain_groq import ChatGroq

# ---------------------------
# Paths
# ---------------------------
META_PATH = "resources/flyk_chunks_meta.jsonl"
CHUNKS_PATH = "resources/flyk_chunks_clean.jsonl"
INDEX_PATH = "resources/flyk_faiss_clean.index"

# ---------------------------
# Load LLM
# ---------------------------
llm = ChatGroq(model="llama-3.3-70b-versatile", temperature=0)

# ---------------------------
# Load FAISS + metadata
# ---------------------------
meta_list, mapping, index, embed_model = load_index_and_meta(
    META_PATH, CHUNKS_PATH, INDEX_PATH
)

# ---------------------------
# Streamlit UI
# ---------------------------
st.set_page_config(page_title="Flykite HR Policy Assistant", layout="wide")

st.title("Flykite Airlines – HR Policy Assistant (RAG)")
st.write("Ask any HR policy question. Responses are grounded in the official HR Policy Handbook.")

question = st.text_input("Enter your question:")

if question:

    with st.spinner("Retrieving information..."):
        retrieved = retrieve_top_k(
            query=question,
            top_k=5,
            min_score=0.25,
            index=index,
            embed_model=embed_model,
            meta_list=meta_list,
            mapping=mapping
        )

    # Build context
    context = ""
    for r in retrieved:
        context += f"(Page {r['page']} • Chunk {r['chunk_id']}):\n{r['text']}\n\n"

    prompt = f"""
You are an HR expert assistant for Flykite Airlines.
Use ONLY the context below to answer the question.

Question: {question}

Context:
{context}

Provide answer in:
1. Summary
2. Steps (if applicable)
3. Citations (page + chunk)
"""

    with st.spinner("Generating grounded answer..."):
        response = llm.invoke(prompt).content

    st.subheader("Answer")
    st.write(response)

    with st.expander("Retrieved Policy Context"):
        for r in retrieved:
            st.markdown(f"**Page {r['page']} | Chunk {r['chunk_id']} | Score {r['score']:.3f}**")
            st.write(r['text'])
            st.markdown("---")
requirements.txt
CHANGED
@@ -1,3 +1,6 @@
streamlit
sentence-transformers
faiss-cpu
langchain
langchain-groq
python-dotenv
utils.py
ADDED
@@ -0,0 +1,55 @@
import json
import faiss
import numpy as np
import re
from sentence_transformers import SentenceTransformer

# Matches email addresses so they can be redacted from retrieved policy text.
EMAIL_PATTERN = re.compile(r"[A-Za-z0-9._%+\-]+@[A-Za-z0-9.\-]+\.[A-Za-z]{2,}", re.IGNORECASE)


def load_index_and_meta(meta_path, chunks_path, index_path):
    # Chunk metadata: one JSON object per line, in the same order as the FAISS vectors.
    meta_list = []
    with open(meta_path, "r", encoding="utf-8") as f:
        for line in f:
            meta_list.append(json.loads(line))

    # Chunk text keyed by (page, chunk_id), with emails redacted at load time.
    mapping = {}
    with open(chunks_path, "r", encoding="utf-8") as f:
        for line in f:
            obj = json.loads(line)
            text = EMAIL_PATTERN.sub("[REDACTED_EMAIL]", obj["text"])
            mapping[(obj["page"], obj["chunk_id"])] = text

    index = faiss.read_index(index_path)
    embed_model = SentenceTransformer("all-MiniLM-L6-v2")

    return meta_list, mapping, index, embed_model


def retrieve_top_k(query, top_k, min_score, index, embed_model, meta_list, mapping):

    qvec = embed_model.encode([query], convert_to_numpy=True).astype("float32")
    # Normalize the query vector; assuming the index stores normalized embeddings,
    # the inner-product scores returned by FAISS are cosine similarities.
    faiss.normalize_L2(qvec)

    D, I = index.search(qvec, top_k)

    results = []
    for dist, idx in zip(D[0], I[0]):
        # Skip weak matches and the -1 padding indices FAISS returns when fewer than top_k hits exist.
        if dist < min_score:
            continue
        if idx < 0 or idx >= len(meta_list):
            continue

        m = meta_list[idx]
        page = m["page"]
        chunk = m["chunk_id"]
        text = mapping.get((page, chunk), "")
        text = EMAIL_PATTERN.sub("[REDACTED_EMAIL]", text)

        results.append({
            "score": float(dist),
            "page": page,
            "chunk_id": chunk,
            "text": text
        })

    return results