# QNARag / app.py
# Uploaded by mrshibly (commit b59430e, verified)
import streamlit as st
import faiss
import pickle
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# -----------------------------------------------------------------------------
# 1. PAGE CONFIGURATION
# -----------------------------------------------------------------------------
# Must run before any other st.* call — Streamlit requires set_page_config
# to be the first command executed on each script run.
_PAGE_CONFIG = {
    "page_title": "BD-Insight | Sovereign Intelligence",
    "page_icon": "🇧🇩",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_CONFIG)
# Session-state bookkeeping for the quick-prompt buttons: the chosen template
# is stored under 'query' so the main text input can display it on rerun.
st.session_state.setdefault('query', "")


def use_template(q):
    """Remember the selected quick-prompt text for the main input box."""
    st.session_state['query'] = q
# -----------------------------------------------------------------------------
# 2. CLEAN & MINIMAL CSS
# -----------------------------------------------------------------------------
# Inject page-wide CSS: hides the default Streamlit chrome (menu/footer/header),
# paints the sidebar with a Bangladesh-flag green gradient, and defines the
# .source-card class used below when rendering retrieved document snippets.
st.markdown("""
<style>
/* 1. Hide Streamlit Branding only */
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
header {visibility: hidden;}
/* 2. Professional Sidebar (Bangladesh Green Gradient) */
section[data-testid="stSidebar"] {
background-image: linear-gradient(#006a4e, #004d38);
color: white !important;
}
/* 3. Global Text Cleanup */
.block-container {
padding-top: 2rem;
}
/* 4. Custom Source Card */
.source-card {
background-color: #f8f9fa;
border-left: 5px solid #006a4e;
padding: 15px;
margin-bottom: 10px;
border-radius: 5px;
font-size: 0.9rem;
}
</style>
""", unsafe_allow_html=True)
# -----------------------------------------------------------------------------
# 3. SIDEBAR
# -----------------------------------------------------------------------------
# Static sidebar: flag image, app title, and a short description of the
# document corpus the answers are grounded in.
with st.sidebar:
    st.image("https://upload.wikimedia.org/wikipedia/commons/f/f9/Flag_of_Bangladesh.svg", width=50)
    st.title("BD-Insight")
    st.markdown("### Sovereign Analytical Engine")
    st.info("""
**Verified Knowledge Base:**
- 📜 Constitution
- 🏰 History & 1971 War
- 📊 Economy & Stats
""")
    st.markdown("---")
    st.caption("Developed by @mrshibly")
# -----------------------------------------------------------------------------
# 4. MODEL LOADING
# -----------------------------------------------------------------------------
@st.cache_resource
def load_models():
    """Load and cache the embedding and generation models.

    Returns:
        tuple: (sentence embedder, seq2seq tokenizer, seq2seq model).
        Cached via st.cache_resource so the heavy downloads and
        initialisation happen only once per server process.
    """
    sentence_encoder = SentenceTransformer("intfloat/e5-base-v2")
    t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
    t5_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
    return sentence_encoder, t5_tokenizer, t5_model
@st.cache_resource
def load_index():
    """Read the FAISS vector index and its pickled chunk metadata from disk.

    Returns:
        tuple: (faiss index, list of document-chunk dicts). Raises if either
        "faiss.index" or "metadata.pkl" is missing from the working directory.
    """
    vector_index = faiss.read_index("faiss.index")
    with open("metadata.pkl", "rb") as meta_file:
        chunk_metadata = pickle.load(meta_file)
    return vector_index, chunk_metadata
# -----------------------------------------------------------------------------
# 5. MAIN INTERFACE
# -----------------------------------------------------------------------------
# Header
st.title("🇧🇩 BD-Insight")
st.markdown("##### Ask questions about Bangladesh. Answers sourced strictly from official documents.")
st.markdown("---")

# Quick Questions: one button per canned prompt, laid out across four columns.
# Clicking a button stores its template in session state for the text input.
st.subheader("📝 Quick Questions")
_QUICK_PROMPTS = [
    ("Questions about Rights?", "What are the fundamental rights of a citizen?"),
    ("How is the Economy?", "What is the current GDP growth rate?"),
    ("What happened in 1971?", "Summarize the key events of the 1971 Liberation War."),
    ("What does the PM do?", "What are the powers of the Prime Minister?"),
]
for column, (label, template) in zip(st.columns(4), _QUICK_PROMPTS):
    with column:
        if st.button(label, use_container_width=True):
            use_template(template)
# Interactive Query
# Load models and the retrieval index up-front so the first user query does
# not pay the cold-start cost.
with st.spinner("Initializing System..."):
    embedder, tokenizer, llm = load_models()
    try:
        index, documents = load_index()
        ready = True
    except (OSError, pickle.UnpicklingError, RuntimeError) as exc:
        # faiss.read_index raises RuntimeError when "faiss.index" is absent;
        # open()/pickle.load raise OSError/UnpicklingError for the metadata
        # file. The original bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt and hid the actual failure cause.
        st.error("⚠️ Index missing. Please upload files.")
        ready = False
if ready:
    # Input Area
    st.markdown("### 🔍 Your Question")
    user_query = st.text_input(
        "Type your query below:",
        value=st.session_state['query'],
        placeholder="Ex: How can the constitution be amended?",
        label_visibility="collapsed",
    )

    if user_query:
        with st.spinner("Scanning Archives..."):
            # 1. Embed the question (e5-family models expect the "query: "
            # prefix) and fetch the 5 nearest chunks from the FAISS index.
            query_vector = embedder.encode(["query: " + user_query])
            _, neighbor_ids = index.search(query_vector, k=5)
            hits = [documents[doc_id] for doc_id in neighbor_ids[0]]

            # 2. Concatenate the retrieved chunk texts into one context blob.
            context_blob = "\n\n".join(hit['text'] for hit in hits)

            # 3. Build the grounded prompt for the seq2seq generator.
            prompt = (
                f"Question: {user_query}\n"
                "Based on the context below, write a detailed and professional answer. Use full sentences.\n"
                "If the answer is unknown, say \"I cannot find this info in the documents.\"\n"
                "Context:\n"
                f"{context_blob}\n"
                "Answer:"
            )

            # 4. Generate with beam search; the prompt is truncated to fit
            # the model's input window (1536 tokens).
            input_ids = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1536).input_ids
            generated = llm.generate(input_ids, max_new_tokens=500, min_length=60, num_beams=4, early_stopping=True)
            answer_text = tokenizer.decode(generated[0], skip_special_tokens=True)

        # Display Results: answer on the left (2/3 width), deduplicated
        # source snippets on the right.
        st.markdown("---")
        answer_col, source_col = st.columns([2, 1])
        with answer_col:
            st.success("Analysis Result")
            st.markdown(f"**{answer_text}**")
        with source_col:
            st.info("Source References")
            shown_sources = set()
            for hit in hits:
                if hit['source'] in shown_sources:
                    continue
                shown_sources.add(hit['source'])
                st.markdown(f"""
<div class="source-card">
<b>📄 {hit['source']}</b><br>
<i style="color:gray">"{hit['text'][:80]}..."</i>
</div>
""", unsafe_allow_html=True)