| import streamlit as st
|
| import faiss
|
| import pickle
|
| from sentence_transformers import SentenceTransformer
|
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
|
|
|
|
|
|
|
# Configure the Streamlit page before any other st.* call (required by Streamlit).
# NOTE(review): page_icon "π§π©" looks like mojibake of an emoji (likely the
# Bangladesh flag) — confirm intended characters and file encoding.
_PAGE_OPTIONS = dict(
    page_title="BD-Insight | Sovereign Intelligence",
    page_icon="π§π©",
    layout="wide",
    initial_sidebar_state="expanded",
)
st.set_page_config(**_PAGE_OPTIONS)
|
|
|
|
|
# Seed the shared query slot exactly once per session; later reruns keep
# whatever value the user or a template button stored there.
st.session_state.setdefault('query', "")
|
|
|
def use_template(q):
    """Store a canned prompt in session state so the query box picks it up on rerun."""
    st.session_state.query = q
|
|
|
|
|
|
|
|
|
# Inject page-wide CSS. unsafe_allow_html is required for raw <style> tags;
# the rules hide Streamlit's default chrome, theme the sidebar, and define the
# .source-card class used when rendering source references further down.
st.markdown("""
<style>
/* 1. Hide Streamlit Branding only */
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
header {visibility: hidden;}

/* 2. Professional Sidebar (Bangladesh Green Gradient) */
section[data-testid="stSidebar"] {
background-image: linear-gradient(#006a4e, #004d38);
color: white !important;
}

/* 3. Global Text Cleanup */
.block-container {
padding-top: 2rem;
}

/* 4. Custom Source Card */
.source-card {
background-color: #f8f9fa;
border-left: 5px solid #006a4e;
padding: 15px;
margin-bottom: 10px;
border-radius: 5px;
font-size: 0.9rem;
}
</style>
""", unsafe_allow_html=True)
|
|
|
|
|
|
|
|
|
# Sidebar branding and static help text. The st.sidebar.* accessor form is
# equivalent to a `with st.sidebar:` block.
# NOTE(review): the "π ..." bullets look like mojibake of emoji — confirm
# intended characters.
st.sidebar.image("https://upload.wikimedia.org/wikipedia/commons/f/f9/Flag_of_Bangladesh.svg", width=50)
st.sidebar.title("BD-Insight")
st.sidebar.markdown("### Sovereign Analytical Engine")

st.sidebar.info("""
**Verified Knowledge Base:**
- π Constitution
- π° History & 1971 War
- π Economy & Stats
""")

st.sidebar.markdown("---")
st.sidebar.caption("Developed by @mrshibly")
|
|
|
|
|
|
|
|
|
@st.cache_resource
def load_models():
    """Load and cache the embedding and generation models (one download per process).

    Returns:
        tuple: (SentenceTransformer embedder, FLAN-T5 tokenizer, FLAN-T5 model).
    """
    # e5-base-v2 produces the query/passage embeddings searched via FAISS below.
    encoder = SentenceTransformer("intfloat/e5-base-v2")
    tok = AutoTokenizer.from_pretrained("google/flan-t5-large")
    generator = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
    return encoder, tok, generator
|
|
|
@st.cache_resource
def load_index():
    """Read the FAISS vector index and its pickled document metadata from disk.

    Returns:
        tuple: (faiss index, list/sequence of document records). Raises if the
        files are absent; the caller handles that case.
    """
    vector_index = faiss.read_index("faiss.index")
    # NOTE(review): pickle is only safe on trusted, locally-built artifacts —
    # never load a metadata.pkl from an untrusted source.
    with open("metadata.pkl", "rb") as handle:
        records = pickle.load(handle)
    return vector_index, records
|
|
|
|
|
|
|
|
|
|
|
|
|
# Page header plus one-click template questions. Each button writes its prompt
# into session state via use_template; the text_input below reads it back.
st.title("π§π© BD-Insight")
st.markdown("##### Ask questions about Bangladesh. Answers sourced strictly from official documents.")

st.markdown("---")

st.subheader("π Quick Questions")
col1, col2, col3, col4 = st.columns(4)

# (button label, canned query) pairs — data-driven instead of four copies.
_TEMPLATES = (
    ("Questions about Rights?", "What are the fundamental rights of a citizen?"),
    ("How is the Economy?", "What is the current GDP growth rate?"),
    ("What happened in 1971?", "Summarize the key events of the 1971 Liberation War."),
    ("What does the PM do?", "What are the powers of the Prime Minister?"),
)
for column, (label, prompt) in zip((col1, col2, col3, col4), _TEMPLATES):
    with column:
        if st.button(label, use_container_width=True):
            use_template(prompt)
|
|
|
|
|
# Warm up the cached models and load the retrieval index. The index files may
# legitimately be absent on a fresh deployment, so that failure is reported in
# the UI instead of crashing the app.
with st.spinner("Initializing System..."):
    embedder, tokenizer, llm = load_models()
    try:
        index, documents = load_index()
        ready = True
    except (OSError, RuntimeError, pickle.UnpicklingError):
        # Narrowed from a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit and masked genuine bugs.
        # faiss.read_index raises RuntimeError on a missing/corrupt file;
        # open()/pickle.load raise OSError/UnpicklingError.
        st.error("β οΈ Index missing. Please upload files.")
        ready = False
|
|
|
# Main RAG loop: take a question, retrieve top-k chunks from FAISS, build a
# grounded prompt, generate with FLAN-T5, and render answer + deduplicated
# source cards. Runs only when the index loaded successfully.
if ready:

    st.markdown("### π Your Question")
    # `value=` pre-fills the box with whatever a quick-question button stored.
    query = st.text_input("Type your query below:", value=st.session_state['query'], placeholder="Ex: How can the constitution be amended?", label_visibility="collapsed")

    if query:
        with st.spinner("Scanning Archives..."):

            # e5-family models expect a "query: " prefix on search queries —
            # TODO confirm this matches how the passages were embedded at index time.
            q_emb = embedder.encode(["query: " + query])
            # D = distances (unused), I = indices of the 5 nearest chunks.
            D, I = index.search(q_emb, k=5)
            # Assumes `documents` is index-aligned with the FAISS vectors and
            # each record is a dict with 'text' and 'source' keys (see below).
            retrieved = [documents[i] for i in I[0]]

            context_str = "\n\n".join([d['text'] for d in retrieved])

            # Grounding prompt: question first, then instructions, then the
            # retrieved context, ending with "Answer:" for the seq2seq model.
            system = f"""Question: {query}

Based on the context below, write a detailed and professional answer. Use full sentences.
If the answer is unknown, say "I cannot find this info in the documents."

Context:
{context_str}

Answer:"""

            # Truncate to 1536 tokens to stay inside the model's input budget;
            # beam search (4 beams) with a minimum length to avoid one-liners.
            inputs = tokenizer(system, return_tensors="pt", truncation=True, max_length=1536).input_ids
            outputs = llm.generate(inputs, max_new_tokens=500, min_length=60, num_beams=4, early_stopping=True)
            final_answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

            st.markdown("---")
            # Two-column result layout: answer (2/3 width) and sources (1/3).
            c_ans, c_ref = st.columns([2, 1])

            with c_ans:
                st.success("Analysis Result")
                st.markdown(f"**{final_answer}**")

            with c_ref:
                st.info("Source References")
                # Deduplicate by source name; show an 80-char snippet per source
                # using the .source-card CSS class injected earlier in the file.
                seen = set()
                for doc in retrieved:
                    if doc['source'] not in seen:
                        seen.add(doc['source'])
                        st.markdown(f"""
<div class="source-card">
<b>π {doc['source']}</b><br>
<i style="color:gray">"{doc['text'][:80]}..."</i>
</div>
""", unsafe_allow_html=True)
|
|
|