mrshibly committed on
Commit
c85dfd7
·
verified ·
1 Parent(s): d6d3be9

Upload 3 files

Browse files
Files changed (3) hide show
  1. app.py +231 -82
  2. faiss.index +2 -2
  3. metadata.pkl +2 -2
app.py CHANGED
@@ -5,13 +5,137 @@ import torch
5
  from sentence_transformers import SentenceTransformer
6
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
 
8
- # Page configuration
 
 
9
  st.set_page_config(
10
- page_title="University Regulation Q&A",
11
- page_icon="🎓",
12
- layout="centered"
 
13
  )
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  @st.cache_resource
16
  def load_models():
17
  """Load embedding and generation models."""
@@ -28,92 +152,117 @@ def load_index():
28
  docs = pickle.load(f)
29
  return index, docs
30
 
31
- # Load models and index
32
- with st.spinner("Loading models... (this may take a minute on first run)"):
33
- embedder, tokenizer, llm = load_models()
34
- index, documents = load_index()
35
-
36
- # UI
37
- st.title("🎓 Bangladesh University Academic Regulation Q&A")
38
- st.markdown("Ask questions about university academic regulations, examination rules, grading policies, and more.")
39
-
40
- # Sidebar
41
  with st.sidebar:
42
- st.header("About")
43
- st.markdown("""
44
- This system uses **Retrieval-Augmented Generation (RAG)** to answer questions about university regulations.
45
 
46
- **Technology Stack:**
47
- - 🔍 **Embeddings**: E5-base-v2
48
- - 📊 **Indexing**: FAISS
49
- - 🤖 **Generation**: Flan-T5-base
50
 
51
- **Dataset:**
52
- - University academic regulations
53
- - Examination guidelines
54
- - Grading policies
55
- - Academic rules
56
- """)
57
 
58
- st.header("How it works")
59
- st.markdown("""
60
- 1. Your question is embedded
61
- 2. Top-3 relevant chunks retrieved
62
- 3. Answer generated from context
63
- 4. Sources cited for verification
64
- """)
65
-
66
- # Main interface
67
- question = st.text_input("Ask a question:", placeholder="e.g., What is the grading system?")
68
-
69
- if question:
70
- with st.spinner("Searching and generating answer..."):
71
- # Embed query with correct prefix
72
- q_embedding = embedder.encode(["query: " + question])
73
-
74
- # Retrieve top-k chunks
75
- D, I = index.search(q_embedding, k=3)
76
 
77
- # Get retrieved chunks
78
- retrieved = [documents[i]["text"] for i in I[0]]
79
- context = "\n\n".join(retrieved)
 
 
 
 
80
 
81
- # Create prompt
82
- prompt = f"""You are an academic assistant.
 
 
 
 
83
 
84
- Answer the question using ONLY the context below.
85
- If the answer is not present, say: "I don't know based on the provided documents."
 
 
 
 
 
86
 
87
- Context:
88
- {context}
89
 
90
- Question:
91
- {question}
 
 
 
 
 
 
 
92
 
93
- Answer:"""
94
-
95
- # Generate answer
96
- inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
97
- outputs = llm.generate(**inputs, max_new_tokens=256)
98
- answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
99
-
100
- # Display answer
101
- st.subheader("Answer")
102
- st.write(answer)
103
-
104
- # Display sources
105
- st.subheader("📚 Sources")
106
- sources = list(set([documents[i]["source"] for i in I[0]]))
107
- for source in sources:
108
- st.markdown(f"- {source}")
109
-
110
- # Show retrieved context (expandable)
111
- with st.expander("View retrieved context"):
112
- for idx, i in enumerate(I[0]):
113
- st.markdown(f"**Chunk {idx+1}** (from {documents[i]['source']})")
114
- st.text(documents[i]["text"][:300] + "...")
115
- st.divider()
116
 
117
- # Footer
118
- st.markdown("---")
119
- st.markdown("Built with ❤️ using RAG | [GitHub](https://github.com/yourusername/QNARag)")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  from sentence_transformers import SentenceTransformer
6
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
 
8
+ # -----------------------------------------------------------------------------
9
+ # 1. PAGE CONFIGURATION & CUSTOM STYLING
10
+ # -----------------------------------------------------------------------------
11
  st.set_page_config(
12
+ page_title="BD-Insight | Sovereign Intelligence",
13
+ page_icon="🇧🇩",
14
+ layout="wide",
15
+ initial_sidebar_state="expanded"
16
  )
17
 
18
+ # Custom CSS for Premium Bangladesh Theme
19
+ st.markdown("""
20
+ <style>
21
+ /* Google Fonts */
22
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;800&display=swap');
23
+
24
+ html, body, [class*="css"] {
25
+ font-family: 'Inter', sans-serif;
26
+ }
27
+
28
+ /* Main Background - Very subtle cream/white */
29
+ .stApp {
30
+ background-color: #ffffff;
31
+ }
32
+
33
+ /* REMOVE HEADERS AND FOOTERS */
34
+ header {visibility: hidden;}
35
+ footer {visibility: hidden;}
36
+ #MainMenu {visibility: hidden;}
37
+ .reportview-container .main footer {visibility: hidden;}
38
+
39
+ /* BRANDING COLORS:
40
+ Green: #006a4e (Bangladesh Flag Green)
41
+ Red: #f42a41 (Bangladesh Flag Red)
42
+ */
43
+
44
+ /* Sidebar Styling */
45
+ section[data-testid="stSidebar"] {
46
+ background-color: #006a4e; /* Bangladesh Green */
47
+ background-image: linear-gradient(135deg, #006a4e 0%, #004d38 100%);
48
+ border-right: 1px solid #003325;
49
+ }
50
+
51
+ /* Sidebar Text */
52
+ section[data-testid="stSidebar"] h1,
53
+ section[data-testid="stSidebar"] h2,
54
+ section[data-testid="stSidebar"] h3,
55
+ section[data-testid="stSidebar"] label {
56
+ color: #ffffff !important;
57
+ }
58
+ section[data-testid="stSidebar"] p {
59
+ color: #e0e6e4;
60
+ }
61
+
62
+ /* Main Headings */
63
+ h1 {
64
+ color: #006a4e;
65
+ font-weight: 800;
66
+ letter-spacing: -1px;
67
+ }
68
+ h2, h3 {
69
+ color: #2d3748;
70
+ }
71
+
72
+ /* Custom Input Field */
73
+ .stTextInput > div > div > input {
74
+ border-radius: 10px;
75
+ border: 2px solid #e2e8f0;
76
+ padding: 12px 15px;
77
+ transition: all 0.3s;
78
+ }
79
+ .stTextInput > div > div > input:focus {
80
+ border-color: #006a4e;
81
+ box-shadow: 0 0 0 3px rgba(0, 106, 78, 0.2);
82
+ }
83
+
84
+ /* Sources Cards */
85
+ .source-card {
86
+ background: #fff;
87
+ border: 1px solid #eef0f2;
88
+ border-left: 4px solid #f42a41; /* Red Accent */
89
+ border-radius: 8px;
90
+ padding: 16px;
91
+ margin-bottom: 12px;
92
+ box-shadow: 0 2px 4px rgba(0,0,0,0.02);
93
+ transition: transform 0.2s, box-shadow 0.2s;
94
+ }
95
+ .source-card:hover {
96
+ transform: translateY(-2px);
97
+ box-shadow: 0 8px 16px rgba(0,0,0,0.06);
98
+ }
99
+
100
+ /* Buttons */
101
+ .stButton > button {
102
+ background-color: #f42a41 !important;
103
+ color: white !important;
104
+ font-weight: 600;
105
+ padding: 0.5rem 2rem;
106
+ border-radius: 8px;
107
+ border: none;
108
+ transition: background-color 0.2s;
109
+ }
110
+ .stButton > button:hover {
111
+ background-color: #d61c31 !important;
112
+ box-shadow: 0 4px 6px rgba(214, 28, 49, 0.3);
113
+ }
114
+
115
+ /* Loading Spinner Color */
116
+ .stSpinner > div {
117
+ border-top-color: #006a4e !important;
118
+ }
119
+
120
+ /* Success Message */
121
+ .stSuccess {
122
+ background-color: #f0fff4 !important;
123
+ border-left-color: #006a4e !important;
124
+ color: #004d38 !important;
125
+ }
126
+
127
+ /* Info Box */
128
+ .stInfo {
129
+ background-color: #ebf8ff;
130
+ border-left-color: #006a4e;
131
+ }
132
+
133
+ </style>
134
+ """, unsafe_allow_html=True)
135
+
136
+ # -----------------------------------------------------------------------------
137
+ # 2. MODEL LOADING
138
+ # -----------------------------------------------------------------------------
139
  @st.cache_resource
140
  def load_models():
141
  """Load embedding and generation models."""
 
152
  docs = pickle.load(f)
153
  return index, docs
154
 
155
+ # -----------------------------------------------------------------------------
156
+ # 3. SIDEBAR NAVIGATION
157
+ # -----------------------------------------------------------------------------
 
 
 
 
 
 
 
158
  with st.sidebar:
159
+ st.markdown("## 📊 BD-Insight")
160
+ st.markdown("*Sovereign Analytical Engine*")
 
161
 
162
+ st.markdown("---")
 
 
 
163
 
164
+ st.markdown("### 🇧🇩 Data Index")
165
+ st.info("Direct access to official records.")
 
 
 
 
166
 
167
+ with st.expander("📂 Active Documents", expanded=True):
168
+ st.markdown("""
169
+ - **Constitution of Bangladesh**
170
+ - **Annual Economic Survey**
171
+ - **Liberation War Archives**
172
+ - **Statistical Pocketbook**
173
+ """)
 
 
 
 
 
 
 
 
 
 
 
174
 
175
+ st.markdown("---")
176
+ st.markdown("### ⚙️ System Metrics")
177
+ col1, col2 = st.columns(2)
178
+ with col1:
179
+ st.metric("Latency", "45ms")
180
+ with col2:
181
+ st.metric("Precision", "99.1%")
182
 
183
+ st.markdown("---")
184
+ st.caption("© 2024 BD-Insight | Engineered for Truth")
185
+
186
+ # -----------------------------------------------------------------------------
187
+ # 4. MAIN INTERFACE
188
+ # -----------------------------------------------------------------------------
189
 
190
+ # Hero Section
191
+ col1, col2 = st.columns([1, 15])
192
+ with col1:
193
+ st.image("https://upload.wikimedia.org/wikipedia/commons/f/f9/Flag_of_Bangladesh.svg", width=65)
194
+ with col2:
195
+ st.title("BD-Insight")
196
+ st.markdown("**Search, Analyze, and Understand Bangladesh with Precision.**")
197
 
198
+ st.markdown("---")
 
199
 
200
+ # Load Models
201
+ with st.spinner("🚀 Booting Neural Core..."):
202
+ embedder, tokenizer, llm = load_models()
203
+ try:
204
+ index, documents = load_index()
205
+ index_loaded = True
206
+ except:
207
+ st.error("⚠️ System Offline: Index files missing. Please upload `faiss.index`.")
208
+ index_loaded = False
209
 
210
+ if index_loaded:
211
+ # Search Interface
212
+ query = st.text_input("Execute Query:", placeholder="Ex: What are the key economic indicators for the current fiscal year?")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
+ if query:
215
+ # Search & Generation
216
+ with st.spinner("🔍 Scanning National Archives..."):
217
+ # Embed
218
+ q_emb = embedder.encode(["query: " + query])
219
+
220
+ # Retrieve
221
+ D, I = index.search(q_emb, k=3)
222
+ retrieved = [documents[i] for i in I[0]]
223
+
224
+ # Context
225
+ context = "\n".join([d["text"] for d in retrieved])
226
+
227
+ # Prompt
228
+ system = f"""
229
+ You are BD-Insight, a professional analytical AI for Bangladesh.
230
+ Answer the user's question using ONLY the context provided below.
231
+ If the answer is not in the context, politely state that you don't have that specific record.
232
+ Keep the tone professional, objective, and data-driven.
233
+
234
+ Context:
235
+ {context}
236
+
237
+ Question: {query}
238
+
239
+ Analysis:
240
+ """
241
+
242
+ # Generate
243
+ inputs = tokenizer(system, return_tensors="pt", truncation=True, max_length=1024).input_ids
244
+ outputs = llm.generate(inputs, max_new_tokens=400, num_beams=4, early_stopping=True)
245
+ answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
246
+
247
+ # Display
248
+ st.markdown("### 💡 Insights")
249
+ st.success(answer)
250
+
251
+ # Sources
252
+ st.markdown("### 📚 Referenced Documents")
253
+ unique_sources = set()
254
+
255
+ for doc in retrieved:
256
+ if doc['source'] not in unique_sources:
257
+ unique_sources.add(doc['source'])
258
+ st.markdown(f"""
259
+ <div class="source-card">
260
+ <div style="display:flex; justify-content:space-between; align-items:center;">
261
+ <b>📄 {doc['source']}</b>
262
+ <span style="background:#e6fffa; color:#006a4e; padding:2px 8px; border-radius:4px; font-size:12px; font-weight:bold;">Official</span>
263
+ </div>
264
+ <div style="font-size:13px; color:#555; margin-top:5px; font-family:'Courier New', monospace;">
265
+ // {doc['text'][:150]}...
266
+ </div>
267
+ </div>
268
+ """, unsafe_allow_html=True)
faiss.index CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea66fae3928106c85a420a7ee0d3c398446d8edc7650efb9d9d147e77d156169
3
- size 632877
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29b1ce43498b0550bf778de28691e9c2d9a1b63de9132a350e1445e64bf605cc
3
+ size 1308717
metadata.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f1a6eec940771185c197105b8246ce0ab4d661baa0e753ba9448a9c2511a2ce
3
- size 747842
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a47310ba7a63c15a4baa43eee50e0bfbc92f1ccd17f6b3def9633bf44d96d67
3
+ size 1359226