sofzcc committed on
Commit
9670a0e
·
verified ·
1 Parent(s): ef49b02

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -74
app.py CHANGED
@@ -4,19 +4,17 @@ from typing import List, Tuple
4
 
5
  import gradio as gr
6
  import numpy as np
7
- from sentence_transformers import SentenceTransformer
8
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
9
- import torch
10
 
11
  # -----------------------------
12
  # CONFIG
13
  # -----------------------------
14
  KB_DIR = "./kb" # folder with .txt or .md files
15
  EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
16
- GEN_MODEL_NAME = "google/flan-t5-base"
17
  TOP_K = 3
18
  CHUNK_SIZE = 500 # characters
19
  CHUNK_OVERLAP = 100 # characters
 
20
 
21
  # -----------------------------
22
  # UTILITIES
@@ -151,85 +149,118 @@ class KBIndex:
151
  # Initialize KB index
152
  print("Initializing KB index...")
153
  kb_index = KBIndex()
154
-
155
- # Initialize generation model
156
- print("Loading generation model...")
157
- gen_tokenizer = AutoTokenizer.from_pretrained(GEN_MODEL_NAME)
158
- gen_model = AutoModelForSeq2SeqLM.from_pretrained(GEN_MODEL_NAME)
159
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
160
- gen_model.to(device)
161
- gen_model.eval()
162
- print(f"Generation model ready on {device}.")
163
 
164
  # -----------------------------
165
- # CHAT LOGIC
166
  # -----------------------------
167
 
168
- def build_context_from_results(results: List[Tuple[str, str, float]]) -> str:
169
- """
170
- Turn retrieved chunks into a compact context string for the LLM.
171
  """
172
- context_parts = []
173
- for chunk, source, score in results:
174
- cleaned = chunk.strip()
175
- context_parts.append(f"From {source}:\n{cleaned}")
176
- return "\n\n".join(context_parts)
177
-
178
-
179
- def build_answer(query: str) -> str:
180
  """
181
- Use the KB index to retrieve relevant chunks,
182
- then ask FLAN-T5 to write a natural answer based ONLY on that context.
183
- """
184
- results = kb_index.search(query, top_k=TOP_K)
185
-
186
  if not results:
187
  return (
188
- "I couldn't find anything relevant in the knowledge base for this query yet.\n\n"
189
- "If this were connected to your real KB, this would be a good moment to:\n"
190
- "- Create a new article, or\n"
191
- "- Improve the existing documentation for this topic."
 
 
192
  )
193
-
194
- # Build context for the model
195
- context = build_context_from_results(results)
196
-
197
- # Short list of sources for citation
198
- source_names = list({src for _, src, _ in results})
199
- source_line = "Based on: " + ", ".join(source_names)
200
-
201
- # Prompt for FLAN-T5
202
- prompt = (
203
- "You are a helpful knowledge base assistant.\n"
204
- "Using ONLY the information in the context below, answer the user's question "
205
- "in a clear, concise, and natural way. Focus on practical guidance.\n\n"
206
- f"Context:\n{context}\n\n"
207
- f"Question: {query}\n\n"
208
- "Answer in 2–5 short paragraphs. If something is not covered in the context, say that.\n"
209
- )
210
-
211
- inputs = gen_tokenizer(
212
- prompt,
213
- return_tensors="pt",
214
- truncation=True,
215
- max_length=2048,
216
- ).to(device)
217
-
218
- with torch.no_grad():
219
- output_ids = gen_model.generate(
220
- **inputs,
221
- max_length=512,
222
- temperature=0.7,
223
- top_p=0.95,
224
- num_beams=4,
225
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
- answer_text = gen_tokenizer.decode(output_ids[0], skip_special_tokens=True).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
- # Add source citation at the end
230
- final_answer = f"{answer_text}\n\n— {source_line}"
231
 
232
- return final_answer
 
 
 
 
 
 
 
 
 
233
 
234
 
235
  def chat_respond(message: str, history):
@@ -259,10 +290,15 @@ def chat_respond(message: str, history):
259
  # -----------------------------
260
 
261
  description = """
262
- Ask questions as if you were talking to a knowledge base assistant.
263
- In a real scenario, this assistant would be connected to your own
264
- help center or internal documentation. Here, it's using a small demo
265
- knowledge base to show how retrieval-based self-service can work.
 
 
 
 
 
266
  """
267
 
268
  # Create ChatInterface (without 'type' parameter for compatibility)
 
4
 
5
  import gradio as gr
6
  import numpy as np
7
+ from sentence_transformers import SentenceTransformer
 
 
8
 
9
  # -----------------------------
10
  # CONFIG
11
  # -----------------------------
12
  KB_DIR = "./kb" # folder with .txt or .md files
13
  EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 
14
  TOP_K = 3
15
  CHUNK_SIZE = 500 # characters
16
  CHUNK_OVERLAP = 100 # characters
17
+ MIN_SIMILARITY_THRESHOLD = 0.3 # Minimum similarity score to include results
18
 
19
  # -----------------------------
20
  # UTILITIES
 
149
  # Initialize KB index
150
  print("Initializing KB index...")
151
  kb_index = KBIndex()
152
+ print("✅ KB Assistant ready!")
 
 
 
 
 
 
 
 
153
 
154
  # -----------------------------
155
+ # CHAT LOGIC (Retrieval-Only, No LLM)
156
  # -----------------------------
157
 
158
def format_answer_from_results(query: str, results: List[Tuple[str, str, float]]) -> str:
    """
    Format a helpful answer from retrieved chunks without using an LLM.

    This is much faster than generation and works well for knowledge base
    lookup: the best-scoring chunk becomes the answer body, and remaining
    sources are cited underneath.

    Args:
        query: The user's question (kept for interface compatibility;
            not used in the formatting itself).
        results: ``(chunk_text, source_name, similarity_score)`` tuples,
            assumed ordered best-first by the retriever — TODO confirm
            against ``KBIndex.search``.

    Returns:
        A Markdown-formatted answer string, or a guidance message when
        nothing (relevant) was retrieved.
    """
    if not results:
        return (
            "❌ **I couldn't find anything relevant in the knowledge base for this query.**\n\n"
            "**Suggestions:**\n"
            "- Try rephrasing your question\n"
            "- Use different keywords\n"
            "- Check if the information exists in the knowledge base\n\n"
            "If this information should be available, consider adding it to the KB."
        )

    # Drop weak matches so barely-related content is never presented as an answer.
    filtered_results = [
        (chunk, src, score)
        for chunk, src, score in results
        if score >= MIN_SIMILARITY_THRESHOLD
    ]

    if not filtered_results:
        return (
            "⚠️ **I found some related content, but it doesn't seem very relevant to your question.**\n\n"
            "**Try:**\n"
            "- Being more specific in your question\n"
            "- Using different terminology\n"
            "- Breaking down complex questions into simpler parts"
        )

    answer_parts = []

    # The best (highest scoring) result drives the main answer body.
    best_chunk, best_source, best_score = filtered_results[0]
    cleaned_content = clean_markdown(best_chunk)

    # Traffic-light emoji signals retrieval confidence to the user.
    relevance_emoji = "🟢" if best_score > 0.7 else "🟡" if best_score > 0.5 else "🟠"
    answer_parts.append(f"{relevance_emoji} **Answer from: {best_source}**\n")
    answer_parts.append(cleaned_content)

    # Mention other relevant sources, deduplicated in retrieval order and
    # excluding the primary source. (The previous version used set(), which
    # produced a non-deterministic order and could repeat best_source.)
    other_sources = list(dict.fromkeys(
        src for _, src, _ in filtered_results[1:] if src != best_source
    ))
    if other_sources:
        answer_parts.append(f"\n\n💡 **Additional information available in:** {', '.join(other_sources)}")

    # Footer: full citation list, deduplicated with stable ordering.
    answer_parts.append("\n\n---")
    all_sources = list(dict.fromkeys(src for _, src, _ in filtered_results))
    answer_parts.append(f"📚 **Sources:** {', '.join(all_sources)}")

    return "\n".join(answer_parts)
214
+
215
 
216
def clean_markdown(text: str) -> str:
    """
    Clean up markdown text for better readability.

    Blank lines are dropped, '#' headers become bold text preceded by a
    blank line, and every other line (list items, numbered items, plain
    prose) is kept verbatim after stripping. Runs of three or more
    consecutive newlines collapse down to a single blank line.
    """
    kept = []
    for raw in text.split('\n'):
        stripped = raw.strip()
        if not stripped:
            continue
        if stripped.startswith('#'):
            # Headers render as bold; a bare "#" with no title is dropped.
            title = stripped.lstrip('#').strip()
            if title:
                kept.append(f"\n**{title}**")
        else:
            # Bullet lists, numbered lists and prose all pass through as-is.
            kept.append(stripped)

    collapsed = '\n'.join(kept)
    # Squash any triple-or-more newline runs introduced by header spacing.
    while '\n\n\n' in collapsed:
        collapsed = collapsed.replace('\n\n\n', '\n\n')
    return collapsed.strip()
252
 
 
 
253
 
254
def build_answer(query: str) -> str:
    """
    Fast retrieval-based answer without LLM generation.

    Looks up the top-K most similar KB chunks for *query* via the
    module-level index and renders them into a user-facing answer.
    """
    matches = kb_index.search(query, top_k=TOP_K)
    return format_answer_from_results(query, matches)
264
 
265
 
266
  def chat_respond(message: str, history):
 
290
  # -----------------------------
291
 
292
  description = """
293
+ 🚀 **Fast Knowledge Base Search Assistant**
294
+
295
+ Ask questions and get instant answers from the knowledge base.
296
+ This assistant uses semantic search to find the most relevant information quickly.
297
+
298
+ **Tips for better results:**
299
+ - Be specific in your questions
300
+ - Use keywords related to your topic
301
+ - Ask one question at a time
302
  """
303
 
304
  # Create ChatInterface (without 'type' parameter for compatibility)