Spaces:
Sleeping
Sleeping
vthamaraikannan1@gmail.com committed on
Commit ·
646b9b3
1
Parent(s): eab9192
Enhance streamlit_app.py with improved document context handling and UI updates; add .gitignore for environment and build files
Browse files
- .gitignore +37 -0
- src/streamlit_app.py +141 -118
.gitignore
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ignore Python bytecode caches and local database files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
*.pyd
|
| 6 |
+
*.db
|
| 7 |
+
*.sqlite3
|
| 8 |
+
|
| 9 |
+
# Ignore virtual environment and IDE/editor folders
|
| 10 |
+
venv/
|
| 11 |
+
env/
|
| 12 |
+
ENV/
|
| 13 |
+
.venv/
|
| 14 |
+
.idea/
|
| 15 |
+
.vscode/
|
| 16 |
+
|
| 17 |
+
# Ignore OS generated files
|
| 18 |
+
.DS_Store
|
| 19 |
+
Thumbs.db
|
| 20 |
+
|
| 21 |
+
# Ignore logs and temp files
|
| 22 |
+
*.log
|
| 23 |
+
*.tmp
|
| 24 |
+
|
| 25 |
+
# Ignore coverage, packaging, and build artifacts
|
| 26 |
+
.coverage
|
| 27 |
+
htmlcov/
|
| 28 |
+
*.egg-info/
|
| 29 |
+
dist/
|
| 30 |
+
build/
|
| 31 |
+
|
| 32 |
+
# Ignore node modules if present
|
| 33 |
+
node_modules/
|
| 34 |
+
|
| 35 |
+
# Ignore Docker files themselves
|
| 36 |
+
Dockerfile
|
| 37 |
+
.dockerignore
|
src/streamlit_app.py
CHANGED
|
@@ -16,10 +16,12 @@ nltk.download("punkt_tab", quiet=True)
|
|
| 16 |
|
| 17 |
|
| 18 |
|
|
|
|
| 19 |
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
|
| 20 |
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
|
| 21 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 22 |
|
|
|
|
| 23 |
# -------------------------------
|
| 24 |
# Page Configuration
|
| 25 |
# -------------------------------
|
|
@@ -214,7 +216,7 @@ def initialize_pinecone():
|
|
| 214 |
|
| 215 |
@st.cache_resource(show_spinner=False)
|
| 216 |
def initialize_bm25():
|
| 217 |
-
with open("src
|
| 218 |
bm25 = pickle.load(f)
|
| 219 |
return bm25
|
| 220 |
|
|
@@ -273,20 +275,47 @@ def generate_ai_response(query, relevant_docs):
|
|
| 273 |
|
| 274 |
# Prepare context from relevant documents
|
| 275 |
context_parts = []
|
|
|
|
| 276 |
for i, doc in enumerate(relevant_docs, 1):
|
| 277 |
metadata = doc["metadata"]
|
| 278 |
text = metadata.get("text")
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 284 |
|
| 285 |
# Create the prompt for Groq
|
| 286 |
prompt = f"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
CONTEXT DOCUMENTS:
|
| 288 |
{context}
|
|
|
|
| 289 |
USER QUESTION: {query}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 290 |
"""
|
| 291 |
|
| 292 |
try:
|
|
@@ -295,7 +324,7 @@ def generate_ai_response(query, relevant_docs):
|
|
| 295 |
messages=[
|
| 296 |
{
|
| 297 |
"role": "system",
|
| 298 |
-
"content": """You are a professional assistant that answers user questions based **only on the content of provided document excerpts**. The user will ask a question, and you will also receive related text chunks retrieved from company documents or PDFs.
|
| 299 |
|
| 300 |
Instructions:
|
| 301 |
1. Use **only** the retrieved chunks to answer the user’s question. Do **not** add information from memory or outside sources.
|
|
@@ -327,9 +356,9 @@ def generate_ai_response(query, relevant_docs):
|
|
| 327 |
# -------------------------------
|
| 328 |
st.markdown("""
|
| 329 |
<div class="main-header">
|
| 330 |
-
<h1 style="margin: 0; font-size: 1.9rem;">
|
| 331 |
<p style="margin: 0.5rem 0 0 0; font-size: 1.1rem; opacity: 0.9;">
|
| 332 |
-
|
| 333 |
</p>
|
| 334 |
</div>
|
| 335 |
""", unsafe_allow_html=True)
|
|
@@ -338,57 +367,63 @@ st.markdown("""
|
|
| 338 |
# Sidebar for filters and mode toggle
|
| 339 |
# -------------------------------
|
| 340 |
def clear_all_filters():
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
st.session_state.company_filter = ""
|
| 343 |
st.session_state.fiscal_year_filter = ""
|
| 344 |
-
st.session_state.
|
| 345 |
-
st.session_state.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 346 |
|
| 347 |
with st.sidebar:
|
| 348 |
-
# Mode toggle
|
| 349 |
-
st.markdown("### 🤖 Search Mode")
|
| 350 |
-
|
| 351 |
-
chat_mode = st.toggle(
|
| 352 |
-
"💬 AI Chat Mode",
|
| 353 |
-
value=st.session_state.chat_mode,
|
| 354 |
-
help="Enable AI chat responses based on document content"
|
| 355 |
-
)
|
| 356 |
-
st.session_state.chat_mode = chat_mode
|
| 357 |
-
|
| 358 |
-
if chat_mode:
|
| 359 |
-
st.success("🤖 AI Chat Mode: ON\nGet AI-generated responses based on document content")
|
| 360 |
-
else:
|
| 361 |
-
st.info("📋 Search Mode: Document results only")
|
| 362 |
-
|
| 363 |
-
st.markdown("---")
|
| 364 |
st.markdown("### 🎯 Search Filters")
|
| 365 |
|
| 366 |
doc_type = st.selectbox(
|
| 367 |
-
"
|
| 368 |
-
["
|
| 369 |
key="doc_type_filter"
|
| 370 |
)
|
| 371 |
|
| 372 |
-
#
|
| 373 |
-
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
| 380 |
-
|
| 381 |
-
#
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 389 |
|
| 390 |
-
# Clear filters button
|
| 391 |
-
st.button("🗑️ Clear All Filters", on_click=clear_all_filters)
|
| 392 |
|
| 393 |
# Model info
|
| 394 |
st.markdown("---")
|
|
@@ -431,18 +466,49 @@ if search_clicked or (query and len(query.strip()) > 0):
|
|
| 431 |
else:
|
| 432 |
# Build filter dictionary
|
| 433 |
filter_dict = {}
|
|
|
|
|
|
|
| 434 |
if doc_type and doc_type != "All Types":
|
| 435 |
filter_dict["doc_type"] = {"$eq": doc_type}
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
# if fiscal_year.strip():
|
| 439 |
-
# filter_dict["fiscal_year"] = {"$eq": fiscal_year.strip()}
|
| 440 |
-
if page_no.strip():
|
| 441 |
try:
|
| 442 |
filter_dict["page_no"] = {"$eq": int(page_no.strip())}
|
| 443 |
except ValueError:
|
| 444 |
st.error("⚠️ Page number must be a valid integer.")
|
| 445 |
st.stop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 446 |
|
| 447 |
|
| 448 |
|
|
@@ -459,67 +525,35 @@ if search_clicked or (query and len(query.strip()) > 0):
|
|
| 459 |
ai_response = generate_ai_response(query, relevant_docs)
|
| 460 |
|
| 461 |
# Display AI response
|
| 462 |
-
st.markdown(ai_response,unsafe_allow_html=True)
|
|
|
|
| 463 |
|
| 464 |
|
| 465 |
|
| 466 |
st.markdown("---")
|
| 467 |
-
|
| 468 |
-
if filter_dict:
|
| 469 |
-
st.markdown("### 📌 Applied Filters")
|
| 470 |
-
filter_chips = ""
|
| 471 |
-
for key, value in filter_dict.items():
|
| 472 |
-
filter_value = value.get("$eq", "")
|
| 473 |
-
filter_chips += f'<span class="metadata-chip">{key}: {filter_value}</span>'
|
| 474 |
-
st.markdown(filter_chips, unsafe_allow_html=True)
|
| 475 |
|
| 476 |
if relevant_docs:
|
| 477 |
search_time = time.time() - start_time
|
| 478 |
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
<div class="stats-container">
|
| 482 |
-
<div style="display: flex; justify-content: space-between; align-items: center;">
|
| 483 |
-
<div>
|
| 484 |
-
<strong>🎯 Found {len(relevant_docs)} relevant results</strong>
|
| 485 |
-
</div>
|
| 486 |
-
<div>
|
| 487 |
-
<strong>⚡ {search_time:.2f}s</strong>
|
| 488 |
-
</div>
|
| 489 |
-
</div>
|
| 490 |
-
</div>
|
| 491 |
-
""", unsafe_allow_html=True)
|
| 492 |
# Display source documents
|
| 493 |
if st.session_state.chat_mode:
|
| 494 |
-
st.markdown("###
|
| 495 |
-
else:
|
| 496 |
-
|
| 497 |
|
| 498 |
for i, result in enumerate(relevant_docs, start=1):
|
| 499 |
metadata = result["metadata"]
|
| 500 |
text_content = metadata.get("text", "No text available")
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
<div >
|
| 506 |
-
<div style="display: flex; justify-content: between; align-items: flex-start; margin-bottom: 1rem;">
|
| 507 |
-
<h4 style="margin: 0; color: #f2f5f7; flex-grow: 1;">{"" if st.session_state.chat_mode else "Result"} #{i}</h4>
|
| 508 |
-
</div>
|
| 509 |
-
""", unsafe_allow_html=True)
|
| 510 |
-
|
| 511 |
-
# Display metadata as chips
|
| 512 |
-
st.markdown("#### 📊 Metadata:")
|
| 513 |
-
metadata_chips = ""
|
| 514 |
-
for key, value in metadata.items():
|
| 515 |
-
if key != "text": # Don't show text in metadata chips
|
| 516 |
-
metadata_chips += f'<span class="metadata-chip">{key}: {value}</span>'
|
| 517 |
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
# Display text content
|
| 522 |
-
st.markdown(f"#### 📝 Content:")
|
| 523 |
st.markdown(f'<div style="background: #303336; padding: 1rem; border-radius: 8px; margin: 1rem 0; line-height: 1.6;">{text_content}</div>', unsafe_allow_html=True)
|
| 524 |
|
| 525 |
|
|
@@ -554,24 +588,13 @@ if search_clicked or (query and len(query.strip()) > 0):
|
|
| 554 |
if not query:
|
| 555 |
st.markdown("---")
|
| 556 |
st.markdown("### 💡 How to Use")
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
- Results show document excerpts and metadata
|
| 565 |
-
- Use filters to narrow down results
|
| 566 |
-
""")
|
| 567 |
-
|
| 568 |
-
with col2:
|
| 569 |
-
st.markdown("""
|
| 570 |
-
**💬 AI Chat Mode:**
|
| 571 |
-
- Ask natural language questions
|
| 572 |
-
- Get AI-generated answers based on documents
|
| 573 |
-
- View source documents used for the response
|
| 574 |
-
""")
|
| 575 |
|
| 576 |
# -------------------------------
|
| 577 |
# Footer
|
|
|
|
| 16 |
|
| 17 |
|
| 18 |
|
| 19 |
+
|
| 20 |
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
|
| 21 |
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
|
| 22 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 23 |
|
| 24 |
+
|
| 25 |
# -------------------------------
|
| 26 |
# Page Configuration
|
| 27 |
# -------------------------------
|
|
|
|
| 216 |
|
| 217 |
@st.cache_resource(show_spinner=False)
|
| 218 |
def initialize_bm25():
|
| 219 |
+
with open(r"D:\rag_hugging\AI-DocumentSearch\src\bm25_model.pkl", "rb") as f:
|
| 220 |
bm25 = pickle.load(f)
|
| 221 |
return bm25
|
| 222 |
|
|
|
|
| 275 |
|
| 276 |
# Prepare context from relevant documents
|
| 277 |
context_parts = []
|
| 278 |
+
sources = []
|
| 279 |
for i, doc in enumerate(relevant_docs, 1):
|
| 280 |
metadata = doc["metadata"]
|
| 281 |
text = metadata.get("text")
|
| 282 |
+
doc_id = metadata.get("doc_id")
|
| 283 |
+
title = metadata.get("title")
|
| 284 |
+
fiscal_year = metadata.get("fiscal_year")
|
| 285 |
+
page_no = metadata.get("page_no")
|
| 286 |
+
|
| 287 |
+
# Context for LLM
|
| 288 |
+
context_parts.append(f"[CHUNK {i} DOC {doc_id} {title} fiscal year {fiscal_year} ] (Page {page_no})\n{text}")
|
| 289 |
+
|
| 290 |
+
# Collect for UI
|
| 291 |
+
sources.append({
|
| 292 |
+
"id": i,
|
| 293 |
+
"title": title,
|
| 294 |
+
"page": page_no,
|
| 295 |
+
"doc_type": metadata.get("doc_type", ""),
|
| 296 |
+
})
|
| 297 |
+
|
| 298 |
+
context = "\n\n".join(context_parts)
|
| 299 |
|
| 300 |
# Create the prompt for Groq
|
| 301 |
prompt = f"""
|
| 302 |
+
|
| 303 |
+
You will answer the question using ONLY the provided document excerpts.
|
| 304 |
+
|
| 305 |
+
When you use information from a document, cite it with the format [DOC i],
|
| 306 |
+
where i corresponds to the document number given in CONTEXT DOCUMENTS.
|
| 307 |
+
|
| 308 |
+
If multiple docs are relevant, cite all of them (e.g., [DOC 1][DOC 3]).
|
| 309 |
+
|
| 310 |
+
|
| 311 |
CONTEXT DOCUMENTS:
|
| 312 |
{context}
|
| 313 |
+
|
| 314 |
USER QUESTION: {query}
|
| 315 |
+
|
| 316 |
+
ANSWER : " "
|
| 317 |
+
|
| 318 |
+
|
| 319 |
"""
|
| 320 |
|
| 321 |
try:
|
|
|
|
| 324 |
messages=[
|
| 325 |
{
|
| 326 |
"role": "system",
|
| 327 |
+
"content": """You are a professional assistant that answers user questions based **only on the content of provided document excerpts**. The user will ask a question, and you will also receive related text chunks retrieved from company documents or PDFs.
|
| 328 |
|
| 329 |
Instructions:
|
| 330 |
1. Use **only** the retrieved chunks to answer the user’s question. Do **not** add information from memory or outside sources.
|
|
|
|
| 356 |
# -------------------------------
|
| 357 |
st.markdown("""
|
| 358 |
<div class="main-header">
|
| 359 |
+
<h1 style="margin: 0; font-size: 1.9rem;"> Hybrid Search RAG </h1>
|
| 360 |
<p style="margin: 0.5rem 0 0 0; font-size: 1.1rem; opacity: 0.9;">
|
| 361 |
+
Using Groq LLM, Pinecone, and Sentence Transformers
|
| 362 |
</p>
|
| 363 |
</div>
|
| 364 |
""", unsafe_allow_html=True)
|
|
|
|
| 367 |
# Sidebar for filters and mode toggle
|
| 368 |
# -------------------------------
|
| 369 |
def clear_all_filters():
|
| 370 |
+
# Common
|
| 371 |
+
st.session_state.search_query = ""
|
| 372 |
+
st.session_state.page_no_filter = ""
|
| 373 |
+
|
| 374 |
+
# Annual Report
|
| 375 |
st.session_state.company_filter = ""
|
| 376 |
st.session_state.fiscal_year_filter = ""
|
| 377 |
+
st.session_state.currency_filter = ""
|
| 378 |
+
st.session_state.segment_filter = ""
|
| 379 |
+
|
| 380 |
+
# Contract Report
|
| 381 |
+
st.session_state.agreement_date_filter = ""
|
| 382 |
+
st.session_state.promoter_filter = ""
|
| 383 |
+
st.session_state.allottee_filter = ""
|
| 384 |
+
st.session_state.project_name_filter = ""
|
| 385 |
+
st.session_state.apartment_block_filter = ""
|
| 386 |
+
st.session_state.apartment_floor_filter = ""
|
| 387 |
+
st.session_state.apartment_type_filter = ""
|
| 388 |
+
# st.session_state.carpet_area_filter = "" # if you add this back
|
| 389 |
+
st.session_state.jurisdiction_filter = ""
|
| 390 |
+
|
| 391 |
|
| 392 |
with st.sidebar:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
st.markdown("### 🎯 Search Filters")
|
| 394 |
|
| 395 |
doc_type = st.selectbox(
|
| 396 |
+
"Document Type",
|
| 397 |
+
["annual_report", "contract_report"],
|
| 398 |
key="doc_type_filter"
|
| 399 |
)
|
| 400 |
|
| 401 |
+
# Annual Report filters
|
| 402 |
+
if doc_type == "annual_report":
|
| 403 |
+
with st.expander("Annual Report Filters", expanded=False):
|
| 404 |
+
company = st.text_input("Company", placeholder="Enter company name...", key="company_filter")
|
| 405 |
+
fiscal_year = st.text_input("Fiscal Year", placeholder="e.g., 2024", key="fiscal_year_filter")
|
| 406 |
+
currency = st.text_input("Currency", placeholder="e.g., USD", key="currency_filter")
|
| 407 |
+
segment = st.text_input("Segment", placeholder="e.g., Paint Stores Group", key="segment_filter")
|
| 408 |
+
page_no = st.text_input("Page Number", placeholder="e.g., 15", key="page_no_filter")
|
| 409 |
+
|
| 410 |
+
# Contract Report filters
|
| 411 |
+
elif doc_type == "contract_report":
|
| 412 |
+
with st.expander("Contract Report Filters", expanded=False):
|
| 413 |
+
agreement_date = st.text_input("Agreement Date", placeholder="YYYY-MM-DD", key="agreement_date_filter")
|
| 414 |
+
promoter = st.text_input("Promoter / Developer", placeholder="Enter promoter name...", key="promoter_filter")
|
| 415 |
+
allottee = st.text_input("Allottee (Buyer)", placeholder="Enter allottee name...", key="allottee_filter")
|
| 416 |
+
project_name = st.text_input("Project Name", placeholder="Enter project name...", key="project_name_filter")
|
| 417 |
+
apartment_block = st.text_input("Block", placeholder="e.g., Tower A", key="apartment_block_filter")
|
| 418 |
+
apartment_floor = st.text_input("Floor", placeholder="e.g., 10th floor", key="apartment_floor_filter")
|
| 419 |
+
apartment_type = st.text_input("Apartment Type", placeholder="e.g., 2BHK", key="apartment_type_filter")
|
| 420 |
+
jurisdiction = st.text_input("Jurisdiction", placeholder="e.g., Madras High Court", key="jurisdiction_filter")
|
| 421 |
+
page_no = st.text_input("Page Number", placeholder="e.g., 15", key="page_no_filter")
|
| 422 |
+
|
| 423 |
+
# Reset button
|
| 424 |
+
st.button("Clear All Filters", on_click=clear_all_filters)
|
| 425 |
+
|
| 426 |
|
|
|
|
|
|
|
| 427 |
|
| 428 |
# Model info
|
| 429 |
st.markdown("---")
|
|
|
|
| 466 |
else:
|
| 467 |
# Build filter dictionary
|
| 468 |
filter_dict = {}
|
| 469 |
+
|
| 470 |
+
# Common filters
|
| 471 |
if doc_type and doc_type != "All Types":
|
| 472 |
filter_dict["doc_type"] = {"$eq": doc_type}
|
| 473 |
+
|
| 474 |
+
if page_no and page_no.strip():
|
|
|
|
|
|
|
|
|
|
| 475 |
try:
|
| 476 |
filter_dict["page_no"] = {"$eq": int(page_no.strip())}
|
| 477 |
except ValueError:
|
| 478 |
st.error("⚠️ Page number must be a valid integer.")
|
| 479 |
st.stop()
|
| 480 |
+
|
| 481 |
+
# Annual Report filters
|
| 482 |
+
if doc_type == "annual_report":
|
| 483 |
+
if company and company.strip():
|
| 484 |
+
filter_dict["company"] = {"$eq": company.strip()}
|
| 485 |
+
if fiscal_year and fiscal_year.strip():
|
| 486 |
+
filter_dict["fiscal_year"] = {"$eq": fiscal_year.strip()}
|
| 487 |
+
if currency and currency.strip():
|
| 488 |
+
filter_dict["currency"] = {"$eq": currency.strip()}
|
| 489 |
+
if segment and segment.strip():
|
| 490 |
+
filter_dict["segment"] = {"$eq": segment.strip()}
|
| 491 |
+
|
| 492 |
+
# Contract Report filters
|
| 493 |
+
elif doc_type == "contract_report":
|
| 494 |
+
if agreement_date and agreement_date.strip():
|
| 495 |
+
filter_dict["agreement_date"] = {"$eq": agreement_date.strip()}
|
| 496 |
+
if promoter and promoter.strip():
|
| 497 |
+
filter_dict["promoter_legal_name"] = {"$eq": promoter.strip()}
|
| 498 |
+
if allottee and allottee.strip():
|
| 499 |
+
filter_dict["allottee_name"] = {"$eq": allottee.strip()}
|
| 500 |
+
if project_name and project_name.strip():
|
| 501 |
+
filter_dict["project_name"] = {"$eq": project_name.strip()}
|
| 502 |
+
if apartment_block and apartment_block.strip():
|
| 503 |
+
filter_dict["apartment_block"] = {"$eq": apartment_block.strip()}
|
| 504 |
+
if apartment_floor and apartment_floor.strip():
|
| 505 |
+
filter_dict["apartment_floor"] = {"$eq": apartment_floor.strip()}
|
| 506 |
+
if apartment_type and apartment_type.strip():
|
| 507 |
+
filter_dict["apartment_type"] = {"$eq": apartment_type.strip()}
|
| 508 |
+
if jurisdiction and jurisdiction.strip():
|
| 509 |
+
filter_dict["jurisdiction"] = {"$eq": jurisdiction.strip()}
|
| 510 |
+
|
| 511 |
+
|
| 512 |
|
| 513 |
|
| 514 |
|
|
|
|
| 525 |
ai_response = generate_ai_response(query, relevant_docs)
|
| 526 |
|
| 527 |
# Display AI response
|
| 528 |
+
# st.markdown(ai_response,unsafe_allow_html=True)
|
| 529 |
+
st.markdown(f'<div style="background: #303336; padding: 1rem; border-radius: 8px; margin: 1rem 0; line-height: 1.6;">{ai_response}</div>', unsafe_allow_html=True)
|
| 530 |
|
| 531 |
|
| 532 |
|
| 533 |
st.markdown("---")
|
| 534 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 535 |
|
| 536 |
if relevant_docs:
|
| 537 |
search_time = time.time() - start_time
|
| 538 |
|
| 539 |
+
|
| 540 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 541 |
# Display source documents
|
| 542 |
if st.session_state.chat_mode:
|
| 543 |
+
st.markdown("### Evidence")
|
| 544 |
+
# else:
|
| 545 |
+
# st.markdown("### 📋 Search Results")
|
| 546 |
|
| 547 |
for i, result in enumerate(relevant_docs, start=1):
|
| 548 |
metadata = result["metadata"]
|
| 549 |
text_content = metadata.get("text", "No text available")
|
| 550 |
+
doc_id = metadata.get("doc_id", "N/A")
|
| 551 |
+
page_no = metadata.get("page_no", "N/A")
|
| 552 |
+
title = metadata.get("title")
|
| 553 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 554 |
|
| 555 |
+
st.markdown("#### [{i}] DOC : {doc_id} | Page: {page_no} | Title {title}".format(i=i, doc_id=doc_id, page_no=page_no, title=title))
|
| 556 |
+
|
|
|
|
|
|
|
|
|
|
| 557 |
st.markdown(f'<div style="background: #303336; padding: 1rem; border-radius: 8px; margin: 1rem 0; line-height: 1.6;">{text_content}</div>', unsafe_allow_html=True)
|
| 558 |
|
| 559 |
|
|
|
|
| 588 |
if not query:
|
| 589 |
st.markdown("---")
|
| 590 |
st.markdown("### 💡 How to Use")
|
| 591 |
+
|
| 592 |
+
st.markdown("""
|
| 593 |
+
**💬 AI Chat Mode:**
|
| 594 |
+
- Ask natural language questions
|
| 595 |
+
- Get AI-generated answers based on documents
|
| 596 |
+
- View source documents used for the response
|
| 597 |
+
""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 598 |
|
| 599 |
# -------------------------------
|
| 600 |
# Footer
|