Update app.py
Browse files
app.py
CHANGED
|
@@ -59,43 +59,56 @@ def semantic_search_ui(search_text: str):
|
|
| 59 |
formatted, top_docs, combined_context = semantic_search(search_text, n_results=3)
|
| 60 |
print(f"DEBUG: Retrieved {len(top_docs)} documents")
|
| 61 |
|
| 62 |
-
#
|
| 63 |
summaries = []
|
| 64 |
for idx, item in enumerate(top_docs, start=1):
|
| 65 |
doc_text = item["document"]
|
|
|
|
| 66 |
print(f"DEBUG: Processing document {idx}, length: {len(doc_text)}")
|
| 67 |
|
| 68 |
-
# Create a
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
else:
|
| 72 |
-
prompt = f"संक्षेपमा नेपालीमा सारांश बनाउनुहोस्: {doc_text[:1000]}"
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
-
#
|
| 93 |
-
|
| 94 |
-
summaries.append(
|
|
|
|
| 95 |
|
| 96 |
# Build compact combined context for the answerer (limited length)
|
| 97 |
compact_context = build_compact_context(summaries)
|
| 98 |
print(f"DEBUG: Built compact context, length: {len(compact_context)}")
|
|
|
|
| 99 |
|
| 100 |
# Save last context for Ask flow
|
| 101 |
_last_combined_context = compact_context
|
|
|
|
| 59 |
formatted, top_docs, combined_context = semantic_search(search_text, n_results=3)
|
| 60 |
print(f"DEBUG: Retrieved {len(top_docs)} documents")
|
| 61 |
|
| 62 |
+
# Skip model-based summarization for now - use direct text extraction instead
|
| 63 |
summaries = []
|
| 64 |
for idx, item in enumerate(top_docs, start=1):
|
| 65 |
doc_text = item["document"]
|
| 66 |
+
meta = item["metadata"]
|
| 67 |
print(f"DEBUG: Processing document {idx}, length: {len(doc_text)}")
|
| 68 |
|
| 69 |
+
# Create a manual summary using metadata and document text
|
| 70 |
+
# This is more reliable than model-based summarization
|
| 71 |
+
summary_parts = []
|
|
|
|
|
|
|
| 72 |
|
| 73 |
+
# Add key metadata
|
| 74 |
+
if meta.get('mudda_type'):
|
| 75 |
+
summary_parts.append(f"मुद्दाको किसिम: {meta['mudda_type']}")
|
| 76 |
+
if meta.get('subject'):
|
| 77 |
+
summary_parts.append(f"विषय: {meta['subject']}")
|
| 78 |
+
if meta.get('nibedak'):
|
| 79 |
+
summary_parts.append(f"निवेदक: {meta['nibedak'][:100]}...")
|
| 80 |
+
if meta.get('vipakshi'):
|
| 81 |
+
summary_parts.append(f"विपक्षी: {meta['vipakshi'][:100]}...")
|
| 82 |
+
|
| 83 |
+
# Add relevant text snippets (look for key legal terms)
|
| 84 |
+
doc_clean = doc_text.replace('["', '').replace('"]', '').replace('\\n', ' ')
|
| 85 |
+
|
| 86 |
+
# Extract sentences that contain important legal terms
|
| 87 |
+
important_sentences = []
|
| 88 |
+
sentences = doc_clean.split('।') # Split by Nepali sentence delimiter
|
| 89 |
+
|
| 90 |
+
for sentence in sentences[:5]: # Take first 5 sentences
|
| 91 |
+
sentence = sentence.strip()
|
| 92 |
+
if len(sentence) > 20 and any(term in sentence.lower() for term in ['फैसला', 'ठहर', 'अदालत', 'मुद्दा', 'कानुन']):
|
| 93 |
+
important_sentences.append(sentence[:200]) # Limit sentence length
|
| 94 |
+
|
| 95 |
+
if important_sentences:
|
| 96 |
+
summary_parts.append("मुख्य बुँदाहरू: " + "। ".join(important_sentences[:2]) + "।")
|
| 97 |
+
else:
|
| 98 |
+
# Fallback to first part of document
|
| 99 |
+
clean_start = doc_clean[:300].strip()
|
| 100 |
+
if clean_start:
|
| 101 |
+
summary_parts.append(f"विवरण: {clean_start}...")
|
| 102 |
|
| 103 |
+
# Combine all parts
|
| 104 |
+
manual_summary = " | ".join(summary_parts)
|
| 105 |
+
summaries.append(manual_summary)
|
| 106 |
+
print(f"DEBUG: Created manual summary {idx}: {manual_summary[:100]}...")
|
| 107 |
|
| 108 |
# Build compact combined context for the answerer (limited length)
|
| 109 |
compact_context = build_compact_context(summaries)
|
| 110 |
print(f"DEBUG: Built compact context, length: {len(compact_context)}")
|
| 111 |
+
print(f"DEBUG: Context preview: {compact_context[:200]}...")
|
| 112 |
|
| 113 |
# Save last context for Ask flow
|
| 114 |
_last_combined_context = compact_context
|