rbbist committed on
Commit
d6b3fb4
·
verified ·
1 Parent(s): 6299703

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -26
app.py CHANGED
@@ -59,43 +59,56 @@ def semantic_search_ui(search_text: str):
59
  formatted, top_docs, combined_context = semantic_search(search_text, n_results=3)
60
  print(f"DEBUG: Retrieved {len(top_docs)} documents")
61
 
62
- # Summarize each top doc (short)
63
  summaries = []
64
  for idx, item in enumerate(top_docs, start=1):
65
  doc_text = item["document"]
 
66
  print(f"DEBUG: Processing document {idx}, length: {len(doc_text)}")
67
 
68
- # Create a simpler prompt that works better with T5/MT5
69
- if "flan-t5" in SUMMARY_MODEL.lower():
70
- prompt = f"Summarize this legal case in Nepali: {doc_text[:1000]}"
71
- else:
72
- prompt = f"संक्षेपमा नेपालीमा सारांश बनाउनुहोस्: {doc_text[:1000]}"
73
 
74
- try:
75
- # Better generation parameters
76
- summary_out = summarizer(
77
- prompt,
78
- max_length=150,
79
- min_length=20,
80
- do_sample=False,
81
- temperature=0.7,
82
- pad_token_id=summarizer.tokenizer.eos_token_id
83
- )[0]["generated_text"]
84
-
85
- print(f"DEBUG: Generated summary {idx}: {summary_out[:100]}...")
86
-
87
- except Exception as e:
88
- print(f"DEBUG: Error generating summary {idx}: {e}")
89
- # fallback: truncated raw text
90
- summary_out = (doc_text[:300] + "...")
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- # Clean/truncate extra whitespace
93
- summary_out = summary_out.strip()
94
- summaries.append(summary_out)
 
95
 
96
  # Build compact combined context for the answerer (limited length)
97
  compact_context = build_compact_context(summaries)
98
  print(f"DEBUG: Built compact context, length: {len(compact_context)}")
 
99
 
100
  # Save last context for Ask flow
101
  _last_combined_context = compact_context
 
59
  formatted, top_docs, combined_context = semantic_search(search_text, n_results=3)
60
  print(f"DEBUG: Retrieved {len(top_docs)} documents")
61
 
62
+ # Skip model-based summarization for now - use direct text extraction instead
63
  summaries = []
64
  for idx, item in enumerate(top_docs, start=1):
65
  doc_text = item["document"]
66
+ meta = item["metadata"]
67
  print(f"DEBUG: Processing document {idx}, length: {len(doc_text)}")
68
 
69
+ # Create a manual summary using metadata and document text
70
+ # This is more reliable than model-based summarization
71
+ summary_parts = []
 
 
72
 
73
+ # Add key metadata
74
+ if meta.get('mudda_type'):
75
+ summary_parts.append(f"मुद्दाको किसिम: {meta['mudda_type']}")
76
+ if meta.get('subject'):
77
+ summary_parts.append(f"विषय: {meta['subject']}")
78
+ if meta.get('nibedak'):
79
+ summary_parts.append(f"निवेदक: {meta['nibedak'][:100]}...")
80
+ if meta.get('vipakshi'):
81
+ summary_parts.append(f"विपक्षी: {meta['vipakshi'][:100]}...")
82
+
83
+ # Add relevant text snippets (look for key legal terms)
84
+ doc_clean = doc_text.replace('["', '').replace('"]', '').replace('\\n', ' ')
85
+
86
+ # Extract sentences that contain important legal terms
87
+ important_sentences = []
88
+ sentences = doc_clean.split('।') # Split by Nepali sentence delimiter
89
+
90
+ for sentence in sentences[:5]: # Take first 5 sentences
91
+ sentence = sentence.strip()
92
+ if len(sentence) > 20 and any(term in sentence.lower() for term in ['फैसला', 'ठहर', 'अदालत', 'मुद्दा', 'कानुन']):
93
+ important_sentences.append(sentence[:200]) # Limit sentence length
94
+
95
+ if important_sentences:
96
+ summary_parts.append("मुख्य बुँदाहरू: " + "। ".join(important_sentences[:2]) + "।")
97
+ else:
98
+ # Fallback to first part of document
99
+ clean_start = doc_clean[:300].strip()
100
+ if clean_start:
101
+ summary_parts.append(f"विवरण: {clean_start}...")
102
 
103
+ # Combine all parts
104
+ manual_summary = " | ".join(summary_parts)
105
+ summaries.append(manual_summary)
106
+ print(f"DEBUG: Created manual summary {idx}: {manual_summary[:100]}...")
107
 
108
  # Build compact combined context for the answerer (limited length)
109
  compact_context = build_compact_context(summaries)
110
  print(f"DEBUG: Built compact context, length: {len(compact_context)}")
111
+ print(f"DEBUG: Context preview: {compact_context[:200]}...")
112
 
113
  # Save last context for Ask flow
114
  _last_combined_context = compact_context