rbbist committed on
Commit
c369c5f
·
verified ·
1 Parent(s): c099b4f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +192 -65
app.py CHANGED
@@ -1,112 +1,239 @@
 
1
  import gradio as gr
2
  from chromadb_semantic_search_for_dataset import semantic_search, build_compact_context
3
- from transformers import pipeline
4
  import time
 
5
 
6
- # Instantiate summarizer + answerer once
7
- SUMMARY_MODEL = "google/mt5-small" # Switched to smaller model for Spaces
8
- ANSWER_MODEL = "google/mt5-small" # Switched to smaller model for Spaces
 
 
 
 
9
 
10
- # Create pipelines (text2text-generation interface)
 
 
 
 
 
11
  try:
12
- summarizer = pipeline("text2text-generation", model=SUMMARY_MODEL, device=-1, tokenizer=SUMMARY_MODEL, use_fast=False)
13
- answerer = pipeline("text2text-generation", model=ANSWER_MODEL, device=-1, tokenizer=ANSWER_MODEL, use_fast=False)
14
- print("Summarizer pipeline initialized:", summarizer is not None)
15
- print("Answerer pipeline initialized:", answerer is not None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  except Exception as e:
17
- print(f"Pipeline initialization error: {e}")
18
  raise
19
 
20
- # Keep last search context in memory for RAG
21
  _last_combined_context = ""
22
  _last_search_query = ""
23
 
24
  def semantic_search_ui(search_text: str):
25
  """Runs semantic search and returns formatted results. Also stores summarized context for RAG."""
26
  global _last_combined_context, _last_search_query
27
- if not search_text.strip():
28
- return "Error: Please provide a search query.", ""
29
-
30
- formatted, top_docs, combined_context = semantic_search(search_text, n_results=2) # Reduced to 2 for performance
31
-
32
- # Summarize each top doc (short)
33
- summaries = []
34
- for idx, item in enumerate(top_docs, start=1):
35
- doc_text = item["document"][:1000] # Limit input length
36
- prompt = f"नेपालीमा संक्षिप्त सारांश बनाउनुहोस्: {doc_text}"
37
- try:
38
- summary_out = summarizer(prompt, max_length=100, do_sample=False)[0]["generated_text"].strip() # Reduced max_length
39
- print(f"Summary for doc {idx}: {summary_out}")
40
- except Exception as e:
41
- print(f"Summary error for doc {idx}: {e}")
42
- summary_out = (doc_text[:200] + "...").strip()
43
- summaries.append(summary_out)
44
-
45
- # Build compact combined context
46
- compact_context = build_compact_context(summaries)
47
- print("Compact context:", compact_context)
48
-
49
- # Save last context for RAG
50
- _last_combined_context = compact_context
51
- _last_search_query = search_text
52
-
53
- return formatted, compact_context
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  def rag_answer(question: str, search_text_for_context: str = ""):
56
  """
57
  Answer the user's question using RAG:
58
  - If search_text_for_context provided, run semantic search for it and use its summaries.
59
- - Otherwise, use the last search context stored in memory.
60
  """
61
  global _last_combined_context, _last_search_query
62
 
 
63
  start_time = time.time()
64
 
65
- # If user provided a search string, refresh context
66
- if search_text_for_context.strip():
 
67
  _, compact_context = semantic_search_ui(search_text_for_context)
68
  context = compact_context
69
  else:
70
  context = _last_combined_context
 
71
 
72
  if not context:
73
- return "Error: No context available. Please run a semantic search first or provide a search query."
 
 
 
 
 
 
 
 
 
 
 
74
 
75
- # Simplified prompt for mT5
76
- prompt = f"सन्दर्भ: {context}\nप्रश्न: {question}\nजवाफ:"
77
- print("RAG prompt:", prompt)
78
 
79
  try:
80
- out = answerer(prompt, max_length=200, do_sample=False)[0]["generated_text"].strip() # Reduced max_length
81
- print("RAG output:", out)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  except Exception as e:
83
- out = f"LLM generation error: {e}"
84
- print(f"RAG error: {e}")
85
 
86
  elapsed = time.time() - start_time
87
- footer = f"\n\n---\n(Generated in {elapsed:.2f}s using summaries of top-2 cases.)"
88
  return out + footer
89
 
 
90
  # --- Gradio UI ---
91
  with gr.Blocks() as demo:
92
- gr.Markdown("# 📚 Semantic Search + RAG (auto-summarize top-2) — Nepali cases")
 
93
 
94
  with gr.Tab("🔍 Semantic Search"):
95
- search_input = gr.Textbox(label="Search for a case (use Nepali preferred)", placeholder="मुद्दाको संक्षेप वा कीवर्ड टाइप गर्नुहोस्...")
 
 
 
96
  search_button = gr.Button("Search")
97
- search_results = gr.Markdown(label="Top 2 Similar Cases (formatted)")
98
- context_preview = gr.Textbox(label="Combined Summarized Context (for RAG)", interactive=False)
99
-
100
- search_button.click(fn=semantic_search_ui, inputs=search_input, outputs=[search_results, context_preview])
 
 
 
 
 
 
 
 
101
 
102
  with gr.Tab("🤖 Ask a Question (RAG)"):
103
- question_input = gr.Textbox(label="Your question (Nepali)", placeholder="यहाँ प्रश्न लेख्नुहोस्...")
104
- optional_search_input = gr.Textbox(label="Optional: Search query to refresh context", placeholder="(Optional) provide a search query to refresh top-2 context")
 
 
 
 
 
 
105
  ask_button = gr.Button("Get Answer")
106
- rag_output = gr.Markdown(label="LLM Answer (based on summarized top-2)")
107
-
108
- ask_button.click(fn=rag_answer, inputs=[question_input, optional_search_input], outputs=rag_output)
109
-
110
- gr.Markdown("Notes: The system summarizes the top-2 semantic results and uses those summaries as context for the LLM.")
111
-
112
- demo.launch(server_timeout=300) # Increased timeout for Spaces
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
  import gradio as gr
3
  from chromadb_semantic_search_for_dataset import semantic_search, build_compact_context
4
+ from transformers import pipeline, AutoTokenizer, MT5ForConditionalGeneration
5
  import time
6
+ import torch
7
 
8
# Candidate seq2seq checkpoints. Index 0 is the one actually loaded below;
# reorder the list (or change the index) to try a different checkpoint.
# NOTE(review): confirm that the selected checkpoint's tokenizer handles
# Devanagari input before relying on it for Nepali summaries/answers.
MODELS_TO_TRY = [
    "google/flan-t5-base",  # Better for instruction following
    "google/mt5-base",  # Smaller, more stable than large
    # "google/mt5-large"  # Original choice - may have issues
]

SUMMARY_MODEL = MODELS_TO_TRY[0]  # Start with flan-t5-base
ANSWER_MODEL = MODELS_TO_TRY[0]  # Use same model for consistency

print(f"Loading models: {SUMMARY_MODEL}")


def _load_text2text(model_name):
    """Build a CPU-only text2text-generation pipeline for ``model_name``."""
    return pipeline(
        "text2text-generation",
        model=model_name,
        device=-1,  # CPU
        model_kwargs={
            "torch_dtype": torch.float32,
            "low_cpu_mem_usage": True,
        },
    )


# Load the pipelines once at import time. Any failure is logged and re-raised
# so the app does not start without its models.
try:
    summarizer = _load_text2text(SUMMARY_MODEL)
    # Both roles normally point at the same checkpoint; reuse the loaded
    # pipeline in that case instead of holding a second copy of the weights.
    if ANSWER_MODEL == SUMMARY_MODEL:
        answerer = summarizer
    else:
        answerer = _load_text2text(ANSWER_MODEL)
    print("Models loaded successfully!")

except Exception as e:
    print(f"Error loading models: {e}")
    raise

# Keep last search context in memory so RAG can use the previous search if
# the user doesn't provide a new search query.
_last_combined_context = ""
_last_search_query = ""
51
 
def _summarize_document(doc_text, idx):
    """Return a short summary of one retrieved document.

    Falls back to the first 300 characters of the raw text (plus an ellipsis)
    when the summarizer raises, so one bad document does not abort the search.
    """
    excerpt = doc_text[:1000]  # cap the prompt size sent to the model
    if "flan-t5" in SUMMARY_MODEL.lower():
        prompt = f"Summarize this legal case in Nepali: {excerpt}"
    else:
        prompt = f"संक्षेपमा नेपालीमा सारांश बनाउनुहोस्: {excerpt}"

    try:
        # Greedy decoding: sampling-only options such as temperature are not
        # passed because they are ignored when do_sample=False.
        summary = summarizer(
            prompt,
            max_length=150,
            min_length=20,
            do_sample=False,
        )[0]["generated_text"]
        print(f"DEBUG: Generated summary {idx}: {summary[:100]}...")
    except Exception as e:
        print(f"DEBUG: Error generating summary {idx}: {e}")
        # fallback: truncated raw text
        summary = doc_text[:300] + "..."

    return summary.strip()


def semantic_search_ui(search_text: str):
    """Run semantic search, summarize the hits, and cache the context for RAG.

    Args:
        search_text: Query typed by the user.

    Returns:
        A ``(formatted_results, compact_context)`` pair. On a blank query or
        any failure the first element is an error message and the second is
        an empty string; the cached context is left untouched in that case.
    """
    global _last_combined_context, _last_search_query

    # Reject blank input before touching the vector store or the model.
    if not search_text or not search_text.strip():
        return "Error: Please provide a search query.", ""

    print(f"DEBUG: Starting semantic search for: {search_text}")

    try:
        formatted, top_docs, combined_context = semantic_search(search_text, n_results=3)
        print(f"DEBUG: Retrieved {len(top_docs)} documents")

        # Summarize each top doc (short)
        summaries = []
        for idx, item in enumerate(top_docs, start=1):
            doc_text = item["document"]
            print(f"DEBUG: Processing document {idx}, length: {len(doc_text)}")
            summaries.append(_summarize_document(doc_text, idx))

        # Build compact combined context for the answerer (limited length)
        compact_context = build_compact_context(summaries)
        print(f"DEBUG: Built compact context, length: {len(compact_context)}")

        # Save last context for Ask flow
        _last_combined_context = compact_context
        _last_search_query = search_text

        return formatted, compact_context

    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        error_msg = f"Error in semantic search: {e}"
        print(f"DEBUG: {error_msg}")
        return error_msg, ""
110
 
def rag_answer(question: str, search_text_for_context: str = ""):
    """Answer the user's question using RAG.

    - If ``search_text_for_context`` is provided, run semantic search for it
      and use its summaries as context.
    - Otherwise, use the last search context stored in memory
      (``_last_combined_context``).

    Args:
        question: The question to answer.
        search_text_for_context: Optional query used to refresh the context.

    Returns:
        The generated answer followed by a timing footer, or a plain message
        when the question is blank or no context is available.
    """
    # Reject blank input up front; slicing None below would raise otherwise.
    if not question or not question.strip():
        return "Please enter a question."

    print(f"DEBUG: RAG answer called with question: {question[:50]}...")
    start_time = time.time()

    # If user provided a search string in the RAG tab, refresh context
    if search_text_for_context and search_text_for_context.strip():
        print("DEBUG: Refreshing context with new search")
        _, context = semantic_search_ui(search_text_for_context)
    else:
        context = _last_combined_context
        print(f"DEBUG: Using cached context, length: {len(context)}")

    if not context:
        return "No context available. Please run a semantic search first or provide a search query."

    # Construct a simpler prompt that works better with the models
    if "flan-t5" in ANSWER_MODEL.lower():
        prompt = f"Based on these legal case summaries, answer the question in Nepali:\n\nContext: {context[:2000]}\n\nQuestion: {question}\n\nAnswer:"
    else:
        prompt = (
            "तपाईं एक कानुनी सहायक हुनुहुन्छ। तलका केस संक्षेप प्रयोग गरी प्रश्नको जवाफ नेपालीमा दिनुहोस्।\n\n"
            f"सन्दर्भ: {context[:2000]}\n\n"
            f"प्रश्न: {question}\n\n"
            "जवाफ:"
        )

    print(f"DEBUG: Generated prompt length: {len(prompt)}")
    print(f"DEBUG: Prompt preview: {prompt[:200]}...")

    try:
        # Greedy single-beam decoding: temperature (sampling only) and
        # early_stopping (beam search only) would have no effect here.
        result = answerer(
            prompt,
            max_length=400,
            min_length=30,
            do_sample=False,
        )

        out = result[0]["generated_text"].strip()
        print(f"DEBUG: Generated answer: {out[:100]}...")

        # Treat empty / near-empty generations as a failure to answer.
        if not out or len(out) < 10:
            out = "माफ गर्नुहोस्, मैले प्रश्नको उपयुक्त जवाफ उत्पन्न गर्न सकिन। कृपया फरक तरिकाले प्रश्न सोध्नुहोस्।"

    except Exception as e:
        print(f"DEBUG: LLM generation error: {e}")
        out = f"Error generating response: {e}. Please try with a simpler question."

    elapsed = time.time() - start_time
    footer = f"\n\n---\n(Generated in {elapsed:.2f}s using summaries of top-3 cases.)"
    return out + footer
173
 
174
+
# --- Gradio UI ---
# NOTE(review): indentation was lost in the listing this was taken from; the
# nesting below (notes block at Blocks level, beneath the tabs) is the
# presumed layout - confirm against the real file.
with gr.Blocks() as demo:
    # Page header plus the name of the checkpoint in use.
    gr.Markdown("# 📚 Semantic Search + RAG (auto-summarize top-3) — Nepali cases")
    gr.Markdown(f"**Debug Info**: Using models: {SUMMARY_MODEL}")

    # Tab 1: run a search and preview the summarized context kept for RAG.
    with gr.Tab("🔍 Semantic Search"):
        search_input = gr.Textbox(
            placeholder="मुद्दाको संक्षेप वा कीवर्ड टाइप गर्नुहोस्...",
            label="Search for a case (use Nepali preferred)",
        )
        search_button = gr.Button("Search")
        search_results = gr.Markdown(label="Top 3 Similar Cases (formatted)")
        context_preview = gr.Textbox(
            max_lines=10,
            interactive=False,
            label="Combined Summarized Context (for RAG)",
        )

        search_button.click(
            semantic_search_ui,
            inputs=search_input,
            outputs=[search_results, context_preview],
        )

    # Tab 2: ask a question, optionally refreshing the context first.
    with gr.Tab("🤖 Ask a Question (RAG)"):
        question_input = gr.Textbox(
            placeholder="यहाँ प्रश्न लेख्नुहोस्...",
            label="Your question (Nepali)",
        )
        optional_search_input = gr.Textbox(
            placeholder="(Optional) provide a search query to refresh top-3 context",
            label="Optional: Search query to refresh context",
        )
        ask_button = gr.Button("Get Answer")
        rag_output = gr.Markdown(label="LLM Answer (based on summarized top-3)")

        ask_button.click(
            rag_answer,
            inputs=[question_input, optional_search_input],
            outputs=rag_output,
        )

    # Tab 3: smoke test that sends a fixed translation prompt to the answerer.
    with gr.Tab("🐛 Test Model"):
        test_input = gr.Textbox(label="Test input", placeholder="Enter test text...")
        test_button = gr.Button("Test Model")
        test_output = gr.Textbox(label="Model output")

        def test_model(text):
            """Return the answerer's output for a translation probe, or the error text."""
            probe = f"Translate to Nepali: {text}"
            try:
                generated = answerer(probe, max_length=100, do_sample=False)
                return generated[0]["generated_text"]
            except Exception as e:
                return f"Model test failed: {e}"

        test_button.click(test_model, inputs=test_input, outputs=test_output)

    gr.Markdown("""
**Notes**:
- The system summarizes the top-3 semantic results and uses those summaries as context for the LLM
- If you experience issues, try the Test Model tab first
- Check the console logs for debugging information
""")

# Only start the server when executed as a script.
if __name__ == "__main__":
    demo.launch(debug=True)