Sazid2 committed on
Commit
8ba6650
·
verified ·
1 Parent(s): 7f26a44

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +98 -153
app.py CHANGED
@@ -1,15 +1,6 @@
1
- # app.py
2
  """
3
  Jajabor – SEBA Assamese Class 10 Tutor (Free-tier CPU-ready)
4
- - PDF reading: PyPDF2
5
- - CPU LLM: google/flan-t5-small (transformers pipeline)
6
- - Embeddings: sentence-transformers/all-MiniLM-L6-v2
7
- - FAISS for retrieval
8
- - OCR via pytesseract
9
- - SymPy for math solving
10
- - Gradio UI (gr.Image uses type="filepath")
11
- Notes:
12
- - requirements.txt must include: PyPDF2 (capitalized), gradio==4.44.0, gradio-client==0.4.3, sentence-transformers, faiss-cpu, transformers, torch, pytesseract, pillow, sympy
13
  """
14
 
15
  import os
@@ -37,7 +28,7 @@ PDF_DIR = os.path.join(BASE_DIR, "pdfs", "class10")
37
  DB_PATH = os.path.join(BASE_DIR, "jajabor_users.db")
38
 
39
  EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
40
- USE_HF_INFERENCE = False # Free plan: use local small model
41
  LLM_LOCAL_NAME = "google/flan-t5-small"
42
  LLM_MAX_TOKENS = 128
43
 
@@ -122,7 +113,7 @@ def get_user_stats(user_id):
122
 
123
  init_db()
124
 
125
- # -------------------- PDF reading (PyPDF2) --------------------
126
  def extract_text_from_pdf(pdf_path: str) -> str:
127
  text_pages = []
128
  try:
@@ -185,7 +176,7 @@ for text, meta in zip(all_texts, all_metas):
185
  print("Total chunks:", len(corpus_chunks))
186
  index = None
187
  if len(corpus_chunks) > 0:
188
- print("Encoding chunks (this may take some seconds)...")
189
  try:
190
  embs = embedding_model.encode(corpus_chunks, batch_size=32, show_progress_bar=False).astype("float32")
191
  dim = embs.shape[1]
@@ -199,14 +190,14 @@ else:
199
  print("No corpus chunks found: upload PDFs to ./pdfs/class10")
200
 
201
  def rag_search(query: str, k: int = TOP_K):
202
- if index is None:
203
  return []
204
  try:
205
  q_vec = embedding_model.encode([query]).astype("float32")
206
  D, I = index.search(q_vec, k)
207
  results = []
208
  for dist, idx in zip(D[0], I[0]):
209
- if idx == -1:
210
  continue
211
  results.append(
212
  {
@@ -220,16 +211,16 @@ def rag_search(query: str, k: int = TOP_K):
220
  print("RAG search error:", e)
221
  return []
222
 
223
- # -------------------- Local CPU LLM (flan-t5-small) --------------------
224
  print("Loading local CPU LLM:", LLM_LOCAL_NAME)
225
  llm_pipe = None
226
  try:
227
  tokenizer = AutoTokenizer.from_pretrained(LLM_LOCAL_NAME)
228
  model = AutoModelForSeq2SeqLM.from_pretrained(LLM_LOCAL_NAME)
229
- llm_pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device_map=None)
230
  print("Local LLM loaded.")
231
  except Exception as e:
232
- print("Failed to load local LLM (will return notice):", e)
233
  llm_pipe = None
234
 
235
  SYSTEM_PROMPT = """
@@ -250,7 +241,7 @@ def build_rag_prompt(context_blocks, question, chat_history):
250
  ctx += f"\n[Context {i} – {src}]\n{block['text']}\n"
251
 
252
  hist = ""
253
- for role, msg in chat_history:
254
  hist += f"{role}: {msg}\n"
255
 
256
  prompt = f"""{SYSTEM_PROMPT}
@@ -270,41 +261,37 @@ def build_rag_prompt(context_blocks, question, chat_history):
270
 
271
  def llm_answer_with_rag(question: str, chat_history):
272
  retrieved = rag_search(question, TOP_K)
 
 
 
273
  prompt = build_rag_prompt(retrieved, question, chat_history)
274
- if USE_HF_INFERENCE:
275
- return "HF inference disabled in free plan."
276
- else:
277
- if llm_pipe is None:
278
- return "Local LLM not loaded. Ensure model weights are available on first run."
279
- try:
280
- out = llm_pipe(prompt, max_new_tokens=LLM_MAX_TOKENS, do_sample=False)
281
- if isinstance(out, list) and len(out) > 0 and "generated_text" in out[0]:
282
  return out[0]["generated_text"]
283
- if isinstance(out, list) and len(out) > 0 and isinstance(out[0], str):
284
- return out[0]
285
- if isinstance(out, dict) and "generated_text" in out:
286
- return out["generated_text"]
287
- return str(out)
288
- except Exception as e:
289
- traceback.print_exc()
290
- return f"LLM generation failed: {e}"
291
 
292
  # -------------------- OCR + Math helpers --------------------
293
- def ocr_from_image(img: Image.Image):
294
- if img is None:
295
  return ""
296
  try:
 
297
  img = img.convert("RGB")
298
- except Exception:
299
- pass
300
- try:
301
- text = pytesseract.image_to_string(img, lang="asm+eng")
302
- except Exception:
303
- try:
304
- text = pytesseract.image_to_string(img)
305
- except Exception:
306
- text = ""
307
- return text.strip()
308
 
309
  def is_likely_math(text: str) -> bool:
310
  if not text:
@@ -312,46 +299,32 @@ def is_likely_math(text: str) -> bool:
312
  math_chars = set("0123456789+-*/=^()%")
313
  if any(ch in text for ch in math_chars):
314
  return True
315
- kws = ["গণিত", "সমীকৰণ", "উদাহৰণ", "প্ৰশ্ন", "বীজগণিত", "solve", "equation"]
316
- return any(k in text for k in kws)
317
 
318
  def solve_math_expression(expr: str):
319
  try:
320
  expr = expr.replace("^", "**")
321
  if "=" in expr:
322
  left, right = expr.split("=", 1)
323
- left_s = sp.sympify(left)
324
- right_s = sp.sympify(right)
325
  eq = sp.Eq(left_s, right_s)
326
  sol = sp.solve(eq)
327
- steps = [
328
- "প্ৰথমে সমীকৰণ লওঁ:",
329
- f"{sp.pretty(eq)}",
330
- "Sympy ৰ সহায়ত সমাধান পোৱা যায়:",
331
- str(sol),
332
- ]
333
- explanation = "ধাপ-ধাপে সমাধান (সংক্ষেপে):\n" + "\n".join(f"- {s}" for s in steps)
334
- explanation += f"\n\nসেয়ে সমাধান: {sol}"
335
  else:
336
  expr_s = sp.sympify(expr)
337
  simp = sp.simplify(expr_s)
338
- explanation = (
339
- "প্ৰদত্ত গণিতীয় অভিব্যক্তি:\n"
340
- f"{expr}\n\nসরলীকৰণ কৰাৰ পিছত পোৱা যায়:\n{simp}"
341
- )
342
  return explanation
343
- except Exception:
344
- return (
345
- "মই সঠিকভাৱে গণিতীয় অভিব্যক্তি চিনাক্ত কৰিব নোৱাৰিলোঁ। "
346
- "দয়া কৰি সমীকৰণটো অলপ বেছি স্পষ্ট কৰি লিখক: উদাহৰণ – 2*x + 3 = 7"
347
- )
348
 
349
  def speech_to_text(audio):
350
- return ""
351
 
352
  def text_to_speech(text: str):
353
- # stub: return empty string to avoid None in Gradio outputs
354
- return ""
355
 
356
  # -------------------- Chat logic --------------------
357
  def login_user(username, user_state):
@@ -368,104 +341,74 @@ def login_user(username, user_state):
368
  )
369
  return user_state, stats
370
 
371
- def chat_logic(
372
- username,
373
- text_input,
374
- image_input,
375
- audio_input,
376
- chat_history,
377
- user_state,
378
- ):
379
  if chat_history is None:
380
  chat_history = []
381
 
382
  if not user_state or not user_state.get("user_id"):
383
  sys_msg = "⚠️ প্ৰথমে ওপৰত আপোনাৰ নাম লিখি **Login / লগিন** টিপক।"
384
- chat_history = chat_history + [[text_input or "", sys_msg]]
385
- return chat_history, user_state, ""
386
 
387
  user_id = user_state["user_id"]
388
  final_query_parts = []
389
 
390
- voice_text = speech_to_text(audio_input)
391
- if voice_text:
392
- final_query_parts.append(voice_text)
393
-
394
  ocr_text = ""
395
- if image_input is not None and image_input != "":
396
- img = None
397
- try:
398
- if isinstance(image_input, str):
399
- img = Image.open(image_input)
400
- else:
401
- read_method = getattr(image_input, "read", None)
402
- if callable(read_method):
403
- raw = image_input.read()
404
- img = Image.open(io.BytesIO(raw))
405
- if img is None and isinstance(image_input, Image.Image):
406
- img = image_input
407
- except Exception:
408
- img = None
409
-
410
- if img is not None:
411
- try:
412
- ocr_text = ocr_from_image(img)
413
- if ocr_text:
414
- final_query_parts.append(ocr_text)
415
- except Exception:
416
- pass
417
 
418
  if text_input:
419
  final_query_parts.append(text_input)
420
 
421
  if not final_query_parts:
422
  sys_msg = "⚠️ অনুগ্ৰহ কৰি প্ৰশ্ন লিখক, কিম্বা ছবি আপলোড কৰক।"
423
- chat_history = chat_history + [["", sys_msg]]
424
- return chat_history, user_state, ""
425
 
426
  full_query = "\n".join(final_query_parts)
427
 
 
428
  conv = []
429
  for u, b in chat_history:
430
- if u:
431
- conv.append(("Student", u))
432
- if b:
433
- conv.append(("Tutor", b))
434
 
435
  is_math = is_likely_math(full_query)
436
 
437
  if is_math:
438
  math_answer = solve_math_expression(full_query)
439
  combined_question = (
440
- full_query
441
- + "\n\nগণিত প্ৰোগ্ৰামে এই ফলাফল দিছে:\n"
442
- + math_answer
443
- + "\n\nঅনুগ্ৰহ কৰি শ্রেণী ১০ ৰ শিক্ষাৰ্থীৰ বাবে সহজ ভাষাত ব্যাখ্যা কৰক।"
444
  )
445
  final_answer = llm_answer_with_rag(combined_question, conv)
446
  else:
447
  final_answer = llm_answer_with_rag(full_query, conv)
448
 
449
- if final_answer is None:
450
- final_answer = "মাফ কৰক — মই ইয়াৰ উত্তর দিব পৰা নাই।"
451
-
452
  log_interaction(user_id, full_query, final_answer, is_math)
453
- audio_out = text_to_speech(final_answer) or ""
454
- display_question = text_input or voice_text or ocr_text or "(empty)"
455
- chat_history = chat_history + [[display_question, final_answer]]
456
 
457
- return chat_history, user_state, audio_out
458
 
459
  # -------------------- Gradio UI --------------------
460
- with gr.Blocks(title=APP_NAME, css=None) as demo:
 
 
461
  gr.Markdown(
462
- """
463
- # 🧭 জাজাবৰ – SEBA অসমীয়া ক্লাছ ১০ AI Tutor (Free CPU)
464
 
465
- - Upload your SEBA Class 10 PDFs to `pdfs/class10` in this repo (or when running locally, ensure folder exists)
466
- - Text + Image (OCR) input
467
  - Math step-by-step solutions
468
- - User login + progress
469
  """
470
  )
471
 
@@ -500,42 +443,44 @@ with gr.Blocks(title=APP_NAME, css=None) as demo:
500
 
501
  with gr.Row():
502
  image_inp = gr.Image(label="📷 প্ৰশ্নৰ ছবি (Optional)", type="filepath")
503
- audio_inp = gr.Audio(label="🎙️ কণ্ঠস্বৰ প্ৰশ্ন (Stub — not used now)", type="numpy")
504
-
505
  with gr.Row():
506
  ask_btn = gr.Button("🤖 জাজাবৰক সোধক")
507
- audio_out = gr.Audio(
508
- label="🔊 উত্তৰৰ অডিঅ’ (TTS – future upgrade)",
509
- interactive=False,
510
- type="filepath"
511
- )
512
 
 
513
  login_btn.click(
514
  login_user,
515
  inputs=[username_inp, user_state],
516
  outputs=[user_state, stats_md],
517
  )
518
 
519
- def wrapped_chat(text, image, audio, history, user_state_inner, username_inner):
520
- if user_state_inner is None:
521
- user_state_inner = {}
522
- if username_inner and not user_state_inner.get("username"):
523
- user_state_inner["username"] = username_inner
524
- return chat_logic(username_inner, text, image, audio, history, user_state_inner)
525
-
526
  ask_btn.click(
527
- wrapped_chat,
528
- inputs=[text_inp, image_inp, audio_inp, chat, user_state, username_inp],
529
- outputs=[chat, user_state, audio_out],
 
 
 
 
530
  )
531
 
 
532
  text_inp.submit(
533
- wrapped_chat,
534
- inputs=[text_inp, image_inp, audio_inp, chat, user_state, username_inp],
535
- outputs=[chat, user_state, audio_out],
 
 
 
 
536
  )
537
 
538
- # -------------------- Launch --------------------
 
 
 
 
539
  if __name__ == "__main__":
540
- # bind to 0.0.0.0 and allow share link for hosted environments where localhost may be blocked
541
- demo.launch(server_name="0.0.0.0", server_port=7860, share=True)
 
 
1
  """
2
  Jajabor – SEBA Assamese Class 10 Tutor (Free-tier CPU-ready)
3
+ Fixed version with correct Gradio version and improved error handling
 
 
 
 
 
 
 
 
4
  """
5
 
6
  import os
 
28
  DB_PATH = os.path.join(BASE_DIR, "jajabor_users.db")
29
 
30
  EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
31
+ USE_HF_INFERENCE = False
32
  LLM_LOCAL_NAME = "google/flan-t5-small"
33
  LLM_MAX_TOKENS = 128
34
 
 
113
 
114
  init_db()
115
 
116
+ # -------------------- PDF reading --------------------
117
  def extract_text_from_pdf(pdf_path: str) -> str:
118
  text_pages = []
119
  try:
 
176
  print("Total chunks:", len(corpus_chunks))
177
  index = None
178
  if len(corpus_chunks) > 0:
179
+ print("Encoding chunks...")
180
  try:
181
  embs = embedding_model.encode(corpus_chunks, batch_size=32, show_progress_bar=False).astype("float32")
182
  dim = embs.shape[1]
 
190
  print("No corpus chunks found: upload PDFs to ./pdfs/class10")
191
 
192
  def rag_search(query: str, k: int = TOP_K):
193
+ if index is None or len(corpus_chunks) == 0:
194
  return []
195
  try:
196
  q_vec = embedding_model.encode([query]).astype("float32")
197
  D, I = index.search(q_vec, k)
198
  results = []
199
  for dist, idx in zip(D[0], I[0]):
200
+ if idx == -1 or idx >= len(corpus_chunks):
201
  continue
202
  results.append(
203
  {
 
211
  print("RAG search error:", e)
212
  return []
213
 
214
+ # -------------------- Local CPU LLM --------------------
215
  print("Loading local CPU LLM:", LLM_LOCAL_NAME)
216
  llm_pipe = None
217
  try:
218
  tokenizer = AutoTokenizer.from_pretrained(LLM_LOCAL_NAME)
219
  model = AutoModelForSeq2SeqLM.from_pretrained(LLM_LOCAL_NAME)
220
+ llm_pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=-1) # CPU
221
  print("Local LLM loaded.")
222
  except Exception as e:
223
+ print("Failed to load local LLM:", e)
224
  llm_pipe = None
225
 
226
  SYSTEM_PROMPT = """
 
241
  ctx += f"\n[Context {i} – {src}]\n{block['text']}\n"
242
 
243
  hist = ""
244
+ for role, msg in chat_history[-4:]: # Keep last 4 exchanges
245
  hist += f"{role}: {msg}\n"
246
 
247
  prompt = f"""{SYSTEM_PROMPT}
 
261
 
262
  def llm_answer_with_rag(question: str, chat_history):
263
  retrieved = rag_search(question, TOP_K)
264
+ if not retrieved:
265
+ return "মই এই প্ৰশ্নৰ উত্তৰ দিবলৈ প্ৰয়োজনীয় তথ্য বিচাৰি পোৱা নাই। দয়া কৰি নিশ্চিত কৰক যে আপোনাৰ পাঠ্যপুথিৰ PDF ফাইলসমূহ সঠিকভাৱে আপলোড কৰা হৈছে।"
266
+
267
  prompt = build_rag_prompt(retrieved, question, chat_history)
268
+
269
+ if llm_pipe is None:
270
+ return "AI মডেল ল'ড হোৱা নাই। দয়া কৰি পুনৰ চেষ্টা কৰক।"
271
+
272
+ try:
273
+ out = llm_pipe(prompt, max_new_tokens=LLM_MAX_TOKENS, do_sample=False)
274
+ if isinstance(out, list) and len(out) > 0:
275
+ if "generated_text" in out[0]:
276
  return out[0]["generated_text"]
277
+ return str(out[0])
278
+ return str(out)
279
+ except Exception as e:
280
+ print("LLM generation error:", e)
281
+ return f"উত্তৰ তৈয়াৰ কৰোঁতে সমস্যা: {e}"
 
 
 
282
 
283
  # -------------------- OCR + Math helpers --------------------
284
+ def ocr_from_image(img_path: str):
285
+ if not img_path:
286
  return ""
287
  try:
288
+ img = Image.open(img_path)
289
  img = img.convert("RGB")
290
+ text = pytesseract.image_to_string(img, lang="eng+asm")
291
+ return text.strip()
292
+ except Exception as e:
293
+ print("OCR error:", e)
294
+ return ""
 
 
 
 
 
295
 
296
  def is_likely_math(text: str) -> bool:
297
  if not text:
 
299
  math_chars = set("0123456789+-*/=^()%")
300
  if any(ch in text for ch in math_chars):
301
  return True
302
+ math_kws = ["গণিত", "সমীকৰণ", "উদাহৰণ", "প্ৰশ্ন", "বীজগণিত", "solve", "equation", "math"]
303
+ return any(k in text.lower() for k in math_kws)
304
 
305
  def solve_math_expression(expr: str):
306
  try:
307
  expr = expr.replace("^", "**")
308
  if "=" in expr:
309
  left, right = expr.split("=", 1)
310
+ left_s = sp.sympify(left.strip())
311
+ right_s = sp.sympify(right.strip())
312
  eq = sp.Eq(left_s, right_s)
313
  sol = sp.solve(eq)
314
+ explanation = f"সমীকৰণ: {eq}\n\nসমাধান: {sol}"
 
 
 
 
 
 
 
315
  else:
316
  expr_s = sp.sympify(expr)
317
  simp = sp.simplify(expr_s)
318
+ explanation = f"প্ৰকাশ: {expr}\n\nসৰলীকৃত: {simp}"
 
 
 
319
  return explanation
320
+ except Exception as e:
321
+ return f"গণিত সমাধানত সমস্যা: {e}"
 
 
 
322
 
323
  def speech_to_text(audio):
324
+ return "" # Stub for future implementation
325
 
326
  def text_to_speech(text: str):
327
+ return None # Stub for future implementation
 
328
 
329
  # -------------------- Chat logic --------------------
330
  def login_user(username, user_state):
 
341
  )
342
  return user_state, stats
343
 
344
+ def chat_logic(text_input, image_input, chat_history, user_state):
 
 
 
 
 
 
 
345
  if chat_history is None:
346
  chat_history = []
347
 
348
  if not user_state or not user_state.get("user_id"):
349
  sys_msg = "⚠️ প্ৰথমে ওপৰত আপোনাৰ নাম লিখি **Login / লগিন** টিপক।"
350
+ chat_history.append([text_input or "", sys_msg])
351
+ return chat_history, user_state, None
352
 
353
  user_id = user_state["user_id"]
354
  final_query_parts = []
355
 
356
+ # Process image OCR
 
 
 
357
  ocr_text = ""
358
+ if image_input is not None:
359
+ ocr_text = ocr_from_image(image_input)
360
+ if ocr_text:
361
+ final_query_parts.append(f"ছবিৰ পৰা পাঠ: {ocr_text}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
 
363
  if text_input:
364
  final_query_parts.append(text_input)
365
 
366
  if not final_query_parts:
367
  sys_msg = "⚠️ অনুগ্ৰহ কৰি প্ৰশ্ন লিখক, কিম্বা ছবি আপলোড কৰক।"
368
+ chat_history.append(["", sys_msg])
369
+ return chat_history, user_state, None
370
 
371
  full_query = "\n".join(final_query_parts)
372
 
373
+ # Convert chat history to conversation format
374
  conv = []
375
  for u, b in chat_history:
376
+ if u and u.strip():
377
+ conv.append(("Student", u.strip()))
378
+ if b and b.strip():
379
+ conv.append(("Tutor", b.strip()))
380
 
381
  is_math = is_likely_math(full_query)
382
 
383
  if is_math:
384
  math_answer = solve_math_expression(full_query)
385
  combined_question = (
386
+ full_query + "\n\nগণিত সমাধান:\n" + math_answer +
387
+ "\n\nঅনুগ্ৰহ কৰি শ্রেণী ১০ ৰ শিক্ষাৰ্থীৰ বাবে সহজ ভাষাত ব্যাখ্যা কৰক।"
 
 
388
  )
389
  final_answer = llm_answer_with_rag(combined_question, conv)
390
  else:
391
  final_answer = llm_answer_with_rag(full_query, conv)
392
 
 
 
 
393
  log_interaction(user_id, full_query, final_answer, is_math)
394
+
395
+ display_question = text_input or ocr_text or "(ছবিৰ প্ৰশ্ন)"
396
+ chat_history.append([display_question, final_answer])
397
 
398
+ return chat_history, user_state, None
399
 
400
  # -------------------- Gradio UI --------------------
401
+ with gr.Blocks(title=APP_NAME, css="""
402
+ .stats-box { background: #f0f8ff; padding: 10px; border-radius: 5px; }
403
+ """) as demo:
404
  gr.Markdown(
405
+ f"""
406
+ # 🧭 {APP_NAME}
407
 
408
+ - SEBA Class 10 PDFs upload to `pdfs/class10` folder
409
+ - Text + Image (OCR) input support
410
  - Math step-by-step solutions
411
+ - User login + progress tracking
412
  """
413
  )
414
 
 
443
 
444
  with gr.Row():
445
  image_inp = gr.Image(label="📷 প্ৰশ্নৰ ছবি (Optional)", type="filepath")
446
+
 
447
  with gr.Row():
448
  ask_btn = gr.Button("🤖 জাজাবৰক সোধক")
449
+ clear_btn = gr.Button("🧹 পৰিষ্কাৰ কৰক")
 
 
 
 
450
 
451
+ # Login handler
452
  login_btn.click(
453
  login_user,
454
  inputs=[username_inp, user_state],
455
  outputs=[user_state, stats_md],
456
  )
457
 
458
+ # Chat handler
 
 
 
 
 
 
459
  ask_btn.click(
460
+ chat_logic,
461
+ inputs=[text_inp, image_inp, chat, user_state],
462
+ outputs=[chat, user_state, image_inp],
463
+ ).then(
464
+ lambda: "", None, text_inp
465
+ ).then(
466
+ lambda: None, None, image_inp
467
  )
468
 
469
+ # Text submit handler
470
  text_inp.submit(
471
+ chat_logic,
472
+ inputs=[text_inp, image_inp, chat, user_state],
473
+ outputs=[chat, user_state, image_inp],
474
+ ).then(
475
+ lambda: "", None, text_inp
476
+ ).then(
477
+ lambda: None, None, image_inp
478
  )
479
 
480
+ # Clear chat
481
+ def clear_chat():
482
+ return [], None
483
+ clear_btn.click(clear_chat, outputs=[chat, image_inp])
484
+
485
  if __name__ == "__main__":
486
+ demo.launch(server_name="0.0.0.0", server_port=7860, share=True)