Sazid2 committed
Commit bf281e4 · verified · Parent: 1e98313

Create app.py

Files changed (1): app.py (+202 -81)
app.py CHANGED
@@ -1,8 +1,20 @@
 # app.py
+"""
+Jajabor – SEBA Assamese Class 10 Tutor (Gradio app)
+Full single-file app:
+- Loads PDFs from ./pdfs/class10
+- Builds FAISS index using sentence-transformers
+- Optional Hugging Face Inference API for LLM (set HF_API_TOKEN env var)
+- Login + sqlite interactions logging
+- OCR from images (pytesseract) with robust handling of gr.Image(type="filepath")
+"""
+
 import os
 import io
 import sqlite3
 from datetime import datetime
+import traceback
+
 import fitz  # PyMuPDF
 import numpy as np
 from PIL import Image
@@ -12,35 +24,40 @@ import pytesseract
 from sentence_transformers import SentenceTransformer
 import sympy as sp
 
-# Optional: huggingface inference
+# Optional HF inference
 from huggingface_hub import InferenceApi
 
-# ------------- CONFIG -------------
-APP_NAME = "Jajabor – SEBA Assamese Class 10 Tutor (Spaces)"
+# -------------------- CONFIG --------------------
+APP_NAME = "Jajabor – SEBA Assamese Class 10 Tutor"
+
 BASE_DIR = os.path.abspath(os.path.dirname(__file__))
 PDF_DIR = os.path.join(BASE_DIR, "pdfs", "class10")
 DB_PATH = os.path.join(BASE_DIR, "jajabor_users.db")
 
-# Embedding model - compact for Spaces. Swap if you run on stronger infra.
+# Embedding model (compact for Spaces)
 EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 
-# LLM model to call via Inference API (optional)
-# WARNING: not all large models will run under a free plan; see docs.
-LLM_MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"  # can change to a hosted model
-USE_HF_INFERENCE = True  # set False if you plan to load a local small model
+# LLM: model to call via HF Inference API. Change if you have another hosted model.
+LLM_MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
+USE_HF_INFERENCE = True  # set False if you don't want to call HF Inference
 
 CHUNK_SIZE = 600
 CHUNK_OVERLAP = 120
 TOP_K = 5
 
 HUGGINGFACE_API_TOKEN = os.environ.get("HF_API_TOKEN", None)
-if USE_HF_INFERENCE:
-    if not HUGGINGFACE_API_TOKEN:
-        print("Warning: HF API token not found in env (HF_API_TOKEN). LLM calls will fail.")
-    else:
+if USE_HF_INFERENCE and HUGGINGFACE_API_TOKEN is None:
+    print("Warning: HF_API_TOKEN not set. LLM calls will fail until the token is provided in env.")
+
+inference = None
+if USE_HF_INFERENCE and HUGGINGFACE_API_TOKEN:
+    try:
         inference = InferenceApi(repo_id=LLM_MODEL_NAME, token=HUGGINGFACE_API_TOKEN)
+    except Exception as e:
+        print("Failed to initialize HF Inference API client:", e)
+        inference = None
 
-# ------------- DB helpers -------------
+# -------------------- DB helpers --------------------
 def init_db(db_path=DB_PATH):
     os.makedirs(os.path.dirname(db_path), exist_ok=True)
     conn = sqlite3.connect(db_path)
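
Reviewer note (not in the commit): the old guard bound `inference` only in the `else:` branch, so without a token any later call to `call_llm_via_hf` crashed with a NameError rather than failing gracefully. After this hunk `inference` always exists and callers can simply branch on None, a minimal sketch:

    if inference is None:
        print("LLM disabled: set HF_API_TOKEN to enable Inference API calls.")
    else:
        print("LLM ready:", LLM_MODEL_NAME)
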
@@ -117,14 +134,20 @@ def get_user_stats(user_id):
 
 init_db()
 
-# ------------- PDF loading + RAG -------------
+# -------------------- PDF loading + RAG --------------------
 def extract_text_from_pdf(pdf_path: str) -> str:
-    doc = fitz.open(pdf_path)
+    try:
+        doc = fitz.open(pdf_path)
+    except Exception:
+        return ""
     pages = []
     for page in doc:
-        txt = page.get_text("text")
-        if txt:
-            pages.append(txt)
+        try:
+            txt = page.get_text("text")
+            if txt:
+                pages.append(txt)
+        except Exception:
+            continue
     return "\n".join(pages)
 
 def load_all_pdfs(pdf_dir: str):
@@ -133,7 +156,7 @@ def load_all_pdfs(pdf_dir: str):
     if not os.path.isdir(pdf_dir):
         print("PDF_DIR not found:", pdf_dir)
         return texts, metas
-    for fname in os.listdir(pdf_dir):
+    for fname in sorted(os.listdir(pdf_dir)):
         if fname.lower().endswith(".pdf"):
             path = os.path.join(pdf_dir, fname)
             print("Reading:", path)
@@ -143,14 +166,19 @@ def load_all_pdfs(pdf_dir: str):
     return texts, metas
 
 def split_text(text: str, chunk_size=600, overlap=120):
+    if not text:
+        return []
     chunks = []
     start = 0
-    while start < len(text):
-        end = start + chunk_size
+    L = len(text)
+    # Keep stepping forward by chunk_size - overlap
+    step = max(chunk_size - overlap, 1)
+    while start < L:
+        end = min(start + chunk_size, L)
         chunk = text[start:end]
         if chunk.strip():
            chunks.append(chunk)
-        start = max(end - overlap, end)  # avoid infinite loop
+        start += step
     return chunks
 
 print("Loading embedding model:", EMBEDDING_MODEL_NAME)
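
Reviewer note (not in the commit): the old loop advanced with start = max(end - overlap, end), which always equals end, so the configured overlap was silently dropped. The rewrite steps by chunk_size - overlap instead. A quick sanity check of the new stepping:

    text = "x" * 1500
    chunks = split_text(text, chunk_size=600, overlap=120)  # step = 480
    print([len(c) for c in chunks])  # [600, 600, 540, 60]
    # chunk 0 covers [0, 600), chunk 1 covers [480, 1080): 120 shared characters
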
@@ -168,36 +196,43 @@ for text, meta in zip(all_texts, all_metas):
     corpus_metas.extend([meta] * len(chs))
 
 print("Total chunks:", len(corpus_chunks))
+index = None
 if len(corpus_chunks) > 0:
-    print("Encoding chunks...")
-    embs = embedding_model.encode(corpus_chunks, batch_size=32, show_progress_bar=False).astype("float32")
-    dim = embs.shape[1]
-    index = faiss.IndexFlatL2(dim)
-    index.add(embs)
-    print("FAISS index ready; dim:", dim)
+    print("Encoding chunks (this may take some seconds)...")
+    try:
+        embs = embedding_model.encode(corpus_chunks, batch_size=32, show_progress_bar=False).astype("float32")
+        dim = embs.shape[1]
+        index = faiss.IndexFlatL2(dim)
+        index.add(embs)
+        print("✅ FAISS index ready; dim:", dim)
+    except Exception as e:
+        print("Failed to encode/add to index:", e)
+        index = None
 else:
-    index = None
-    print("No corpus chunks - upload PDFs to the `pdfs/class10` folder in the repo.")
+    print("No corpus chunks found: upload PDFs to ./pdfs/class10")
 
 def rag_search(query: str, k: int = TOP_K):
     if index is None:
         return []
-    q_vec = embedding_model.encode([query]).astype("float32")
-    D, I = index.search(q_vec, k)
-    results = []
-    for dist, idx in zip(D[0], I[0]):
-        if idx == -1:
-            continue
-        results.append(
-            {
-                "score": float(dist),
-                "text": corpus_chunks[idx],
-                "meta": corpus_metas[idx],
-            }
-        )
-    return results
+    try:
+        q_vec = embedding_model.encode([query]).astype("float32")
+        D, I = index.search(q_vec, k)
+        results = []
+        for dist, idx in zip(D[0], I[0]):
+            if idx == -1:
+                continue
+            results.append(
+                {
+                    "score": float(dist),
+                    "text": corpus_chunks[idx],
+                    "meta": corpus_metas[idx],
+                }
+            )
+        return results
+    except Exception:
+        return []
 
-# ------------- LLM helpers -------------
+# -------------------- LLM helpers --------------------
 SYSTEM_PROMPT = """
 You are "Jajabor", an expert SEBA Assamese tutor for Class 10.
 Always prefer to answer in Assamese. If the student clearly asks for English, you may reply in English.
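
Reviewer note (not in the commit): a sketch of what rag_search now hands back; "score" is a FAISS L2 distance, so lower means more similar (the query below is illustrative):

    for hit in rag_search("সমীকৰণ সমাধান কৰাৰ নিয়ম", k=3):
        # each hit: {"score": <L2 distance>, "text": <chunk>, "meta": <source info>}
        print(round(hit["score"], 2), hit["text"][:60])
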
@@ -235,20 +270,25 @@ def build_rag_prompt(context_blocks, question, chat_history):
     return prompt
 
 def call_llm_via_hf(prompt: str, max_tokens=512):
-    if not HUGGINGFACE_API_TOKEN:
-        return "LLM not available: HF API token (env HF_API_TOKEN) is required to call the Inference API."
+    if inference is None:
+        return "LLM not available: HF Inference client not configured (set HF_API_TOKEN and ensure model name is accessible)."
     try:
-        # huggingface InferenceApi text-generation returns text (model-specific format)
+        # Some inference endpoints accept dict return, some strings. Handle flexibly.
         out = inference(inputs=prompt, params={"max_new_tokens": max_tokens, "temperature": 0.3})
-        # inference result may be a dict or string; try to extract
+        # Handle common return types
         if isinstance(out, dict) and "generated_text" in out:
             return out["generated_text"]
-        if isinstance(out, list) and len(out) > 0 and "generated_text" in out[0]:
-            return out[0]["generated_text"]
+        if isinstance(out, list) and len(out) > 0:
+            if isinstance(out[0], dict) and "generated_text" in out[0]:
+                return out[0]["generated_text"]
+            # sometimes list of strings
+            if isinstance(out[0], str):
+                return out[0]
         if isinstance(out, str):
             return out
         return str(out)
     except Exception as e:
+        traceback.print_exc()
         return f"LLM call failed: {e}"
 
 def llm_answer_with_rag(question: str, chat_history):
@@ -259,23 +299,34 @@ def llm_answer_with_rag(question: str, chat_history):
     else:
         return "LLM not configured (USE_HF_INFERENCE=False)."
 
-# ------------- OCR + math helpers -------------
+# -------------------- OCR + math helpers --------------------
 def ocr_from_image(img: Image.Image):
     if img is None:
         return ""
-    img = img.convert("RGB")
     try:
+        img = img.convert("RGB")
+    except Exception:
+        pass
+    try:
+        # try Assamese + English; fallback if languages not installed
         text = pytesseract.image_to_string(img, lang="asm+eng")
     except Exception:
-        text = pytesseract.image_to_string(img)
+        try:
+            text = pytesseract.image_to_string(img)
+        except Exception:
+            text = ""
     return text.strip()
 
 def is_likely_math(text: str) -> bool:
+    if not text:
+        return False
     math_chars = set("0123456789+-*/=^()%")
     if any(ch in text for ch in math_chars):
         return True
-    kws = ["গণিত", "সমীকৰণ", "উদাহৰণ", "প্ৰশ্ন", "বীজগণিত"]
-    return any(k in text for k in kws)
+    kws = ["গণিত", "সমীকৰণ", "উদাহৰণ", "প্ৰশ্ন", "বীজগণিত", "solve", "equation"]
+    if any(k in text for k in kws):
+        return True
+    return False
 
 def solve_math_expression(expr: str):
     try:
@@ -289,31 +340,33 @@ def solve_math_expression(expr: str):
             steps = []
             steps.append("প্ৰথমে সমীকৰণ লওঁ:")
             steps.append(f"{sp.pretty(eq)}")
-            steps.append("Sympy ৰ সহায়ত সমাধান পোৱা যায়:")
+            steps.append("Sympy ৰ সহায়ত সমাধান পোৱা যায়:")
             steps.append(str(sol))
             explanation = "ধাপ-ধাপে সমাধান (সংক্ষেপে):\n" + "\n".join(f"- {s}" for s in steps)
-            explanation += f"\n\nসেয়েহে সমাধান: {sol}"
+            explanation += f"\n\nসেয়ে সমাধান: {sol}"
         else:
             expr_s = sp.sympify(expr)
             simp = sp.simplify(expr_s)
             explanation = (
-                "প্ৰদত্ত গণিতীয় অভিব্যক্তি:\n"
-                f"{expr}\n\nসরলীকৰণ কৰাৰ পিছত পোৱা যায়:\n{simp}"
+                "প্ৰদত্ত গণিতীয় অভিব্যক্তি:\n"
+                f"{expr}\n\nসরলীকৰণ কৰাৰ পিছত পোৱা যায়:\n{simp}"
             )
         return explanation
     except Exception:
         return (
-            "মই সঠিকভাৱে গণিতীয় অভিব্যক্তি চিনাক্ত কৰিব নোৱাৰিলোঁ। "
-            "দয়া কৰি সমীকৰণটো অলপ বেছি স্পষ্টকৈ লিখা: উদাহৰণ – 2x + 3 = 7"
+            "মই সঠিকভাৱে গণিতীয় অভিব্যক্তি চিনাক্ত কৰিব নোৱাৰিলোঁ। "
+            "দয়া কৰি সমীকৰণটো অলপ বেছি স্পষ্টকৈ লিখা: উদাহৰণ – 2*x + 3 = 7"
         )
 
 def speech_to_text(audio):
+    # stub for future ASR integration
     return ""
 
 def text_to_speech(text: str):
+    # stub for TTS integration
     return None
 
-# ------------- Chat logic -------------
+# -------------------- Chat logic --------------------
 def login_user(username, user_state):
     username = (username or "").strip()
     if not username:
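
Reviewer note (not in the commit): the example in the error message changed from "2x + 3 = 7" to "2*x + 3 = 7" because sympy's sympify rejects implicit multiplication such as 2x by default. A standalone sketch of solving that input with sympy:

    import sympy as sp

    lhs, rhs = "2*x + 3 = 7".split("=")
    eq = sp.Eq(sp.sympify(lhs), sp.sympify(rhs))
    print(sp.solve(eq))  # [2]
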
@@ -336,28 +389,58 @@ def chat_logic(
     chat_history,
     user_state,
 ):
+    # Ensure chat_history is a list
+    if chat_history is None:
+        chat_history = []
+
     if not user_state or not user_state.get("user_id"):
         sys_msg = "⚠️ প্ৰথমে ওপৰত আপোনাৰ নাম লিখি **Login / লগিন** টিপক।"
         chat_history = chat_history + [[text_input or "", sys_msg]]
         return chat_history, user_state, None
 
     user_id = user_state["user_id"]
-
     final_query_parts = []
+
+    # audio (stub)
     voice_text = speech_to_text(audio_input)
     if voice_text:
         final_query_parts.append(voice_text)
 
+    # image handling (robust)
     ocr_text = ""
-    if image_input is not None:
+    if image_input is not None and image_input != "":
+        img = None
         try:
-            img = Image.open(io.BytesIO(image_input.read()))
+            # If Gradio returns a file path (string)
+            if isinstance(image_input, str):
+                try:
+                    img = Image.open(image_input)
+                except Exception:
+                    img = None
+            else:
+                # If it's a file-like object: has .read()
+                read_method = getattr(image_input, "read", None)
+                if callable(read_method):
+                    try:
+                        raw = image_input.read()
+                        img = Image.open(io.BytesIO(raw))
+                    except Exception:
+                        img = None
+                # If it's already a PIL Image
+                if img is None and isinstance(image_input, Image.Image):
+                    img = image_input
         except Exception:
-            img = image_input
-        ocr_text = ocr_from_image(img)
-        if ocr_text:
-            final_query_parts.append(ocr_text)
+            img = None
 
+        if img is not None:
+            try:
+                ocr_text = ocr_from_image(img)
+                if ocr_text:
+                    final_query_parts.append(ocr_text)
+            except Exception:
+                pass
+
+    # text input
     if text_input:
         final_query_parts.append(text_input)
 
@@ -367,6 +450,7 @@ def chat_logic(
         return chat_history, user_state, None
 
     full_query = "\n".join(final_query_parts)
+
     conv = []
     for u, b in chat_history:
         if u:
@@ -375,6 +459,7 @@
             conv.append(("Tutor", b))
 
     is_math = is_likely_math(full_query)
+
     if is_math:
         math_answer = solve_math_expression(full_query)
         combined_question = (
@@ -387,19 +472,25 @@ def chat_logic(
     else:
         final_answer = llm_answer_with_rag(full_query, conv)
 
+    # If LLM returns the whole prompt + generation, try to remove the prompt (best-effort)
+    if isinstance(final_answer, str) and final_answer.strip().startswith(SYSTEM_PROMPT.strip()):
+        # best-effort: don't leak huge prompts to chat UI; keep as-is if detection fails
+        # (Many HF inference responses do not include the prompt anyway)
+        pass
+
     log_interaction(user_id, full_query, final_answer, is_math)
     audio_out = text_to_speech(final_answer)
     display_question = text_input or voice_text or ocr_text or "(empty)"
     chat_history = chat_history + [[display_question, final_answer]]
     return chat_history, user_state, audio_out
 
-# ------------- Gradio UI -------------
-with gr.Blocks(title=APP_NAME) as demo:
+# -------------------- Gradio UI --------------------
+with gr.Blocks(title=APP_NAME, css=None) as demo:
     gr.Markdown(
         """
-# 🧭 জাজাবৰ – SEBA অসমীয়া ক্লাছ ১০ AI Tutor (Spaces)
+# 🧭 জাজাবৰ – SEBA অসমীয়া ক্লাছ ১০ AI Tutor
 
-- Upload your SEBA Class 10 PDFs to `pdfs/class10` in this Space repo
+- Upload your SEBA Class 10 PDFs to `pdfs/class10` in this repo (or when running locally, ensure folder exists)
 - Text + Image (OCR) input
 - Math step-by-step solutions
 - User login + progress
@@ -411,23 +502,50 @@ with gr.Blocks(title=APP_NAME) as demo:
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### 👤 লগিন")
-            username_inp = gr.Textbox(label="নাম / ইউজাৰ আইডি", placeholder="উদাহৰণ: abu10")
+            username_inp = gr.Textbox(
+                label="নাম / ইউজাৰ আইডি",
+                placeholder="উদাহৰণ: abu10, student01 ..."
+            )
             login_btn = gr.Button("✅ Login / লগিন")
             stats_md = gr.Markdown("এতিয়ালৈকে লগিন হোৱা নাই।", elem_classes="stats-box")
+
+            gr.Markdown(
+                """
+                ### 💡 টিপছ
+                - "ক্লাছ ১০ গণিত: উদাহৰণ ৩.১ প্ৰশ্ন ২" – এই ধৰণৰ প্ৰশ্ন ভাল
+                - ফটো আপলোড কৰিলে টেক্স্টটো OCR কৰি পঢ়িব চেষ্টা কৰা হয়
+                - সম্ভৱ হলে প্ৰশ্নটো অসমীয়াত সোধক 🙂
+                """
+            )
+
         with gr.Column(scale=3):
             chat = gr.Chatbot(label="জাজাবৰ সৈতে কথোপকথন", height=500)
-            text_inp = gr.Textbox(label="আপোনাৰ প্ৰশ্ন লিখক", lines=2)
+            text_inp = gr.Textbox(
+                label="আপোনাৰ প্ৰশ্ন লিখক",
+                placeholder='উদাহৰণ: "ক্লাছ ১০ অসমীয়া: অনুচ্ছেদ পাঠ ১ ৰ মূল বিষয় কি?"',
+                lines=2,
+            )
+
             with gr.Row():
-                image_inp = gr.Image(label="📷 প্ৰশ্নৰ ছবি (Optional)", type="file")
-                audio_inp = gr.Audio(label="🎙️ কণ্ঠস্বৰ প্ৰশ্ন (Stub)", type="numpy")
+                # IMPORTANT: use type="filepath" so Gradio returns a local path string
+                image_inp = gr.Image(label="📷 প্ৰশ্নৰ ছবি (Optional)", type="filepath")
+                audio_inp = gr.Audio(label="🎙️ কণ্ঠস্বৰ প্ৰশ্ন (Stub — not used now)", type="numpy")
+
             with gr.Row():
                 ask_btn = gr.Button("🤖 জাজাবৰক সোধক")
-                audio_out = gr.Audio(label="🔊 উত্তৰৰ অডিঅ’ (TTS – future)", interactive=False)
+                audio_out = gr.Audio(label="🔊 উত্তৰৰ অডিঅ’ (TTS – future upgrade)", interactive=False)
 
-    login_btn.click(login_user, inputs=[username_inp, user_state], outputs=[user_state, stats_md])
+    login_btn.click(
+        login_user,
+        inputs=[username_inp, user_state],
+        outputs=[user_state, stats_md],
+    )
 
     def wrapped_chat(text, image, audio, history, user_state_inner, username_inner):
-        if user_state_inner and username_inner and not user_state_inner.get("username"):
+        # keep username in state if provided
+        if user_state_inner is None:
+            user_state_inner = {}
+        if username_inner and not user_state_inner.get("username"):
             user_state_inner["username"] = username_inner
         return chat_logic(username_inner, text, image, audio, history, user_state_inner)
 
@@ -436,11 +554,14 @@ with gr.Blocks(title=APP_NAME) as demo:
         inputs=[text_inp, image_inp, audio_inp, chat, user_state, username_inp],
         outputs=[chat, user_state, audio_out],
     )
+
     text_inp.submit(
         wrapped_chat,
         inputs=[text_inp, image_inp, audio_inp, chat, user_state, username_inp],
         outputs=[chat, user_state, audio_out],
     )
 
+# -------------------- Launch --------------------
 if __name__ == "__main__":
+    # For Spaces, demo.launch() is fine. Locally you can set server_name to "0.0.0.0"
     demo.launch()
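
Reviewer note (not in the commit): judging from the imports, the Space needs roughly this dependency set (assumed PyPI names; pytesseract additionally requires the tesseract binary, with asm and eng traineddata for the OCR path):

    gradio
    huggingface_hub
    pymupdf                # imported as fitz
    numpy
    pillow
    pytesseract
    faiss-cpu
    sentence-transformers
    sympy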
 