Sazid2 committed on
Commit
0c9db78
·
verified ·
1 Parent(s): db3f951

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +418 -435
app.py CHANGED
@@ -1,45 +1,73 @@
1
  """
2
- Jajabor – SEBA Assamese Class 10 Tutor (Free-tier CPU-ready)
3
- Fixed version with Gradio compatibility fixes
4
  """
5
 
6
  import os
7
- import io
8
  import sqlite3
9
- import traceback
10
  from datetime import datetime
11
 
12
- from PyPDF2 import PdfReader
13
- import numpy as np
14
- from PIL import Image
15
- import gradio as gr
16
- import faiss
17
- import pytesseract
18
- from sentence_transformers import SentenceTransformer
19
- import sympy as sp
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # -------------------- CONFIG --------------------
24
- APP_NAME = "Jajabor – SEBA Assamese Class 10 Tutor (Free CPU)"
25
 
26
  BASE_DIR = os.path.abspath(os.path.dirname(__file__))
27
  PDF_DIR = os.path.join(BASE_DIR, "pdfs", "class10")
28
  DB_PATH = os.path.join(BASE_DIR, "jajabor_users.db")
29
 
30
- EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
31
- USE_HF_INFERENCE = False
32
- LLM_LOCAL_NAME = "google/flan-t5-small"
33
- LLM_MAX_TOKENS = 128
34
-
35
- CHUNK_SIZE = 400 # Reduced for better performance
36
- CHUNK_OVERLAP = 80
37
- TOP_K = 3 # Reduced for faster retrieval
38
-
39
  # -------------------- DATABASE --------------------
40
- def init_db(path=DB_PATH):
41
- os.makedirs(os.path.dirname(path), exist_ok=True)
42
- conn = sqlite3.connect(path)
43
  cur = conn.cursor()
44
  cur.execute(
45
  """
@@ -79,14 +107,14 @@ def get_or_create_user(username: str):
79
  else:
80
  cur.execute(
81
  "INSERT INTO users (username, created_at) VALUES (?, ?)",
82
- (username, datetime.utcnow().isoformat()),
83
  )
84
  conn.commit()
85
  user_id = cur.lastrowid
86
  conn.close()
87
  return user_id
88
 
89
- def log_interaction(user_id, query, answer, is_math: bool):
90
  conn = sqlite3.connect(DB_PATH)
91
  cur = conn.cursor()
92
  cur.execute(
@@ -94,445 +122,400 @@ def log_interaction(user_id, query, answer, is_math: bool):
94
  INSERT INTO interactions (user_id, timestamp, query, answer, is_math)
95
  VALUES (?, ?, ?, ?, ?)
96
  """,
97
- (user_id, datetime.utcnow().isoformat(), query, answer, 1 if is_math else 0),
98
  )
99
  conn.commit()
100
  conn.close()
101
 
102
- def get_user_stats(user_id):
103
- conn = sqlite3.connect(DB_PATH)
104
- cur = conn.cursor()
105
- cur.execute(
106
- "SELECT COUNT(*), SUM(is_math) FROM interactions WHERE user_id=?", (user_id,)
107
- )
108
- row = cur.fetchone()
109
- conn.close()
110
- total = row[0] or 0
111
- math_count = row[1] or 0
112
- return total, math_count
113
-
114
- init_db()
115
-
116
- # -------------------- PDF reading --------------------
117
- def extract_text_from_pdf(pdf_path: str) -> str:
118
- text_pages = []
119
- try:
120
- reader = PdfReader(pdf_path)
121
- for page in reader.pages:
122
  try:
123
- txt = page.extract_text() or ""
124
- text_pages.append(txt)
125
- except Exception:
126
- continue
127
- except Exception as e:
128
- print("PDF read error:", e)
129
- return "\n".join(text_pages)
130
-
131
- def load_all_pdfs(pdf_dir: str):
132
- texts = []
133
- metas = []
134
- if not os.path.isdir(pdf_dir):
135
- print("PDF_DIR not found:", pdf_dir)
136
- return texts, metas
137
- for fname in sorted(os.listdir(pdf_dir)):
138
- if fname.lower().endswith(".pdf"):
139
- path = os.path.join(pdf_dir, fname)
140
- print("Reading:", path)
141
- text = extract_text_from_pdf(path)
142
- if text.strip():
143
- texts.append(text)
144
- metas.append({"source": fname})
145
- return texts, metas
146
-
147
- def split_text(text: str, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP):
148
- if not text:
149
- return []
150
- chunks = []
151
- step = max(chunk_size - overlap, 1)
152
- start = 0
153
- L = len(text)
154
- while start < L:
155
- end = min(start + chunk_size, L)
156
- chunk = text[start:end]
157
- if chunk.strip():
158
- chunks.append(chunk)
159
- start += step
160
- return chunks
161
-
162
- # -------------------- Embeddings + FAISS --------------------
163
- print("Loading embedding model:", EMBEDDING_MODEL_NAME)
164
- embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
165
-
166
- print("Loading PDFs from", PDF_DIR)
167
- all_texts, all_metas = load_all_pdfs(PDF_DIR)
168
- print("Number of PDFs with content:", len(all_texts))
169
-
170
- corpus_chunks = []
171
- corpus_metas = []
172
- for text, meta in zip(all_texts, all_metas):
173
- chs = split_text(text, CHUNK_SIZE, CHUNK_OVERLAP)
174
- corpus_chunks.extend(chs)
175
- corpus_metas.extend([meta] * len(chs))
176
-
177
- print("Total chunks:", len(corpus_chunks))
178
- index = None
179
- if len(corpus_chunks) > 0:
180
- print("Encoding chunks...")
181
- try:
182
- embs = embedding_model.encode(corpus_chunks, batch_size=16, show_progress_bar=False).astype("float32")
183
- dim = embs.shape[1]
184
- index = faiss.IndexFlatL2(dim)
185
- index.add(embs)
186
- print("✅ FAISS index ready; dim:", dim)
187
- except Exception as e:
188
- print("Failed to encode/add to index:", e)
189
- index = None
190
- else:
191
- print("No corpus chunks found: upload PDFs to ./pdfs/class10")
192
-
193
- def rag_search(query: str, k: int = TOP_K):
194
- if index is None or len(corpus_chunks) == 0:
195
- return []
196
- try:
197
- q_vec = embedding_model.encode([query]).astype("float32")
198
- D, I = index.search(q_vec, k)
199
- results = []
200
- for dist, idx in zip(D[0], I[0]):
201
- if idx == -1 or idx >= len(corpus_chunks):
202
- continue
203
- results.append(
204
- {
205
- "score": float(dist),
206
- "text": corpus_chunks[idx],
207
- "meta": corpus_metas[idx],
208
- }
209
- )
210
- return results
211
- except Exception as e:
212
- print("RAG search error:", e)
213
- return []
214
-
215
- # -------------------- Local CPU LLM --------------------
216
- print("Loading local CPU LLM:", LLM_LOCAL_NAME)
217
- llm_pipe = None
218
- try:
219
- tokenizer = AutoTokenizer.from_pretrained(LLM_LOCAL_NAME)
220
- model = AutoModelForSeq2SeqLM.from_pretrained(LLM_LOCAL_NAME)
221
- llm_pipe = pipeline(
222
- "text2text-generation",
223
- model=model,
224
- tokenizer=tokenizer,
225
- device=-1, # CPU
226
- torch_dtype="auto"
227
- )
228
- print("✅ Local LLM loaded successfully")
229
- except Exception as e:
230
- print("Failed to load local LLM:", e)
231
- llm_pipe = None
232
-
233
- SYSTEM_PROMPT = """You are "Jajabor", an expert SEBA Assamese tutor for Class 10.
234
- Answer in Assamese unless the student asks for English.
235
- Use the textbook context provided. If unsure, say you don't know.
236
- Explain simply with examples."""
237
-
238
- def build_rag_prompt(context_blocks, question, chat_history):
239
- ctx = ""
240
- for i, block in enumerate(context_blocks, start=1):
241
- src = block["meta"].get("source", "textbook")
242
- ctx += f"[Context {i} - {src}]\n{block['text']}\n\n"
243
-
244
- hist = ""
245
- for u, a in chat_history[-3:]: # Last 3 exchanges
246
- if u:
247
- hist += f"Student: {u}\n"
248
- if a:
249
- hist += f"Tutor: {a}\n"
250
-
251
- prompt = f"""{SYSTEM_PROMPT}
252
-
253
- Previous conversation:
254
- {hist}
255
-
256
- Student's question:
257
- {question}
258
-
259
- Textbook content:
260
- {ctx}
261
-
262
- Provide a helpful, easy-to-understand answer in Assamese:"""
263
- return prompt
264
-
265
- def llm_answer_with_rag(question: str, chat_history):
266
- if not question.strip():
267
- return "অনুগ্ৰহ কৰি এটা প্ৰশ্ন সোধক।"
268
 
269
- retrieved = rag_search(question, TOP_K)
270
- if not retrieved:
271
- return "মই এই প্ৰশ্নৰ উত্তৰ দিবলৈ প্ৰয়োজনীয় তথ্য বিচাৰি পোৱা নাই। দয়া কৰি নিশ্চিত কৰক যে আপোনাৰ পাঠ্যপুথিৰ PDF ফাইলসমূহ সঠিকভাৱে আপলোড কৰা হৈছে।"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
272
 
273
- prompt = build_rag_prompt(retrieved, question, chat_history)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
- if llm_pipe is None:
276
- return "AI মডেল ল'ড হোৱা নাই। দয়া কৰি পুনৰ চেষ্টা কৰক।"
 
 
277
 
278
- try:
279
- out = llm_pipe(
280
- prompt,
281
- max_new_tokens=LLM_MAX_TOKENS,
282
- do_sample=False,
283
- temperature=0.3
284
- )
285
- if isinstance(out, list) and len(out) > 0:
286
- if hasattr(out[0], 'get') and "generated_text" in out[0]:
287
- return out[0]["generated_text"]
288
- elif isinstance(out[0], str):
289
- return out[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  else:
291
- return str(out[0])
292
- return "উত্তৰ তৈয়াৰ কৰোঁতে সমস্যা হ'ল।"
293
- except Exception as e:
294
- print("LLM generation error:", e)
295
- return f"উত্তৰ তৈয়াৰ কৰোঁতে ত্ৰুটি: {str(e)}"
296
-
297
- # -------------------- OCR + Math helpers --------------------
298
- def ocr_from_image(img_path: str):
299
- if not img_path:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  return ""
 
301
  try:
302
- img = Image.open(img_path)
303
- img = img.convert("RGB")
304
- text = pytesseract.image_to_string(img, lang="eng")
305
  return text.strip()
306
  except Exception as e:
307
- print("OCR error:", e)
308
  return ""
309
 
310
- def is_likely_math(text: str) -> bool:
311
- if not text:
312
- return False
313
- math_chars = set("0123456789+-*/=^()%")
314
- text_chars = set(text)
315
- if math_chars.intersection(text_chars):
316
- return True
317
- math_kws = ["গণিত", "সমীকৰণ", "উদাহৰণ", "প্ৰশ্ন", "বীজগণিত", "solve", "equation", "math", "calculate"]
318
- return any(k in text.lower() for k in math_kws)
319
-
320
- def solve_math_expression(expr: str):
321
- try:
322
- # Clean the expression
323
- expr = expr.strip()
324
- expr = expr.replace('^', '**')
325
-
326
- if '=' in expr:
327
- parts = expr.split('=')
328
- if len(parts) == 2:
329
- left = sp.sympify(parts[0].strip())
330
- right = sp.sympify(parts[1].strip())
331
- equation = sp.Eq(left, right)
332
- solutions = sp.solve(equation)
333
-
334
- if solutions:
335
- solution_str = f"সমীকৰণ: {equation}\n\nসমাধান: x = {solutions[0]}"
336
- if len(solutions) > 1:
337
- solution_str += f"\nবা x = {solutions[1]}"
338
- return solution_str
339
- else:
340
- return "কোনো সমাধান পোৱা নগ'ল।"
341
- else:
342
- # Just simplify the expression
343
- expr_sym = sp.sympify(expr)
344
- simplified = sp.simplify(expr_sym)
345
- return f"প্ৰকাশ: {expr}\n\nসৰলীকৃত: {simplified}"
346
-
347
- except Exception as e:
348
- return f"গণিত সমাধানত সমস্যা: {str(e)}\nদয়া কৰি স্পষ্টকৈ লিখক, যেনে: 2*x + 3 = 7"
349
-
350
- # -------------------- Chat logic --------------------
351
- def login_user(username):
352
- username = (username or "").strip()
353
- if not username:
354
- return {}, "⚠️ অনুগ্ৰহ কৰি প্ৰথমে লগিনৰ বাবে এটা নাম লিখক।"
355
 
356
- user_id = get_or_create_user(username)
357
- if not user_id:
358
- return {}, "⚠️ লগিন কৰোঁতে সমস্যা হ'ল।"
359
 
360
- user_state = {"username": username, "user_id": user_id}
361
- total, math_count = get_user_stats(user_id)
362
- stats = (
363
- f"👤 ব্যৱহাৰকাৰী: **{username}**\n\n"
364
- f"📊 মোট প্ৰশ্ন: **{total}**\n"
365
- f"🧮 গণিত প্ৰশ্ন: **{math_count}**"
366
- )
367
- return user_state, stats
368
-
369
- def chat_logic(text_input, image_input, chat_history, user_state):
370
- if chat_history is None:
371
- chat_history = []
372
-
373
- # Check if user is logged in
374
- if not user_state or not user_state.get("user_id"):
375
- chat_history.append([text_input or "", "⚠️ প্ৰথমে ওপৰত আপোনাৰ নাম লিখি **Login / লগিন** টিপক।"])
376
- return chat_history, user_state
377
-
378
- user_id = user_state["user_id"]
379
- final_query_parts = []
380
-
381
- # Process image OCR
382
- if image_input is not None:
383
- ocr_text = ocr_from_image(image_input)
384
- if ocr_text:
385
- final_query_parts.append(f"[ছবিৰ পাঠ] {ocr_text}")
386
-
387
- if text_input and text_input.strip():
388
- final_query_parts.append(text_input.strip())
389
-
390
- if not final_query_parts:
391
- chat_history.append(["", "⚠️ অনুগ্ৰহ কৰি প্ৰশ্ন লিখক, কিম্বা ছবি আপলোড কৰক।"])
392
- return chat_history, user_state
393
-
394
- full_query = "\n".join(final_query_parts)
395
-
396
- is_math = is_likely_math(full_query)
397
-
398
- if is_math:
399
- math_answer = solve_math_expression(full_query)
400
- # Combine math solution with request for explanation
401
- combined_question = f"{full_query}\n\nগণিত সমাধান:\n{math_answer}\n\nঅনুগ্ৰহ কৰি ইয়াক সহজ ভাষাত ব্যাখ্যা কৰক:"
402
- final_answer = llm_answer_with_rag(combined_question, chat_history)
403
- else:
404
- final_answer = llm_answer_with_rag(full_query, chat_history)
405
-
406
- log_interaction(user_id, full_query, final_answer, is_math)
407
 
408
- display_question = text_input or "[ছবিৰ প্ৰশ্ন]"
409
- chat_history.append([display_question, final_answer])
410
-
411
- return chat_history, user_state
412
-
413
- def clear_chat():
414
- return [], None
415
-
416
- # -------------------- Gradio UI --------------------
417
- with gr.Blocks(
418
- title=APP_NAME,
419
- css="""
420
- .stats-box {
421
- background: #f0f8ff;
422
- padding: 15px;
423
- border-radius: 8px;
424
- border: 1px solid #d1e7ff;
425
- margin-bottom: 15px;
426
- }
427
- .login-section {
428
- background: #f8f9fa;
429
- padding: 15px;
430
- border-radius: 8px;
431
- margin-bottom: 15px;
432
- }
433
- """
434
- ) as demo:
435
- gr.Markdown(f"# 🧭 {APP_NAME}")
436
 
437
- gr.Markdown("""
438
- - SEBA Class 10 PDFs upload to `pdfs/class10` folder
439
- - Text + Image (OCR) input support
440
- - Math step-by-step solutions
441
- - User login + progress tracking
442
- """)
443
-
444
- # Use a simpler state management approach
445
- user_state = gr.State(value={})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
 
447
- with gr.Row():
448
- with gr.Column(scale=1):
449
- with gr.Group(elem_classes="login-section"):
450
- gr.Markdown("### 👤 লগিন")
451
- username_inp = gr.Textbox(
452
- label="নাম / ইউজাৰ আইডি",
453
- placeholder="উদাহৰণ: abu10, student01 ...",
454
- max_lines=1
455
- )
456
- login_btn = gr.Button("✅ Login / লগিন", variant="primary")
457
- stats_md = gr.Markdown("এতিয়ালৈকে লগিন হোৱা নাই।", elem_classes="stats-box")
458
-
459
- gr.Markdown("""
460
- ### 💡 টিপছ
461
- - "ক্লাছ ১০ গণিত: উদাহৰণ ৩.১ প্ৰশ্ন ২" – এই ধৰণৰ প্ৰশ্ন ভাল
462
- - ফটো আপলোড কৰিলে টেক্স্টটো OCR কৰি পঢ়িব চেষ্টা কৰা হয়
463
- - সম্ভৱ হলে প্ৰশ্নটো অসমীয়াত সোধক 🙂
464
- """)
465
-
466
- with gr.Column(scale=3):
467
- chatbot = gr.Chatbot(
468
- label="জাজাবৰ সৈতে কথোপকথন",
469
- height=500,
470
- show_copy_button=True
471
- )
472
-
473
- with gr.Row():
474
- text_inp = gr.Textbox(
475
- label="আপোনাৰ প্ৰশ্ন লিখক",
476
- placeholder='উদাহৰণ: "ক্লাছ ১০ অসমীয়া: অনুচ্ছেদ পাঠ ১ ৰ মূল বিষয় কি?"',
477
- lines=2,
478
- scale=4
479
- )
480
 
481
  with gr.Row():
482
- image_inp = gr.Image(
483
- label="📷 প্ৰশ্নৰ ছবি (Optional)",
484
- type="filepath",
485
- scale=3
486
- )
 
 
 
 
 
 
 
 
 
487
 
488
- with gr.Row():
489
- ask_btn = gr.Button("🤖 জাজাবৰক সোধক", variant="primary", scale=2)
490
- clear_btn = gr.Button("🧹 পৰিষ্কাৰ কৰক", variant="secondary", scale=1)
491
-
492
- # Event handlers
493
- login_btn.click(
494
- login_user,
495
- inputs=[username_inp],
496
- outputs=[user_state, stats_md]
497
- )
498
-
499
- # Chat function - simplified
500
- def process_chat(text, image, history, state):
501
- return chat_logic(text, image, history, state)
502
-
503
- ask_btn.click(
504
- process_chat,
505
- inputs=[text_inp, image_inp, chatbot, user_state],
506
- outputs=[chatbot, user_state]
507
- ).then(
508
- lambda: ("", None),
509
- outputs=[text_inp, image_inp]
510
- )
511
-
512
- text_inp.submit(
513
- process_chat,
514
- inputs=[text_inp, image_inp, chatbot, user_state],
515
- outputs=[chatbot, user_state]
516
- ).then(
517
- lambda: ("", None),
518
- outputs=[text_inp, image_inp]
519
- )
 
 
 
 
 
 
 
 
 
 
 
520
 
521
- clear_btn.click(
522
- clear_chat,
523
- outputs=[chatbot, image_inp]
524
- )
525
 
 
526
  if __name__ == "__main__":
527
- # For Hugging Face Spaces, don't use share=True
 
 
 
 
 
 
528
  try:
529
  demo.launch(
530
- server_name="0.0.0.0",
531
  server_port=7860,
532
- share=False, # Changed to False for Hugging Face Spaces
533
  show_error=True
534
  )
535
  except Exception as e:
536
  print(f"Launch error: {e}")
537
- # Fallback to simple launch
538
  demo.launch(share=False)
 
1
  """
2
+ Jajabor – SEBA Assamese Class 10 Tutor (Fixed for Hugging Face Spaces)
 
3
  """
4
 
5
  import os
 
6
  import sqlite3
 
7
  from datetime import datetime
8
 
9
+ # Import with error handling
10
+ try:
11
+ from PyPDF2 import PdfReader
12
+ PDF_AVAILABLE = True
13
+ except ImportError:
14
+ PDF_AVAILABLE = False
15
+ print("PyPDF2 not available")
16
+
17
+ try:
18
+ from sentence_transformers import SentenceTransformer
19
+ EMBEDDING_AVAILABLE = True
20
+ except ImportError:
21
+ EMBEDDING_AVAILABLE = False
22
+ print("sentence-transformers not available")
23
+
24
+ try:
25
+ import faiss
26
+ FAISS_AVAILABLE = True
27
+ except ImportError:
28
+ FAISS_AVAILABLE = False
29
+ print("faiss not available")
30
 
31
+ try:
32
+ from transformers import pipeline
33
+ TRANSFORMERS_AVAILABLE = True
34
+ except ImportError:
35
+ TRANSFORMERS_AVAILABLE = False
36
+ print("transformers not available")
37
+
38
+ try:
39
+ import gradio as gr
40
+ GRADIO_AVAILABLE = True
41
+ except ImportError:
42
+ GRADIO_AVAILABLE = False
43
+ print("gradio not available")
44
+
45
+ try:
46
+ import pytesseract
47
+ from PIL import Image
48
+ OCR_AVAILABLE = True
49
+ except ImportError:
50
+ OCR_AVAILABLE = False
51
+ print("OCR dependencies not available")
52
+
53
+ try:
54
+ import sympy as sp
55
+ SYMPY_AVAILABLE = True
56
+ except ImportError:
57
+ SYMPY_AVAILABLE = False
58
+ print("sympy not available")
59
 
60
  # -------------------- CONFIG --------------------
61
+ APP_NAME = "Jajabor – SEBA Class 10 Tutor"
62
 
63
  BASE_DIR = os.path.abspath(os.path.dirname(__file__))
64
  PDF_DIR = os.path.join(BASE_DIR, "pdfs", "class10")
65
  DB_PATH = os.path.join(BASE_DIR, "jajabor_users.db")
66
 
 
 
 
 
 
 
 
 
 
67
  # -------------------- DATABASE --------------------
68
+ def init_db():
69
+ os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
70
+ conn = sqlite3.connect(DB_PATH)
71
  cur = conn.cursor()
72
  cur.execute(
73
  """
 
107
  else:
108
  cur.execute(
109
  "INSERT INTO users (username, created_at) VALUES (?, ?)",
110
+ (username, datetime.now().isoformat()),
111
  )
112
  conn.commit()
113
  user_id = cur.lastrowid
114
  conn.close()
115
  return user_id
116
 
117
+ def log_interaction(user_id, query, answer, is_math=False):
118
  conn = sqlite3.connect(DB_PATH)
119
  cur = conn.cursor()
120
  cur.execute(
 
122
  INSERT INTO interactions (user_id, timestamp, query, answer, is_math)
123
  VALUES (?, ?, ?, ?, ?)
124
  """,
125
+ (user_id, datetime.now().isoformat(), query, answer, 1 if is_math else 0),
126
  )
127
  conn.commit()
128
  conn.close()
129
 
130
+ # -------------------- SIMPLE TUTOR --------------------
131
+ class SimpleTutor:
132
+ def __init__(self):
133
+ self.llm = None
134
+ self.embedding_model = None
135
+ self.index = None
136
+ self.corpus_chunks = []
137
+ self.loaded = False
138
+
139
+ self._load_models()
140
+ self.load_pdfs()
141
+
142
+ def _load_models(self):
143
+ """Load models with error handling"""
144
+ if EMBEDDING_AVAILABLE:
 
 
 
 
 
145
  try:
146
+ self.embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
147
+ print("✅ Embedding model loaded")
148
+ except Exception as e:
149
+ print(f"❌ Could not load embedding model: {e}")
150
+
151
+ if TRANSFORMERS_AVAILABLE:
152
+ try:
153
+ self.llm = pipeline(
154
+ "text2text-generation",
155
+ model="google/flan-t5-small",
156
+ device=-1, # CPU
157
+ torch_dtype="auto"
158
+ )
159
+ print("✅ LLM loaded")
160
+ except Exception as e:
161
+ print(f"❌ Could not load LLM: {e}")
162
+
163
+ self.loaded = True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
 
165
+ def load_pdfs(self):
166
+ """Simple PDF loading"""
167
+ if not PDF_AVAILABLE or not os.path.exists(PDF_DIR):
168
+ print(f"PDF directory not found: {PDF_DIR}")
169
+ return
170
+
171
+ all_texts = []
172
+ for fname in os.listdir(PDF_DIR):
173
+ if fname.lower().endswith(".pdf"):
174
+ path = os.path.join(PDF_DIR, fname)
175
+ try:
176
+ reader = PdfReader(path)
177
+ text = ""
178
+ for page in reader.pages:
179
+ text += page.extract_text() or ""
180
+ if text.strip():
181
+ all_texts.append(text)
182
+ print(f"📖 Loaded {fname}")
183
+ except Exception as e:
184
+ print(f"Error reading {fname}: {e}")
185
+
186
+ # Simple text splitting
187
+ self.corpus_chunks = []
188
+ for text in all_texts:
189
+ chunks = self._split_text(text)
190
+ self.corpus_chunks.extend(chunks)
191
+
192
+ print(f"📚 Total chunks: {len(self.corpus_chunks)}")
193
+
194
+ # Build FAISS index if we have chunks and embedding model
195
+ if self.corpus_chunks and self.embedding_model and FAISS_AVAILABLE:
196
+ try:
197
+ embs = self.embedding_model.encode(self.corpus_chunks, show_progress_bar=False).astype("float32")
198
+ dim = embs.shape[1]
199
+ self.index = faiss.IndexFlatL2(dim)
200
+ self.index.add(embs)
201
+ print(f"✅ FAISS index ready; dim: {dim}")
202
+ except Exception as e:
203
+ print(f"❌ FAISS index creation failed: {e}")
204
+
205
+ def _split_text(self, text, chunk_size=400):
206
+ """Simple text splitting"""
207
+ if not text:
208
+ return []
209
+ chunks = []
210
+ for i in range(0, len(text), chunk_size):
211
+ chunk = text[i:i+chunk_size]
212
+ if chunk.strip():
213
+ chunks.append(chunk)
214
+ return chunks
215
 
216
+ def answer_question(self, question):
217
+ """Simple question answering"""
218
+ if not question.strip():
219
+ return "অনুগ্ৰহ কৰি এটা প্ৰশ্ন সোধক।"
220
+
221
+ # Simple math detection
222
+ if self._is_math_question(question):
223
+ return self._solve_math(question)
224
+
225
+ # Simple RAG if available
226
+ context = ""
227
+ if self.index is not None and self.corpus_chunks:
228
+ relevant_chunks = self._find_relevant_chunks(question)
229
+ if relevant_chunks:
230
+ context = "\n".join(relevant_chunks[:2])
231
+
232
+ # Generate answer
233
+ if self.llm:
234
+ try:
235
+ if context:
236
+ prompt = f"প্ৰশ্ন: {question}\n\nসংদৰ্ভ: {context}\n\nসহায়ক উত্তৰ:"
237
+ else:
238
+ prompt = f"প্ৰশ্ন: {question}\n\nউত্তৰ:"
239
+
240
+ response = self.llm(
241
+ prompt,
242
+ max_new_tokens=150,
243
+ temperature=0.3,
244
+ do_sample=False
245
+ )
246
+
247
+ if isinstance(response, list) and len(response) > 0:
248
+ if hasattr(response[0], 'get'):
249
+ answer = response[0].get('generated_text', 'উত্তৰ তৈয়াৰ কৰিব পৰা নগল।')
250
+ else:
251
+ answer = str(response[0])
252
+ else:
253
+ answer = str(response)
254
+
255
+ except Exception as e:
256
+ answer = f"উত্তৰ তৈয়াৰ কৰোঁতে সমস্যা: {str(e)}"
257
+ else:
258
+ # Fallback responses
259
+ fallback_responses = [
260
+ "মই আপোনাৰ প্ৰশ্নটো বুজিলোঁ। অধ্যয়নৰ বাবে শুভেচ্ছা!",
261
+ "এই বিষয়টো মনোযোগেৰে পঢ়িবলৈ চেষ্টা কৰক।",
262
+ "আপোনাৰ পাঠ্যপুথিৰ সংশ্লিষ্ট অধ্যায়টো চাওক।",
263
+ "এই প্ৰশ্নটোৰ বাবে আপোনাৰ শিক্ষকৰ সহায় ল'ব পাৰে।"
264
+ ]
265
+ import random
266
+ answer = random.choice(fallback_responses)
267
+
268
+ return answer
269
 
270
+ def _is_math_question(self, text):
271
+ """Simple math detection"""
272
+ math_indicators = ['+', '-', '*', '/', '=', 'x', 'y', 'গণিত', 'সমীকৰণ', 'solve', 'calculate']
273
+ return any(indicator in text.lower() for indicator in math_indicators)
274
 
275
+ def _solve_math(self, expr):
276
+ """Simple math solving"""
277
+ if not SYMPY_AVAILABLE:
278
+ return "গণিত সমাধানৰ বাবে sympy পেকেজ প্ৰয়োজন।"
279
+
280
+ try:
281
+ # Clean the expression
282
+ expr = expr.strip()
283
+ expr = expr.replace('^', '**')
284
+
285
+ if '=' in expr:
286
+ parts = expr.split('=')
287
+ if len(parts) == 2:
288
+ left = sp.sympify(parts[0].strip())
289
+ right = sp.sympify(parts[1].strip())
290
+ equation = sp.Eq(left, right)
291
+ solutions = sp.solve(equation)
292
+
293
+ if solutions:
294
+ solution_str = f"সমীকৰণ: {equation}\n\nসমাধান: x = {solutions[0]}"
295
+ if len(solutions) > 1:
296
+ solution_str += f"\nবা x = {solutions[1]}"
297
+ return solution_str
298
+ else:
299
+ return "কোনো সমাধান পোৱা নগ'ল।"
300
  else:
301
+ # Just simplify the expression
302
+ expr_sym = sp.sympify(expr)
303
+ simplified = sp.simplify(expr_sym)
304
+ return f"প্ৰকাশ: {expr}\n\nসৰলীকৃত: {simplified}"
305
+
306
+ except Exception as e:
307
+ return f"গণিত সমাধানত সমস্যা: {str(e)}\nদয়া কৰি স্পষ্টকৈ লিখক, যেনে: 2*x + 3 = 7"
308
+
309
+ def _find_relevant_chunks(self, question, k=3):
310
+ """Find relevant chunks using FAISS or keyword matching"""
311
+ if not self.corpus_chunks:
312
+ return []
313
+
314
+ # Try FAISS first
315
+ if self.index is not None and self.embedding_model:
316
+ try:
317
+ q_vec = self.embedding_model.encode([question]).astype("float32")
318
+ D, I = self.index.search(q_vec, k)
319
+ results = []
320
+ for idx in I[0]:
321
+ if 0 <= idx < len(self.corpus_chunks):
322
+ results.append(self.corpus_chunks[idx])
323
+ return results
324
+ except Exception:
325
+ pass # Fall back to keyword matching
326
+
327
+ # Keyword matching fallback
328
+ question_words = set(question.lower().split())
329
+ scored_chunks = []
330
+
331
+ for chunk in self.corpus_chunks:
332
+ chunk_words = set(chunk.lower().split())
333
+ common_words = question_words.intersection(chunk_words)
334
+ score = len(common_words)
335
+ if score > 0:
336
+ scored_chunks.append((score, chunk))
337
+
338
+ # Return top k chunks
339
+ scored_chunks.sort(reverse=True)
340
+ return [chunk for _, chunk in scored_chunks[:k]]
341
+
342
+ # -------------------- OCR FUNCTION --------------------
343
+ def extract_text_from_image(image_path):
344
+ """Extract text from image using OCR"""
345
+ if not OCR_AVAILABLE or not image_path:
346
  return ""
347
+
348
  try:
349
+ image = Image.open(image_path)
350
+ text = pytesseract.image_to_string(image)
 
351
  return text.strip()
352
  except Exception as e:
353
+ print(f"OCR error: {e}")
354
  return ""
355
 
356
+ # -------------------- GRADIO APP --------------------
357
+ def main():
358
+ """Main function to run the app"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
 
360
+ # Initialize components
361
+ init_db()
362
+ tutor = SimpleTutor()
363
 
364
+ # Store user state in a simple way (avoiding gr.State issues)
365
+ user_states = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
 
367
+ def get_user_state(username):
368
+ """Simple user state management"""
369
+ if not username:
370
+ return None
371
+ if username not in user_states:
372
+ user_id = get_or_create_user(username)
373
+ if user_id:
374
+ user_states[username] = {"username": username, "user_id": user_id}
375
+ else:
376
+ return None
377
+ return user_states[username]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
 
379
+ def chat_function(message, image, chat_history, username):
380
+ """Main chat function"""
381
+ # Initialize chat history if None
382
+ if chat_history is None:
383
+ chat_history = []
384
+
385
+ # Check if user is logged in
386
+ user_state = get_user_state(username.strip())
387
+ if not user_state:
388
+ new_history = chat_history + [[message, "⚠️ প্ৰথমে নাম লিখি লগিন কৰক।"]]
389
+ return new_history, ""
390
+
391
+ # Combine text and image input
392
+ full_question = message.strip()
393
+ if image:
394
+ ocr_text = extract_text_from_image(image)
395
+ if ocr_text:
396
+ full_question += f"\n[ছবিৰ পাঠ: {ocr_text}]"
397
+
398
+ if not full_question:
399
+ new_history = chat_history + [["", "⚠️ প্ৰশ্ন লিখক বা ছবি আপলোড কৰক।"]]
400
+ return new_history, ""
401
+
402
+ # Get answer from tutor
403
+ answer = tutor.answer_question(full_question)
404
+
405
+ # Log interaction
406
+ log_interaction(user_state["user_id"], full_question, answer)
407
+
408
+ # Update chat
409
+ display_question = message if message.strip() else "[ছবিৰ প্ৰশ্ন]"
410
+ new_history = chat_history + [[display_question, answer]]
411
+ return new_history, ""
412
 
413
+ def clear_chat():
414
+ """Clear chat history"""
415
+ return [], ""
416
+
417
+ # Create Gradio interface
418
+ with gr.Blocks(
419
+ title=APP_NAME,
420
+ theme=gr.themes.Soft(),
421
+ css="""
422
+ .container {
423
+ max-width: 1200px;
424
+ margin: auto;
425
+ padding: 20px;
426
+ }
427
+ .login-section {
428
+ background: #f8f9fa;
429
+ padding: 15px;
430
+ border-radius: 10px;
431
+ margin-bottom: 20px;
432
+ }
433
+ """
434
+ ) as demo:
435
+
436
+ with gr.Column(elem_classes="container"):
437
+ gr.Markdown(f"# 🧭 {APP_NAME}")
438
+ gr.Markdown("SEBA Class 10 AI Tutor - Ask questions in Assamese or English")
 
 
 
 
 
 
 
439
 
440
  with gr.Row():
441
+ with gr.Column(scale=1):
442
+ with gr.Group(elem_classes="login-section"):
443
+ gr.Markdown("### 👤 লগিন")
444
+ username = gr.Textbox(
445
+ label="আপোনাৰ নাম",
446
+ placeholder="আপোনাৰ নাম লিখক...",
447
+ max_lines=1
448
+ )
449
+ gr.Markdown("""
450
+ ### 💡 টিপছ
451
+ - নাম লিখি প্ৰশ্ন সোধক
452
+ - পাঠ্যপুথিৰ PDF ফাইলসমূহ `pdfs/class10` ফ'ল্ডাৰত ৰাখক
453
+ - ছবি আপলোড কৰিলে OCR ৰ সহায়ত পাঠ পঢ়িব
454
+ """)
455
 
456
+ with gr.Column(scale=2):
457
+ chatbot = gr.Chatbot(
458
+ label="জাজাবৰ সৈতে কথোপকথন",
459
+ height=500,
460
+ show_copy_button=True
461
+ )
462
+
463
+ with gr.Row():
464
+ message = gr.Textbox(
465
+ label="প্ৰশ্ন",
466
+ placeholder="আপোনাৰ প্ৰশ্ন ইয়াত লিখক...",
467
+ lines=2,
468
+ scale=4
469
+ )
470
+
471
+ with gr.Row():
472
+ image = gr.Image(
473
+ label="ছবি আপলোড কৰক (ঐচ্ছিক)",
474
+ type="filepath",
475
+ height=150
476
+ )
477
+
478
+ with gr.Row():
479
+ submit_btn = gr.Button("📤 প্ৰশ্ন পঠিয়াওক", variant="primary", scale=2)
480
+ clear_btn = gr.Button("🧹 পৰিষ্কাৰ কৰক", variant="secondary", scale=1)
481
+
482
+ # Event handlers
483
+ submit_btn.click(
484
+ fn=chat_function,
485
+ inputs=[message, image, chatbot, username],
486
+ outputs=[chatbot, message]
487
+ )
488
+
489
+ message.submit(
490
+ fn=chat_function,
491
+ inputs=[message, image, chatbot, username],
492
+ outputs=[chatbot, message]
493
+ )
494
+
495
+ clear_btn.click(
496
+ fn=clear_chat,
497
+ outputs=[chatbot, message]
498
+ )
499
 
500
+ return demo
 
 
 
501
 
502
+ # -------------------- LAUNCH --------------------
503
  if __name__ == "__main__":
504
+ if not GRADIO_AVAILABLE:
505
+ print("Gradio not available. Please install gradio.")
506
+ exit(1)
507
+
508
+ demo = main()
509
+
510
+ # On Hugging Face Spaces (SPACE_ID set) bind to 0.0.0.0; elsewhere leave server_name unset. Keep share=False.
511
  try:
512
  demo.launch(
513
+ server_name="0.0.0.0" if os.getenv('SPACE_ID') else None,
514
  server_port=7860,
515
+ share=False, # Important: set to False for Spaces
516
  show_error=True
517
  )
518
  except Exception as e:
519
  print(f"Launch error: {e}")
520
+ # Fallback launch without server_name
521
  demo.launch(share=False)