Sazid2 committed on
Commit
0cdd015
·
verified ·
1 Parent(s): e7b0783

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -505
app.py CHANGED
@@ -1,515 +1,74 @@
1
  """
2
- Jajabor – SEBA Assamese Class 10 Tutor
3
- Simplified version for Hugging Face Spaces
4
  """
5
 
 
6
  import os
7
- import sqlite3
8
- from datetime import datetime
9
 
10
- # Import with error handling
11
- try:
12
- from PyPDF2 import PdfReader
13
- PDF_AVAILABLE = True
14
- except ImportError:
15
- PDF_AVAILABLE = False
16
- print("PyPDF2 not available")
17
-
18
- try:
19
- from sentence_transformers import SentenceTransformer
20
- EMBEDDING_AVAILABLE = True
21
- except ImportError:
22
- EMBEDDING_AVAILABLE = False
23
- print("sentence-transformers not available")
24
-
25
- try:
26
- import faiss
27
- FAISS_AVAILABLE = True
28
- except ImportError:
29
- FAISS_AVAILABLE = False
30
- print("faiss not available")
31
-
32
- try:
33
- from transformers import pipeline
34
- TRANSFORMERS_AVAILABLE = True
35
- except ImportError:
36
- TRANSFORMERS_AVAILABLE = False
37
- print("transformers not available")
38
-
39
- try:
40
- import gradio as gr
41
- GRADIO_AVAILABLE = True
42
- except ImportError:
43
- GRADIO_AVAILABLE = False
44
- print("gradio not available")
45
-
46
- try:
47
- import pytesseract
48
- from PIL import Image
49
- OCR_AVAILABLE = True
50
- except ImportError:
51
- OCR_AVAILABLE = False
52
- print("OCR dependencies not available")
53
-
54
- try:
55
- import sympy as sp
56
- SYMPY_AVAILABLE = True
57
- except ImportError:
58
- SYMPY_AVAILABLE = False
59
- print("sympy not available")
60
-
61
- # -------------------- CONFIG --------------------
62
- APP_NAME = "Jajabor – SEBA Class 10 Tutor"
63
-
64
- BASE_DIR = os.path.abspath(os.path.dirname(__file__))
65
- PDF_DIR = os.path.join(BASE_DIR, "pdfs", "class10")
66
- DB_PATH = os.path.join(BASE_DIR, "jajabor_users.db")
67
-
68
- # -------------------- DATABASE --------------------
69
- def init_db():
70
- os.makedirs(os.path.dirname(DB_PATH), exist_ok=True)
71
- conn = sqlite3.connect(DB_PATH)
72
- cur = conn.cursor()
73
- cur.execute(
74
- """
75
- CREATE TABLE IF NOT EXISTS users (
76
- id INTEGER PRIMARY KEY AUTOINCREMENT,
77
- username TEXT UNIQUE,
78
- created_at TEXT
79
- )
80
- """
81
- )
82
- cur.execute(
83
- """
84
- CREATE TABLE IF NOT EXISTS interactions (
85
- id INTEGER PRIMARY KEY AUTOINCREMENT,
86
- user_id INTEGER,
87
- timestamp TEXT,
88
- query TEXT,
89
- answer TEXT,
90
- is_math INTEGER,
91
- FOREIGN KEY(user_id) REFERENCES users(id)
92
- )
93
- """
94
- )
95
- conn.commit()
96
- conn.close()
97
-
98
- def get_or_create_user(username: str):
99
- username = username.strip()
100
- if not username:
101
- return None
102
- conn = sqlite3.connect(DB_PATH)
103
- cur = conn.cursor()
104
- cur.execute("SELECT id FROM users WHERE username=?", (username,))
105
- row = cur.fetchone()
106
- if row:
107
- user_id = row[0]
108
- else:
109
- cur.execute(
110
- "INSERT INTO users (username, created_at) VALUES (?, ?)",
111
- (username, datetime.now().isoformat()),
112
- )
113
- conn.commit()
114
- user_id = cur.lastrowid
115
- conn.close()
116
- return user_id
117
-
118
- def log_interaction(user_id, query, answer, is_math=False):
119
- conn = sqlite3.connect(DB_PATH)
120
- cur = conn.cursor()
121
- cur.execute(
122
- """
123
- INSERT INTO interactions (user_id, timestamp, query, answer, is_math)
124
- VALUES (?, ?, ?, ?, ?)
125
- """,
126
- (user_id, datetime.now().isoformat(), query, answer, 1 if is_math else 0),
127
- )
128
- conn.commit()
129
- conn.close()
130
-
131
- # -------------------- SIMPLE TUTOR --------------------
132
- class SimpleTutor:
133
- def __init__(self):
134
- self.llm = None
135
- self.embedding_model = None
136
- self.index = None
137
- self.corpus_chunks = []
138
-
139
- print("🔄 Loading models...")
140
- self._load_models()
141
- self.load_pdfs()
142
- print("✅ Tutor initialized")
143
-
144
- def _load_models(self):
145
- """Load models with error handling"""
146
- if EMBEDDING_AVAILABLE:
147
- try:
148
- self.embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
149
- print("✅ Embedding model loaded")
150
- except Exception as e:
151
- print(f"❌ Could not load embedding model: {e}")
152
-
153
- if TRANSFORMERS_AVAILABLE:
154
- try:
155
- self.llm = pipeline(
156
- "text2text-generation",
157
- model="google/flan-t5-small",
158
- device=-1
159
- )
160
- print("✅ LLM loaded")
161
- except Exception as e:
162
- print(f"❌ Could not load LLM: {e}")
163
-
164
- def load_pdfs(self):
165
- """Simple PDF loading"""
166
- if not PDF_AVAILABLE:
167
- print("📚 PDF reading not available")
168
- return
169
-
170
- if not os.path.exists(PDF_DIR):
171
- print(f"📁 PDF directory not found: {PDF_DIR}")
172
- return
173
-
174
- all_texts = []
175
- pdf_files = [f for f in os.listdir(PDF_DIR) if f.lower().endswith('.pdf')]
176
-
177
- if not pdf_files:
178
- print("📭 No PDF files found in directory")
179
- return
180
 
181
- for fname in pdf_files:
182
- path = os.path.join(PDF_DIR, fname)
183
- try:
184
- reader = PdfReader(path)
185
- text = ""
186
- for page in reader.pages:
187
- text += page.extract_text() or ""
188
- if text.strip():
189
- all_texts.append(text)
190
- print(f"📖 Loaded {fname}")
191
- except Exception as e:
192
- print(f"❌ Error reading {fname}: {e}")
193
-
194
- # Simple text splitting
195
- self.corpus_chunks = []
196
- for text in all_texts:
197
- chunks = self._split_text(text)
198
- self.corpus_chunks.extend(chunks)
199
-
200
- print(f"📚 Total text chunks: {len(self.corpus_chunks)}")
201
-
202
- # Build FAISS index if we have chunks and embedding model
203
- if self.corpus_chunks and self.embedding_model and FAISS_AVAILABLE:
204
- try:
205
- print("🔨 Building FAISS index...")
206
- embs = self.embedding_model.encode(self.corpus_chunks, show_progress_bar=False).astype("float32")
207
- dim = embs.shape[1]
208
- self.index = faiss.IndexFlatL2(dim)
209
- self.index.add(embs)
210
- print(f"✅ FAISS index ready; dim: {dim}")
211
- except Exception as e:
212
- print(f"❌ FAISS index creation failed: {e}")
213
-
214
- def _split_text(self, text, chunk_size=400):
215
- """Simple text splitting"""
216
- if not text:
217
- return []
218
- chunks = []
219
- for i in range(0, len(text), chunk_size):
220
- chunk = text[i:i+chunk_size]
221
- if chunk.strip():
222
- chunks.append(chunk)
223
- return chunks
224
-
225
- def answer_question(self, question):
226
- """Simple question answering"""
227
- if not question.strip():
228
- return "অনুগ্ৰহ কৰি এটা প্ৰশ্ন সোধক।"
229
-
230
- # Simple math detection
231
- if self._is_math_question(question):
232
- return self._solve_math(question)
233
-
234
- # Simple RAG if available
235
- context = ""
236
- if self.index is not None and self.corpus_chunks:
237
- relevant_chunks = self._find_relevant_chunks(question)
238
- if relevant_chunks:
239
- context = "\n".join(relevant_chunks[:2])
240
-
241
- # Generate answer
242
- if self.llm:
243
- try:
244
- if context:
245
- prompt = f"প্ৰশ্ন: {question}\n\nসংদৰ্ভ: {context}\n\nসহায়ক উত্তৰ:"
246
- else:
247
- prompt = f"প্ৰশ্ন: {question}\n\nউত্তৰ:"
248
-
249
- response = self.llm(
250
- prompt,
251
- max_new_tokens=150,
252
- temperature=0.3,
253
- do_sample=False
254
- )
255
-
256
- if isinstance(response, list) and len(response) > 0:
257
- answer = response[0].get('generated_text', 'উত্তৰ তৈয়াৰ কৰিব পৰা নগল।')
258
- else:
259
- answer = str(response)
260
-
261
- except Exception as e:
262
- answer = f"উত্তৰ তৈয়াৰ কৰোঁতে সমস্যা: {str(e)}"
263
- else:
264
- # Fallback responses
265
- fallback_responses = [
266
- "মই আপোনাৰ প্ৰশ্নটো বুজিলোঁ। অধ্যয়নৰ বাবে শুভেচ্ছা!",
267
- "এই বিষয়টো মনোযোগেৰে পঢ়িবলৈ চেষ্টা কৰক।",
268
- "আপোনাৰ পাঠ্যপুথিৰ সংশ্লিষ্ট অধ্যায়টো চাওক।",
269
- "এই প্ৰশ্নটোৰ বাবে আপোনাৰ শিক্ষকৰ সহায় ল'ব পাৰে।"
270
- ]
271
- import random
272
- answer = random.choice(fallback_responses)
273
-
274
- return answer
275
-
276
- def _is_math_question(self, text):
277
- """Simple math detection"""
278
- math_indicators = ['+', '-', '*', '/', '=', 'x', 'y', 'গণিত', 'সমীকৰণ', 'solve', 'calculate']
279
- return any(indicator in text.lower() for indicator in math_indicators)
280
-
281
- def _solve_math(self, expr):
282
- """Simple math solving"""
283
- if not SYMPY_AVAILABLE:
284
- return "গণিত সমাধানৰ বাবে sympy পেকেজ প্ৰয়োজন।"
285
-
286
- try:
287
- # Clean the expression
288
- expr = expr.strip()
289
- expr = expr.replace('^', '**')
290
-
291
- if '=' in expr:
292
- parts = expr.split('=')
293
- if len(parts) == 2:
294
- left = sp.sympify(parts[0].strip())
295
- right = sp.sympify(parts[1].strip())
296
- equation = sp.Eq(left, right)
297
- solutions = sp.solve(equation)
298
-
299
- if solutions:
300
- solution_str = f"সমীকৰণ: {equation}\n\nসমাধান: x = {solutions[0]}"
301
- if len(solutions) > 1:
302
- solution_str += f"\nবা x = {solutions[1]}"
303
- return solution_str
304
- else:
305
- return "কোনো সমাধান পোৱা নগ'ল।"
306
- else:
307
- # Just simplify the expression
308
- expr_sym = sp.sympify(expr)
309
- simplified = sp.simplify(expr_sym)
310
- return f"প্ৰকাশ: {expr}\n\nসৰলীকৃত: {simplified}"
311
-
312
- except Exception as e:
313
- return f"গণিত সমাধানত সমস্যা: {str(e)}\nদয়া কৰি স্পষ্টকৈ লিখক, যেনে: 2*x + 3 = 7"
314
-
315
- def _find_relevant_chunks(self, question, k=3):
316
- """Find relevant chunks using FAISS or keyword matching"""
317
- if not self.corpus_chunks:
318
- return []
319
-
320
- # Try FAISS first
321
- if self.index is not None and self.embedding_model:
322
- try:
323
- q_vec = self.embedding_model.encode([question]).astype("float32")
324
- D, I = self.index.search(q_vec, k)
325
- results = []
326
- for idx in I[0]:
327
- if 0 <= idx < len(self.corpus_chunks):
328
- results.append(self.corpus_chunks[idx])
329
- return results
330
- except Exception:
331
- pass # Fall back to keyword matching
332
-
333
- # Keyword matching fallback
334
- question_words = set(question.lower().split())
335
- scored_chunks = []
336
-
337
- for chunk in self.corpus_chunks:
338
- chunk_words = set(chunk.lower().split())
339
- common_words = question_words.intersection(chunk_words)
340
- score = len(common_words)
341
- if score > 0:
342
- scored_chunks.append((score, chunk))
343
-
344
- # Return top k chunks
345
- scored_chunks.sort(reverse=True)
346
- return [chunk for _, chunk in scored_chunks[:k]]
347
-
348
- # -------------------- OCR FUNCTION --------------------
349
- def extract_text_from_image(image_path):
350
- """Extract text from image using OCR"""
351
- if not OCR_AVAILABLE or not image_path:
352
- return ""
353
-
354
- try:
355
- image = Image.open(image_path)
356
- text = pytesseract.image_to_string(image)
357
- return text.strip()
358
- except Exception as e:
359
- print(f"OCR error: {e}")
360
- return ""
361
-
362
- # -------------------- SIMPLE GRADIO APP --------------------
363
- def create_app():
364
- """Create and return the Gradio app"""
365
-
366
- # Initialize components
367
- print("🚀 Starting application...")
368
- init_db()
369
- tutor = SimpleTutor()
370
-
371
- # Store user state in memory (simple approach)
372
- user_states = {}
373
-
374
- def get_user_state(username):
375
- """Simple user state management"""
376
- username = (username or "").strip()
377
- if not username:
378
- return None
379
-
380
- if username not in user_states:
381
- user_id = get_or_create_user(username)
382
- if user_id:
383
- user_states[username] = {"username": username, "user_id": user_id}
384
- else:
385
- return None
386
- return user_states[username]
387
-
388
- def chat_function(message, image, chat_history, username):
389
- """Main chat function"""
390
- # Initialize chat history if None
391
- if chat_history is None:
392
- chat_history = []
393
-
394
- # Check if user is logged in
395
- user_state = get_user_state(username)
396
- if not user_state:
397
- new_history = chat_history + [[message or "", "⚠️ প্ৰথমে নাম লিখি লগিন কৰক।"]]
398
- return new_history, ""
399
-
400
- # Combine text and image input
401
- full_question = (message or "").strip()
402
- if image is not None:
403
- ocr_text = extract_text_from_image(image)
404
- if ocr_text:
405
- full_question += f"\n[ছবিৰ পাঠ: {ocr_text}]"
406
-
407
- if not full_question:
408
- new_history = chat_history + [["", "⚠️ প্ৰশ্ন লিখক বা ছবি আপলোড কৰক।"]]
409
- return new_history, ""
410
-
411
- # Get answer from tutor
412
- answer = tutor.answer_question(full_question)
413
-
414
- # Log interaction
415
- log_interaction(user_state["user_id"], full_question, answer)
416
-
417
- # Update chat
418
- display_question = message if message and message.strip() else "[ছবিৰ প্ৰশ্ন]"
419
- new_history = chat_history + [[display_question, answer]]
420
- return new_history, ""
421
-
422
- def clear_chat():
423
- """Clear chat history"""
424
- return [], None
425
 
426
- # Create Gradio interface - SIMPLIFIED without problematic components
427
- with gr.Blocks(
428
- title=APP_NAME,
429
- theme=gr.themes.Soft()
430
- ) as demo:
431
-
432
- gr.Markdown(f"# 🧭 {APP_NAME}")
433
- gr.Markdown("SEBA Class 10 AI Tutor - Ask questions in Assamese or English")
434
-
435
- with gr.Row():
436
- with gr.Column(scale=1):
437
- gr.Markdown("### 👤 লগিন")
438
- username = gr.Textbox(
439
- label="আপোনাৰ নাম",
440
- placeholder="আপোনাৰ নাম লিখক...",
441
- max_lines=1
442
- )
443
- gr.Markdown("""
444
- ### 💡 টিপছ
445
- - নাম লিখি প্ৰশ্ন সোধক
446
- - পাঠ্যপুথিৰ PDF ফাইলসমূহ `pdfs/class10` ফ'ল্ডাৰত ৰাখক
447
- - ছবি আপলোড কৰিলে OCR ৰ সহায়ত পাঠ পঢ়িব
448
- """)
449
-
450
- with gr.Column(scale=2):
451
- chatbot = gr.Chatbot(
452
- label="জাজাবৰ সৈতে কথোপকথন",
453
- height=500
454
- )
455
-
456
- with gr.Row():
457
- message = gr.Textbox(
458
- label="প্ৰশ্ন",
459
- placeholder="আপোনাৰ প্ৰশ্ন ইয়াত লিখক...",
460
- lines=2
461
- )
462
-
463
- with gr.Row():
464
- image = gr.Image(
465
- label="ছবি আপলোড কৰক (ঐচ্ছিক)",
466
- type="filepath"
467
- )
468
-
469
- with gr.Row():
470
- submit_btn = gr.Button("📤 প্ৰশ্ন পঠিয়াওক", variant="primary")
471
- clear_btn = gr.Button("🧹 পৰিষ্কাৰ কৰক", variant="secondary")
472
-
473
- # Event handlers
474
- submit_btn.click(
475
- fn=chat_function,
476
- inputs=[message, image, chatbot, username],
477
- outputs=[chatbot, message]
478
- )
479
-
480
- message.submit(
481
- fn=chat_function,
482
- inputs=[message, image, chatbot, username],
483
- outputs=[chatbot, message]
484
- )
485
-
486
- clear_btn.click(
487
- fn=clear_chat,
488
- outputs=[chatbot, image]
489
- )
490
 
491
- return demo
492
 
493
- # -------------------- MAIN --------------------
494
  if __name__ == "__main__":
495
- if not GRADIO_AVAILABLE:
496
- print("❌ Gradio not available. Please install gradio.")
497
- exit(1)
498
-
499
- try:
500
- demo = create_app()
501
- print("✅ App created successfully")
502
-
503
- # Simple launch for Hugging Face Spaces
504
- demo.launch(
505
- server_name="0.0.0.0",
506
- server_port=7860,
507
- share=False # Critical: must be False for Spaces
508
- )
509
- except Exception as e:
510
- print(f"❌ Error launching app: {e}")
511
- # Final fallback
512
- try:
513
- demo.launch(share=False)
514
- except:
515
- print("💥 Failed to launch application")
 
1
  """
2
+ Jajabor – Minimal Working Version for Hugging Face Spaces
 
3
  """
4
 
5
+ import gradio as gr
6
  import os
 
 
7
 
8
def simple_chat(message, history):
    """Reply to a chat message with a randomly chosen canned response.

    This is a placeholder responder (no model is involved): every non-empty
    message gets one of a fixed set of encouraging replies.

    Args:
        message: Text typed by the user. ``None`` is treated like an empty
            string.
        history: Existing chat history as a list of ``[user, bot]`` pairs,
            or ``None`` when the conversation has not started yet. The list
            passed in is never modified.

    Returns:
        A ``(history, textbox_value)`` tuple. ``history`` is a new list with
        the latest exchange appended; ``textbox_value`` is always ``""`` so
        the input box bound to the second output is cleared.
    """
    import random

    # Work on a copy so the caller's list is not mutated behind its back.
    updated = list(history) if history else []

    if not (message or "").strip():
        # The second return value is wired to the input textbox, so the
        # warning must go into the chat as a bot-only turn; putting it in
        # the textbox would let it be resubmitted as if it were a question.
        updated.append([None, "অনুগ্ৰহ কৰি প্ৰশ্ন লিখক।"])
        return updated, ""

    responses = (
        "আপোনাৰ প্ৰশ্নটো বুজিলোঁ। অধ্যয়নৰ বাবে শুভেচ্ছা!",
        "এই বিষয়টো মনোযোগেৰে পঢ়িবলৈ চেষ্টা কৰক।",
        "আপোনাৰ পাঠ্যপুথিৰ সংশ্লিষ্ট অধ্যায়টো চাওক।",
        "জাজাবৰ আপোনাক সহায় কৰিবলৈ সদায় সাজু আছে!",
        "এই প্ৰশ্নটোৰ বাবে আপোনাৰ শিক্ষকৰ সহায় ল'ব পাৰে।",
    )

    updated.append([message, random.choice(responses)])
    return updated, ""
29
+
30
+ # Create minimal interface
31
+ with gr.Blocks(title="Jajabor SEBA Tutor") as demo:
32
+ gr.Markdown("# 🧭 জাজাবৰ – SEBA Class 10 Tutor")
33
+ gr.Markdown("অসমীয়া মাধ্যমৰ দশম শ্ৰেণীৰ ছাত্ৰ-ছাত্ৰীৰ বাবে AI টিউটাৰ")
34
+
35
+ with gr.Row():
36
+ with gr.Column(scale=1):
37
+ gr.Markdown("### 👤 লগিন")
38
+ username = gr.Textbox(label="আপোনাৰ নাম", placeholder="নাম লিখক...")
39
+ gr.Markdown("""
40
+ ### 💡 টিপছ
41
+ - নাম লিখি প্ৰশ্ন সোধক
42
+ - পাঠ্যপুথিৰ PDF ফাইলসমূহ `pdfs/class10` ত ৰাখক
43
+ - জাজাবৰ আপোনাক সহায় কৰিব!
44
+ """)
45
+
46
+ with gr.Column(scale=2):
47
+ chatbot = gr.Chatbot(label="কথোপকথন", height=400)
48
+ message = gr.Textbox(label="প্ৰশ্ন", placeholder="প্ৰশ্ন লিখক...", lines=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ with gr.Row():
51
+ submit_btn = gr.Button("📤 প্ৰশ্ন পঠিয়াওক", variant="primary")
52
+ clear_btn = gr.Button("🧹 পৰিষ্কাৰ কৰক", variant="secondary")
53
+
54
+ # Simple event handlers
55
+ def process_message(msg, hist, user):
56
+ return simple_chat(msg, hist)
57
+
58
+ submit_btn.click(
59
+ fn=process_message,
60
+ inputs=[message, chatbot, username],
61
+ outputs=[chatbot, message]
62
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
+ message.submit(
65
+ fn=process_message,
66
+ inputs=[message, chatbot, username],
67
+ outputs=[chatbot, message]
68
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ clear_btn.click(lambda: ([], ""), outputs=[chatbot, message])
71
 
72
+ # Launch with minimal settings
73
  if __name__ == "__main__":
74
+ demo.launch(share=False)