Spaces:

Sazid2
/

Assamese

Sleeping

App Files Files Community

Sazid2 committed on Nov 28, 2025

Commit

8ba6650

verified ·

1 Parent(s): 7f26a44

Update app.py

Browse files

Files changed (1) hide show

app.py +98 -153

app.py CHANGED Viewed

@@ -1,15 +1,6 @@
-# app.py
 """
 Jajabor – SEBA Assamese Class 10 Tutor (Free-tier CPU-ready)
-- PDF reading: PyPDF2
-- CPU LLM: google/flan-t5-small (transformers pipeline)
-- Embeddings: sentence-transformers/all-MiniLM-L6-v2
-- FAISS for retrieval
-- OCR via pytesseract
-- SymPy for math solving
-- Gradio UI (gr.Image uses type="filepath")
-Notes:
-- requirements.txt must include: PyPDF2 (capitalized), gradio==4.44.0, gradio-client==0.4.3, sentence-transformers, faiss-cpu, transformers, torch, pytesseract, pillow, sympy
 """
 import os
@@ -37,7 +28,7 @@ PDF_DIR = os.path.join(BASE_DIR, "pdfs", "class10")
 DB_PATH = os.path.join(BASE_DIR, "jajabor_users.db")
 EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
-USE_HF_INFERENCE = False  # Free plan: use local small model
 LLM_LOCAL_NAME = "google/flan-t5-small"
 LLM_MAX_TOKENS = 128
@@ -122,7 +113,7 @@ def get_user_stats(user_id):
 init_db()
-# -------------------- PDF reading (PyPDF2) --------------------
 def extract_text_from_pdf(pdf_path: str) -> str:
     text_pages = []
     try:
@@ -185,7 +176,7 @@ for text, meta in zip(all_texts, all_metas):
 print("Total chunks:", len(corpus_chunks))
 index = None
 if len(corpus_chunks) > 0:
-    print("Encoding chunks (this may take some seconds)...")
     try:
         embs = embedding_model.encode(corpus_chunks, batch_size=32, show_progress_bar=False).astype("float32")
         dim = embs.shape[1]
@@ -199,14 +190,14 @@ else:
     print("No corpus chunks found: upload PDFs to ./pdfs/class10")
 def rag_search(query: str, k: int = TOP_K):
-    if index is None:
         return []
     try:
         q_vec = embedding_model.encode([query]).astype("float32")
         D, I = index.search(q_vec, k)
         results = []
         for dist, idx in zip(D[0], I[0]):
-            if idx == -1:
                 continue
             results.append(
                 {
@@ -220,16 +211,16 @@ def rag_search(query: str, k: int = TOP_K):
         print("RAG search error:", e)
         return []
-# -------------------- Local CPU LLM (flan-t5-small) --------------------
 print("Loading local CPU LLM:", LLM_LOCAL_NAME)
 llm_pipe = None
 try:
     tokenizer = AutoTokenizer.from_pretrained(LLM_LOCAL_NAME)
     model = AutoModelForSeq2SeqLM.from_pretrained(LLM_LOCAL_NAME)
-    llm_pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device_map=None)
     print("Local LLM loaded.")
 except Exception as e:
-    print("Failed to load local LLM (will return notice):", e)
     llm_pipe = None
 SYSTEM_PROMPT = """
@@ -250,7 +241,7 @@ def build_rag_prompt(context_blocks, question, chat_history):
         ctx += f"\n[Context {i} – {src}]\n{block['text']}\n"
     hist = ""
-    for role, msg in chat_history:
         hist += f"{role}: {msg}\n"
     prompt = f"""{SYSTEM_PROMPT}
@@ -270,41 +261,37 @@ def build_rag_prompt(context_blocks, question, chat_history):
 def llm_answer_with_rag(question: str, chat_history):
     retrieved = rag_search(question, TOP_K)
     prompt = build_rag_prompt(retrieved, question, chat_history)
-    if USE_HF_INFERENCE:
-        return "HF inference disabled in free plan."
-    else:
-        if llm_pipe is None:
-            return "Local LLM not loaded. Ensure model weights are available on first run."
-        try:
-            out = llm_pipe(prompt, max_new_tokens=LLM_MAX_TOKENS, do_sample=False)
-            if isinstance(out, list) and len(out) > 0 and "generated_text" in out[0]:
                 return out[0]["generated_text"]
-            if isinstance(out, list) and len(out) > 0 and isinstance(out[0], str):
-                return out[0]
-            if isinstance(out, dict) and "generated_text" in out:
-                return out["generated_text"]
-            return str(out)
-        except Exception as e:
-            traceback.print_exc()
-            return f"LLM generation failed: {e}"
 # -------------------- OCR + Math helpers --------------------
-def ocr_from_image(img: Image.Image):
-    if img is None:
         return ""
     try:
         img = img.convert("RGB")
-    except Exception:
-        pass
-    try:
-        text = pytesseract.image_to_string(img, lang="asm+eng")
-    except Exception:
-        try:
-            text = pytesseract.image_to_string(img)
-        except Exception:
-            text = ""
-    return text.strip()
 def is_likely_math(text: str) -> bool:
     if not text:
@@ -312,46 +299,32 @@ def is_likely_math(text: str) -> bool:
     math_chars = set("0123456789+-*/=^()%")
     if any(ch in text for ch in math_chars):
         return True
-    kws = ["গণিত", "সমীকৰণ", "উদাহৰণ", "প্ৰশ্ন", "বীজগণিত", "solve", "equation"]
-    return any(k in text for k in kws)
 def solve_math_expression(expr: str):
     try:
         expr = expr.replace("^", "**")
         if "=" in expr:
             left, right = expr.split("=", 1)
-            left_s = sp.sympify(left)
-            right_s = sp.sympify(right)
             eq = sp.Eq(left_s, right_s)
             sol = sp.solve(eq)
-            steps = [
-                "প্ৰথমে সমীকৰণ লওঁ:",
-                f"{sp.pretty(eq)}",
-                "Sympy ৰ সহায়ত সমাধান পোৱা যায়:",
-                str(sol),
-            ]
-            explanation = "ধাপ-ধাপে সমাধান (সংক্ষেপে):\n" + "\n".join(f"- {s}" for s in steps)
-            explanation += f"\n\nসেয়ে সমাধান: {sol}"
         else:
             expr_s = sp.sympify(expr)
             simp = sp.simplify(expr_s)
-            explanation = (
-                "প্ৰদত্ত গণিতীয় অভিব্যক্তি:\n"
-                f"{expr}\n\nসরলীকৰণ কৰাৰ পিছত পোৱা যায়:\n{simp}"
-            )
         return explanation
-    except Exception:
-        return (
-            "মই সঠিকভাৱে গণিতীয় অভিব্যক্তি চিনাক্ত কৰিব নোৱাৰিলোঁ। "
-            "দয়া কৰি সমীকৰণটো অলপ বেছি স্পষ্ট কৰি লিখক: উদাহৰণ – 2*x + 3 = 7"
-        )
 def speech_to_text(audio):
-    return ""
 def text_to_speech(text: str):
-    # stub: return empty string to avoid None in Gradio outputs
-    return ""
 # -------------------- Chat logic --------------------
 def login_user(username, user_state):
@@ -368,104 +341,74 @@ def login_user(username, user_state):
     )
     return user_state, stats
-def chat_logic(
-    username,
-    text_input,
-    image_input,
-    audio_input,
-    chat_history,
-    user_state,
-):
     if chat_history is None:
         chat_history = []
     if not user_state or not user_state.get("user_id"):
         sys_msg = "⚠️ প্ৰথমে ওপৰত আপোনাৰ নাম লিখি **Login / লগিন** টিপক।"
-        chat_history = chat_history + [[text_input or "", sys_msg]]
-        return chat_history, user_state, ""
     user_id = user_state["user_id"]
     final_query_parts = []
-    voice_text = speech_to_text(audio_input)
-    if voice_text:
-        final_query_parts.append(voice_text)
     ocr_text = ""
-    if image_input is not None and image_input != "":
-        img = None
-        try:
-            if isinstance(image_input, str):
-                img = Image.open(image_input)
-            else:
-                read_method = getattr(image_input, "read", None)
-                if callable(read_method):
-                    raw = image_input.read()
-                    img = Image.open(io.BytesIO(raw))
-                if img is None and isinstance(image_input, Image.Image):
-                    img = image_input
-        except Exception:
-            img = None
-        if img is not None:
-            try:
-                ocr_text = ocr_from_image(img)
-                if ocr_text:
-                    final_query_parts.append(ocr_text)
-            except Exception:
-                pass
     if text_input:
         final_query_parts.append(text_input)
     if not final_query_parts:
         sys_msg = "⚠️ অনুগ্ৰহ কৰি প্ৰশ্ন লিখক, কিম্বা ছবি আপলোড কৰক।"
-        chat_history = chat_history + [["", sys_msg]]
-        return chat_history, user_state, ""
     full_query = "\n".join(final_query_parts)
     conv = []
     for u, b in chat_history:
-        if u:
-            conv.append(("Student", u))
-        if b:
-            conv.append(("Tutor", b))
     is_math = is_likely_math(full_query)
     if is_math:
         math_answer = solve_math_expression(full_query)
         combined_question = (
-            full_query
-            + "\n\nগণিত প্ৰোগ্ৰামে এই ফলাফল দিছে:\n"
-            + math_answer
-            + "\n\nঅনুগ্ৰহ কৰি শ্রেণী ১০ ৰ শিক্ষাৰ্থীৰ বাবে সহজ ভাষাত ব্যাখ্যা কৰক।"
         )
         final_answer = llm_answer_with_rag(combined_question, conv)
     else:
         final_answer = llm_answer_with_rag(full_query, conv)
-    if final_answer is None:
-        final_answer = "মাফ কৰক — মই ইয়াৰ উত্তর দিব পৰা নাই।"
     log_interaction(user_id, full_query, final_answer, is_math)
-    audio_out = text_to_speech(final_answer) or ""
-    display_question = text_input or voice_text or ocr_text or "(empty)"
-    chat_history = chat_history + [[display_question, final_answer]]
-    return chat_history, user_state, audio_out
 # -------------------- Gradio UI --------------------
-with gr.Blocks(title=APP_NAME, css=None) as demo:
     gr.Markdown(
-        """
-        # 🧭 জাজাবৰ – SEBA অসমীয়া ক্লাছ ১০ AI Tutor (Free CPU)
-        - Upload your SEBA Class 10 PDFs to `pdfs/class10` in this repo (or when running locally, ensure folder exists)
-        - Text + Image (OCR) input
         - Math step-by-step solutions
-        - User login + progress
         """
     )
@@ -500,42 +443,44 @@ with gr.Blocks(title=APP_NAME, css=None) as demo:
             with gr.Row():
                 image_inp = gr.Image(label="📷 প্ৰশ্নৰ ছবি (Optional)", type="filepath")
-                audio_inp = gr.Audio(label="🎙️ কণ্ঠস্বৰ প্ৰশ্ন (Stub — not used now)", type="numpy")
             with gr.Row():
                 ask_btn = gr.Button("🤖 জাজাবৰক সোধক")
-                audio_out = gr.Audio(
-                    label="🔊 উত্তৰৰ অডিঅ’ (TTS – future upgrade)",
-                    interactive=False,
-                    type="filepath"
-                )
     login_btn.click(
         login_user,
         inputs=[username_inp, user_state],
         outputs=[user_state, stats_md],
     )
-    def wrapped_chat(text, image, audio, history, user_state_inner, username_inner):
-        if user_state_inner is None:
-            user_state_inner = {}
-        if username_inner and not user_state_inner.get("username"):
-            user_state_inner["username"] = username_inner
-        return chat_logic(username_inner, text, image, audio, history, user_state_inner)
     ask_btn.click(
-        wrapped_chat,
-        inputs=[text_inp, image_inp, audio_inp, chat, user_state, username_inp],
-        outputs=[chat, user_state, audio_out],
     )
     text_inp.submit(
-        wrapped_chat,
-        inputs=[text_inp, image_inp, audio_inp, chat, user_state, username_inp],
-        outputs=[chat, user_state, audio_out],
     )
-# -------------------- Launch --------------------
 if __name__ == "__main__":
-    # bind to 0.0.0.0 and allow share link for hosted environments where localhost may be blocked
-    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)

 """
 Jajabor – SEBA Assamese Class 10 Tutor (Free-tier CPU-ready)
+Fixed version with correct Gradio version and improved error handling
 """
 import os
 DB_PATH = os.path.join(BASE_DIR, "jajabor_users.db")
 EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
+USE_HF_INFERENCE = False
 LLM_LOCAL_NAME = "google/flan-t5-small"
 LLM_MAX_TOKENS = 128
 init_db()
+# -------------------- PDF reading --------------------
 def extract_text_from_pdf(pdf_path: str) -> str:
     text_pages = []
     try:
 print("Total chunks:", len(corpus_chunks))
 index = None
 if len(corpus_chunks) > 0:
+    print("Encoding chunks...")
     try:
         embs = embedding_model.encode(corpus_chunks, batch_size=32, show_progress_bar=False).astype("float32")
         dim = embs.shape[1]
     print("No corpus chunks found: upload PDFs to ./pdfs/class10")
 def rag_search(query: str, k: int = TOP_K):
+    if index is None or len(corpus_chunks) == 0:
         return []
     try:
         q_vec = embedding_model.encode([query]).astype("float32")
         D, I = index.search(q_vec, k)
         results = []
         for dist, idx in zip(D[0], I[0]):
+            if idx == -1 or idx >= len(corpus_chunks):
                 continue
             results.append(
                 {
         print("RAG search error:", e)
         return []
+# -------------------- Local CPU LLM --------------------
 print("Loading local CPU LLM:", LLM_LOCAL_NAME)
 llm_pipe = None
 try:
     tokenizer = AutoTokenizer.from_pretrained(LLM_LOCAL_NAME)
     model = AutoModelForSeq2SeqLM.from_pretrained(LLM_LOCAL_NAME)
+    llm_pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, device=-1)  # CPU
     print("Local LLM loaded.")
 except Exception as e:
+    print("Failed to load local LLM:", e)
     llm_pipe = None
 SYSTEM_PROMPT = """
         ctx += f"\n[Context {i} – {src}]\n{block['text']}\n"
     hist = ""
+    for role, msg in chat_history[-4:]:  # Keep last 4 exchanges
         hist += f"{role}: {msg}\n"
     prompt = f"""{SYSTEM_PROMPT}
 def llm_answer_with_rag(question: str, chat_history):
     retrieved = rag_search(question, TOP_K)
+    if not retrieved:
+        return "মই এই প্ৰশ্নৰ উত্তৰ দিবলৈ প্ৰয়োজনীয় তথ্য বিচাৰি পোৱা নাই। দয়া কৰি নিশ্চিত কৰক যে আপোনাৰ পাঠ্যপুথিৰ PDF ফাইলসমূহ সঠিকভাৱে আপলোড কৰা হৈছে।"
     prompt = build_rag_prompt(retrieved, question, chat_history)
+    if llm_pipe is None:
+        return "AI মডেল ল'ড হোৱা নাই। দয়া কৰি পুনৰ চেষ্টা কৰক।"
+    try:
+        out = llm_pipe(prompt, max_new_tokens=LLM_MAX_TOKENS, do_sample=False)
+        if isinstance(out, list) and len(out) > 0:
+            if "generated_text" in out[0]:
                 return out[0]["generated_text"]
+            return str(out[0])
+        return str(out)
+    except Exception as e:
+        print("LLM generation error:", e)
+        return f"উত্তৰ তৈয়াৰ কৰোঁতে সমস্যা: {e}"
 # -------------------- OCR + Math helpers --------------------
def ocr_from_image(img_path: str):
    """Run Tesseract OCR on the image stored at *img_path*.

    Args:
        img_path: Filesystem path of the image; empty or None means
            "no image".

    Returns:
        The recognised text with surrounding whitespace stripped, or ""
        when there is no image or OCR fails.
    """
    if not img_path:
        return ""
    try:
        # convert() yields an independent in-memory image, so the file
        # handle opened by Image.open can be released straight away.
        with Image.open(img_path) as source:
            img = source.convert("RGB")
        try:
            text = pytesseract.image_to_string(img, lang="eng+asm")
        except pytesseract.TesseractError as e:
            # Typically the Assamese language data is not installed; retry
            # with Tesseract's default language rather than returning
            # nothing at all.
            print("OCR with eng+asm failed, retrying with default language:", e)
            text = pytesseract.image_to_string(img)
        return text.strip()
    except Exception as e:
        # Best effort: a bad image must not break the chat turn.
        print("OCR error:", e)
        return ""
 def is_likely_math(text: str) -> bool:
     if not text:
     math_chars = set("0123456789+-*/=^()%")
     if any(ch in text for ch in math_chars):
         return True
+    math_kws = ["গণিত", "সমীকৰণ", "উদাহৰণ", "প্ৰশ্ন", "বীজগণিত", "solve", "equation", "math"]
+    return any(k in text.lower() for k in math_kws)
 def solve_math_expression(expr: str):
     try:
         expr = expr.replace("^", "**")
         if "=" in expr:
             left, right = expr.split("=", 1)
+            left_s = sp.sympify(left.strip())
+            right_s = sp.sympify(right.strip())
             eq = sp.Eq(left_s, right_s)
             sol = sp.solve(eq)
+            explanation = f"সমীকৰণ: {eq}\n\nসমাধান: {sol}"
         else:
             expr_s = sp.sympify(expr)
             simp = sp.simplify(expr_s)
+            explanation = f"প্ৰকাশ: {expr}\n\nসৰলীকৃত: {simp}"
         return explanation
+    except Exception as e:
+        return f"গণিত সমাধানত সমস্যা: {e}"
 def speech_to_text(audio):
+    return ""  # Stub for future implementation
 def text_to_speech(text: str):
+    return None  # Stub for future implementation
 # -------------------- Chat logic --------------------
 def login_user(username, user_state):
     )
     return user_state, stats
def chat_logic(text_input, image_input, chat_history, user_state):
    """Handle one chat turn: collect the question, answer it, log it.

    Args:
        text_input: Question typed by the student (may be empty or None).
        image_input: Value of the image component, handed unchanged to
            ocr_from_image (the UI declares it with type="filepath").
        chat_history: Existing [question, answer] pairs, or None.
        user_state: Session dict; must contain "user_id" after login.

    Returns:
        (history, user_state, None). The history is a new list; the
        trailing None is wired to the image input in the UI, clearing it.
    """
    # Work on a copy so the caller's list is never mutated in place.
    history = list(chat_history) if chat_history else []

    if not user_state or not user_state.get("user_id"):
        sys_msg = "⚠️ প্ৰথমে ওপৰত আপোনাৰ নাম লিখি **Login / লগিন** টিপক।"
        history.append([text_input or "", sys_msg])
        return history, user_state, None

    user_id = user_state["user_id"]
    # Whitespace-only text counts as "no question" instead of reaching the LLM.
    typed_text = (text_input or "").strip()

    query_parts = []
    # Process image OCR
    ocr_text = ""
    if image_input is not None:
        ocr_text = ocr_from_image(image_input)
        if ocr_text:
            query_parts.append(f"ছবিৰ পৰা পাঠ: {ocr_text}")
    if typed_text:
        query_parts.append(typed_text)

    if not query_parts:
        sys_msg = "⚠️ অনুগ্ৰহ কৰি প্ৰশ্ন লিখক, কিম্বা ছবি আপলোড কৰক।"
        history.append(["", sys_msg])
        return history, user_state, None

    full_query = "\n".join(query_parts)

    # Convert chat history to (role, message) pairs, skipping blank entries.
    conv = []
    for u, b in history:
        if u and u.strip():
            conv.append(("Student", u.strip()))
        if b and b.strip():
            conv.append(("Tutor", b.strip()))

    is_math = is_likely_math(full_query)
    if is_math:
        # Give the LLM the symbolic result so it only has to explain it.
        math_answer = solve_math_expression(full_query)
        combined_question = (
            full_query + "\n\nগণিত সমাধান:\n" + math_answer +
            "\n\nঅনুগ্ৰহ কৰি শ্রেণী ১০ ৰ শিক্ষাৰ্থীৰ বাবে সহজ ভাষাত ব্যাখ্যা কৰক।"
        )
        final_answer = llm_answer_with_rag(combined_question, conv)
    else:
        final_answer = llm_answer_with_rag(full_query, conv)

    log_interaction(user_id, full_query, final_answer, is_math)
    display_question = typed_text or ocr_text or "(ছবিৰ প্ৰশ্ন)"
    history.append([display_question, final_answer])
    return history, user_state, None
 # -------------------- Gradio UI --------------------
+with gr.Blocks(title=APP_NAME, css="""
+.stats-box { background: #f0f8ff; padding: 10px; border-radius: 5px; }
+""") as demo:
     gr.Markdown(
+        f"""
+        # 🧭 {APP_NAME}
+        - SEBA Class 10 PDFs upload to `pdfs/class10` folder
+        - Text + Image (OCR) input support
         - Math step-by-step solutions
+        - User login + progress tracking
         """
     )
             with gr.Row():
                 image_inp = gr.Image(label="📷 প্ৰশ্নৰ ছবি (Optional)", type="filepath")
             with gr.Row():
                 ask_btn = gr.Button("🤖 জাজাবৰক সোধক")
+                clear_btn = gr.Button("🧹 পৰিষ্কাৰ কৰক")
+    # Login handler
     login_btn.click(
         login_user,
         inputs=[username_inp, user_state],
         outputs=[user_state, stats_md],
     )
+    # Chat handler
     ask_btn.click(
+        chat_logic,
+        inputs=[text_inp, image_inp, chat, user_state],
+        outputs=[chat, user_state, image_inp],
+    ).then(
+        lambda: "", None, text_inp
+    ).then(
+        lambda: None, None, image_inp
     )
+    # Text submit handler
     text_inp.submit(
+        chat_logic,
+        inputs=[text_inp, image_inp, chat, user_state],
+        outputs=[chat, user_state, image_inp],
+    ).then(
+        lambda: "", None, text_inp
+    ).then(
+        lambda: None, None, image_inp
     )
+    # Clear chat
+    def clear_chat():
+        return [], None
+    clear_btn.click(clear_chat, outputs=[chat, image_inp])
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860, share=True)