# Assamese / app.py
# Sazid2's picture
# Create app.py
# bf281e4 verified
# raw
# history blame
# 20.3 kB
# app.py
"""
Jajabor – SEBA Assamese Class 10 Tutor (Gradio app)
Full single-file app:
- Loads PDFs from ./pdfs/class10
- Builds FAISS index using sentence-transformers
- Optional Hugging Face Inference API for LLM (set HF_API_TOKEN env var)
- Login + sqlite interactions logging
- OCR from images (pytesseract) with robust handling of gr.Image(type="filepath")
"""
import os
import io
import sqlite3
from datetime import datetime
import traceback
import fitz # PyMuPDF
import numpy as np
from PIL import Image
import gradio as gr
import faiss
import pytesseract
from sentence_transformers import SentenceTransformer
import sympy as sp
# Optional HF inference
from huggingface_hub import InferenceApi
# -------------------- CONFIG --------------------
APP_NAME = "Jajabor – SEBA Assamese Class 10 Tutor"

# Data locations are resolved relative to this file, not the working directory.
BASE_DIR = os.path.abspath(os.path.dirname(__file__))
PDF_DIR = os.path.join(BASE_DIR, "pdfs", "class10")
DB_PATH = os.path.join(BASE_DIR, "jajabor_users.db")

# Embedding model (compact for Spaces)
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# LLM: model to call via HF Inference API. Change if you have another hosted model.
LLM_MODEL_NAME = "Qwen/Qwen2.5-3B-Instruct"
USE_HF_INFERENCE = True  # set False if you don't want to call HF Inference

# Chunking (in characters) and retrieval depth used by the RAG pipeline.
CHUNK_SIZE = 600
CHUNK_OVERLAP = 120
TOP_K = 5

# Normalise the token: an empty or whitespace-only value is treated exactly
# like a missing one, so the warning below fires instead of the client being
# skipped silently.
HUGGINGFACE_API_TOKEN = (os.environ.get("HF_API_TOKEN") or "").strip() or None
if USE_HF_INFERENCE and HUGGINGFACE_API_TOKEN is None:
    print("Warning: HF_API_TOKEN not set. LLM calls will fail until the token is provided in env.")

# `inference` stays None whenever the client cannot be used; callers check for that.
inference = None
if USE_HF_INFERENCE and HUGGINGFACE_API_TOKEN is not None:
    try:
        inference = InferenceApi(repo_id=LLM_MODEL_NAME, token=HUGGINGFACE_API_TOKEN)
    except Exception as e:
        # Best-effort: the app still starts (without an LLM) if the client cannot be built.
        print("Failed to initialize HF Inference API client:", e)
        inference = None
# -------------------- DB helpers --------------------
def init_db(db_path=DB_PATH):
    """Create the SQLite database and its tables if they do not exist yet.

    Args:
        db_path: Path of the SQLite file. Its parent directory is created
            when the path has one.

    The ``users`` table holds one row per login name; ``interactions`` holds
    one row per question/answer pair and references ``users.id``.
    """
    # dirname() is "" for a bare file name, and os.makedirs("") raises.
    parent_dir = os.path.dirname(db_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    conn = sqlite3.connect(db_path)
    try:
        cur = conn.cursor()
        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                username TEXT UNIQUE,
                created_at TEXT
            )
            """
        )
        cur.execute(
            """
            CREATE TABLE IF NOT EXISTS interactions (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id INTEGER,
                timestamp TEXT,
                query TEXT,
                answer TEXT,
                is_math INTEGER,
                FOREIGN KEY(user_id) REFERENCES users(id)
            )
            """
        )
        conn.commit()
    finally:
        # Always release the connection, even if a statement fails.
        conn.close()
def get_or_create_user(username: str):
    """Return the id of the user named ``username``, creating the row if needed.

    Args:
        username: Login name; surrounding whitespace is ignored.

    Returns:
        The integer ``users.id``, or ``None`` when the name is missing or blank.
    """
    username = (username or "").strip()
    if not username:
        return None

    lookup_sql = "SELECT id FROM users WHERE username=?"
    conn = sqlite3.connect(DB_PATH)
    try:
        cur = conn.cursor()
        cur.execute(lookup_sql, (username,))
        row = cur.fetchone()
        if row:
            return row[0]
        try:
            cur.execute(
                "INSERT INTO users (username, created_at) VALUES (?, ?)",
                (username, datetime.utcnow().isoformat()),
            )
            conn.commit()
            return cur.lastrowid
        except sqlite3.IntegrityError:
            # Another session inserted the same name between our SELECT and
            # INSERT (username is UNIQUE); reuse the row it created.
            conn.rollback()
            cur.execute(lookup_sql, (username,))
            row = cur.fetchone()
            return row[0] if row else None
    finally:
        conn.close()
def log_interaction(user_id, query, answer, is_math: bool):
    """Append one question/answer pair to the ``interactions`` table.

    Args:
        user_id: ``users.id`` of the student who asked.
        query: The full question text that was answered.
        answer: The answer shown to the student.
        is_math: Stored as 1 when truthy, otherwise 0.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute(
            """
            INSERT INTO interactions (user_id, timestamp, query, answer, is_math)
            VALUES (?, ?, ?, ?, ?)
            """,
            (user_id, datetime.utcnow().isoformat(), query, answer, 1 if is_math else 0),
        )
        conn.commit()
    finally:
        # Close even when the INSERT fails so the connection is not leaked.
        conn.close()
def get_user_stats(user_id):
    """Return ``(total_questions, math_questions)`` logged for ``user_id``.

    Both counts are 0 for a user with no logged interactions (``SUM`` over
    zero rows yields NULL, which is mapped to 0).
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        row = conn.execute(
            "SELECT COUNT(*), SUM(is_math) FROM interactions WHERE user_id=?", (user_id,)
        ).fetchone()
    finally:
        # Close even when the query fails so the connection is not leaked.
        conn.close()
    total = row[0] or 0
    math_count = row[1] or 0
    return total, math_count
# Create the database file and tables at import time (uses the default DB_PATH).
init_db()
# -------------------- PDF loading + RAG --------------------
def extract_text_from_pdf(pdf_path: str) -> str:
    """Return the plain text of every page of a PDF, joined by newlines.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The concatenated page texts, or "" when the file cannot be opened.
        Pages whose text extraction fails, or that yield no text, are skipped.
    """
    try:
        doc = fitz.open(pdf_path)
    except Exception:
        # Best-effort: an unreadable file contributes no text.
        return ""

    pages = []
    try:
        for page in doc:
            try:
                txt = page.get_text("text")
            except Exception:
                continue
            if txt:
                pages.append(txt)
    finally:
        # Release the file handle held by the document.
        doc.close()
    return "\n".join(pages)
def load_all_pdfs(pdf_dir: str):
    """Read every ``*.pdf`` file directly inside ``pdf_dir``.

    Args:
        pdf_dir: Directory to scan (not recursive).

    Returns:
        ``(texts, metas)``: two parallel lists in file-name order, where
        ``texts[i]`` is the extracted text and ``metas[i]`` is
        ``{"source": <file name>}``. Both are empty when the directory is missing.
    """
    if not os.path.isdir(pdf_dir):
        print("PDF_DIR not found:", pdf_dir)
        return [], []

    texts, metas = [], []
    pdf_names = [name for name in sorted(os.listdir(pdf_dir)) if name.lower().endswith(".pdf")]
    for name in pdf_names:
        full_path = os.path.join(pdf_dir, name)
        print("Reading:", full_path)
        texts.append(extract_text_from_pdf(full_path))
        metas.append({"source": name})
    return texts, metas
def split_text(text: str, chunk_size=600, overlap=120):
    """Split ``text`` into overlapping character windows.

    Args:
        text: The text to split; falsy input yields an empty list.
        chunk_size: Maximum number of characters per chunk.
        overlap: Number of characters shared by consecutive chunks.

    Returns:
        A list of chunks in document order. Whitespace-only chunks are dropped.
    """
    if not text:
        return []

    total = len(text)
    # Advance by chunk_size - overlap, but always by at least one character
    # so the loop terminates even when overlap >= chunk_size.
    step = max(chunk_size - overlap, 1)

    chunks = []
    start = 0
    while start < total:
        end = min(start + chunk_size, total)
        piece = text[start:end]
        if piece.strip():
            chunks.append(piece)
        if end >= total:
            # The window already reached the end of the text; any further
            # window would only repeat the tail of this chunk.
            break
        start += step
    return chunks
# ---- One-time start-up work: load the embedder, read the PDFs, build the index ----
print("Loading embedding model:", EMBEDDING_MODEL_NAME)
embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)

print("Loading PDFs from", PDF_DIR)
all_texts, all_metas = load_all_pdfs(PDF_DIR)
print("Number of PDFs:", len(all_texts))

# corpus_chunks[i] is a text chunk and corpus_metas[i] is the metadata of the
# PDF it came from; the FAISS row ids index into both lists.
corpus_chunks = []
corpus_metas = []
for doc_text, doc_meta in zip(all_texts, all_metas):
    pieces = split_text(doc_text, CHUNK_SIZE, CHUNK_OVERLAP)
    corpus_chunks += pieces
    corpus_metas += [doc_meta] * len(pieces)
print("Total chunks:", len(corpus_chunks))

# `index` stays None when there is nothing to search or when encoding fails.
index = None
if not corpus_chunks:
    print("No corpus chunks found: upload PDFs to ./pdfs/class10")
else:
    print("Encoding chunks (this may take some seconds)...")
    try:
        embs = embedding_model.encode(
            corpus_chunks, batch_size=32, show_progress_bar=False
        ).astype("float32")
        dim = embs.shape[1]
        index = faiss.IndexFlatL2(dim)
        index.add(embs)
        print("✅ FAISS index ready; dim:", dim)
    except Exception as e:
        print("Failed to encode/add to index:", e)
        index = None
def rag_search(query: str, k: int = TOP_K):
    """Return up to ``k`` corpus chunks closest to ``query``.

    Args:
        query: Free-text question to embed and search for.
        k: Maximum number of chunks to return.

    Returns:
        A list of ``{"score", "text", "meta"}`` dicts in the order returned by
        the index, where ``score`` is the distance reported by the index.
        The list is empty when no index is available, the query is blank,
        ``k`` is not positive, or the search fails.
    """
    if index is None or k <= 0 or not query or not query.strip():
        return []

    try:
        q_vec = embedding_model.encode([query]).astype("float32")
        distances, ids = index.search(q_vec, k)
    except Exception:
        # Retrieval is best-effort, but the failure is logged rather than hidden.
        traceback.print_exc()
        return []

    results = []
    for dist, idx in zip(distances[0], ids[0]):
        idx = int(idx)
        # Negative ids (e.g. -1) mark empty result slots; also guard the upper bound.
        if idx < 0 or idx >= len(corpus_chunks):
            continue
        results.append(
            {
                "score": float(dist),
                "text": corpus_chunks[idx],
                "meta": corpus_metas[idx],
            }
        )
    return results
# -------------------- LLM helpers --------------------
# Instruction preamble placed at the top of every prompt sent to the LLM.
# It is runtime text (mostly Assamese); edit it only to change model behaviour.
SYSTEM_PROMPT = """
You are "Jajabor", an expert SEBA Assamese tutor for Class 10.
Always prefer to answer in Assamese. If the student clearly asks for English, you may reply in English.
Rules:
- Use ONLY the given textbook context.
- If you are not sure, say: "এই প্ৰশ্নটো পাঠ্যপুথিৰ অংশত স্পষ্টকৈ নাই, সেয়েহে মই নিশ্চিত নহয়।"
- বোঝাপৰা সহজ ভাষাত ব্যাখ্যা কৰা, উদাহৰণ দিয়ক।
- If it is a maths question, explain step-by-step clearly.
"""
def build_rag_prompt(context_blocks, question, chat_history):
    """Assemble the full LLM prompt from system prompt, history, question and context.

    Args:
        context_blocks: Iterable of dicts with a ``"text"`` entry and an
            optional ``"meta"`` dict carrying ``"source"``.
        question: The student's question.
        chat_history: Iterable of ``(role, message)`` pairs, or ``None``.

    Returns:
        The prompt string.
    """
    ctx_parts = []
    for i, item in enumerate(context_blocks or [], start=1):
        src = (item.get("meta") or {}).get("source", "textbook")
        # Separate the index from the source name; without the separator the
        # header rendered as e.g. "Context 1chapter.pdf".
        ctx_parts.append(f"\n[Context {i} - {src}]\n{item['text']}\n")
    ctx = "".join(ctx_parts)

    hist = "".join(f"{role}: {msg}\n" for role, msg in (chat_history or []))

    prompt = f"""{SYSTEM_PROMPT}
পূৰ্বৰ বাৰ্তাসমূহ:
{hist}
সদস্যৰ প্ৰশ্ন:
{question}
সম্পৰ্কিত পাঠ্যপুথিৰ অংশ:
{ctx}
এতিয়া একেদম সহায়ক আৰু বুজিবলৈ সহজ উত্তৰ দিয়া।
"""
    return prompt
def _extract_generated_text(out):
    """Pull the generated string out of the response shapes the endpoint may return."""
    if isinstance(out, dict) and "generated_text" in out:
        return out["generated_text"]
    if isinstance(out, list) and out:
        first = out[0]
        if isinstance(first, dict) and "generated_text" in first:
            return first["generated_text"]
        # sometimes a list of strings
        if isinstance(first, str):
            return first
    if isinstance(out, str):
        return out
    return str(out)


def call_llm_via_hf(prompt: str, max_tokens=512):
    """Send ``prompt`` to the configured HF Inference client and return its text.

    Args:
        prompt: The complete prompt to send.
        max_tokens: Upper bound on newly generated tokens.

    Returns:
        The generated text, or a human-readable message when the client is
        not configured or the call fails. This function does not raise for
        API failures.
    """
    if inference is None:
        return "LLM not available: HF Inference client not configured (set HF_API_TOKEN and ensure model name is accessible)."

    try:
        out = inference(
            inputs=prompt,
            params={
                "max_new_tokens": max_tokens,
                "temperature": 0.3,
                # Ask for the completion only, so the prompt is not echoed
                # back to the student.
                "return_full_text": False,
            },
        )
    except Exception as e:
        traceback.print_exc()
        return f"LLM call failed: {e}"

    # Report an API-side error payload the same way as an exception.
    if isinstance(out, dict) and "error" in out and "generated_text" not in out:
        return f"LLM call failed: {out['error']}"

    text = _extract_generated_text(out)
    if not isinstance(text, str):
        text = str(text)
    # Fallback in case the prompt is still echoed at the start of the output.
    if prompt and text.startswith(prompt):
        text = text[len(prompt):].lstrip()
    return text
def llm_answer_with_rag(question: str, chat_history):
    """Answer ``question`` with the LLM, using retrieved textbook chunks as context.

    Args:
        question: Text used both for retrieval and as the question in the prompt.
        chat_history: ``(role, message)`` pairs forwarded to the prompt builder.

    Returns:
        The LLM's reply, or a fixed notice when HF inference is disabled.
    """
    context_blocks = rag_search(question, TOP_K)
    prompt = build_rag_prompt(context_blocks, question, chat_history)
    if not USE_HF_INFERENCE:
        return "LLM not configured (USE_HF_INFERENCE=False)."
    return call_llm_via_hf(prompt)
# -------------------- OCR + math helpers --------------------
def ocr_from_image(img: Image.Image):
    """Run Tesseract OCR on ``img`` and return the recognised text, stripped.

    Recognition is tried with Assamese + English first and then with the
    default language; if both attempts fail the result is "".
    ``None`` input also yields "".
    """
    if img is None:
        return ""

    try:
        img = img.convert("RGB")
    except Exception:
        # Keep the image as-is when it cannot be converted.
        pass

    text = ""
    # First attempt: Assamese + English; second: pytesseract's default language
    # (used when the requested language data is not installed).
    for ocr_kwargs in ({"lang": "asm+eng"}, {}):
        try:
            text = pytesseract.image_to_string(img, **ocr_kwargs)
            break
        except Exception:
            continue
    return text.strip()
def is_likely_math(text: str) -> bool:
    """Heuristically decide whether ``text`` looks like a maths question.

    Returns True when the text contains any digit or one of ``+-*/=^()%``,
    or any of a fixed list of Assamese/English keywords; False otherwise
    (including for empty input).
    """
    if not text:
        return False

    # Any single digit/operator/parenthesis character is enough.
    if not set("0123456789+-*/=^()%").isdisjoint(text):
        return True

    keywords = ("গণিত", "সমীকৰণ", "উদাহৰণ", "প্ৰশ্ন", "বীজগণিত", "solve", "equation")
    return any(word in text for word in keywords)
def solve_math_expression(expr: str):
    """Solve an equation or simplify an expression with SymPy and describe the result.

    Args:
        expr: Text such as ``"2*x + 3 = 7"`` or ``"x^2 - 1"``; ``^`` is read
            as exponentiation. With an ``=``, the text is split at the first
            ``=`` and solved as an equation; otherwise it is simplified.

    Returns:
        An Assamese explanation string, or a fixed "could not recognise the
        expression" message when the input is rejected or SymPy fails.
    """
    fallback = (
        "মই সঠিকভাৱে গণিতীয় অভিব্যক্তি চিনাক্ত কৰিব নোৱাৰিলোঁ। "
        "দয়া কৰি সমীকৰণটো অলপ বেছি স্পষ্টকৈ লিখা: উদাহৰণ – 2*x + 3 = 7"
    )

    # SECURITY: sympify() evaluates its argument with eval(), and `expr` comes
    # from chat/OCR text. Only plain arithmetic characters are let through;
    # underscores, quotes, brackets and backslashes are rejected. This narrows
    # the attack surface but does not make sympify safe for hostile input
    # (e.g. very expensive expressions) -- NOTE(review): consider a sandbox.
    allowed_chars = set(
        "0123456789"
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "+-*/^=().,%! \t\r\n"
    )
    if not expr or not expr.strip() or not allowed_chars.issuperset(expr):
        return fallback

    try:
        expr = expr.replace("^", "**")
        if "=" in expr:
            left, right = expr.split("=", 1)
            eq = sp.Eq(sp.sympify(left), sp.sympify(right))
            sol = sp.solve(eq)
            steps = [
                "প্ৰথমে সমীকৰণ লওঁ:",
                f"{sp.pretty(eq)}",
                "Sympy ৰ সহায়ত সমাধান পোৱা যায়:",
                str(sol),
            ]
            explanation = "ধাপ-ধাপে সমাধান (সংক্ষেপে):\n" + "\n".join(f"- {s}" for s in steps)
            explanation += f"\n\nসেয়ে সমাধান: {sol}"
        else:
            simp = sp.simplify(sp.sympify(expr))
            explanation = (
                "প্ৰদত্ত গণিতীয় অভিব্যক্তি:\n"
                f"{expr}\n\nসরলীকৰণ কৰাৰ পিছত পোৱা যায়:\n{simp}"
            )
        return explanation
    except Exception:
        # SymPy raises many exception types on malformed input; all of them
        # map to the same friendly message.
        return fallback
def speech_to_text(audio):
    """Placeholder for speech recognition: ignores ``audio`` and returns ""."""
    # stub for future ASR integration
    return ""
def text_to_speech(text: str):
    """Placeholder for speech synthesis: ignores ``text`` and returns ``None``."""
    # stub for TTS integration
    return None
# -------------------- Chat logic --------------------
def login_user(username, user_state):
    """Log a student in by name and report their usage statistics.

    Args:
        username: Name typed into the login box; may be ``None`` or blank.
        user_state: Current session state, returned unchanged when login fails.

    Returns:
        ``(new_user_state, markdown_message)``. On success the state is
        ``{"username", "user_id"}`` and the message shows the user's totals;
        for a blank name the original state and a warning are returned.
    """
    name = (username or "").strip()
    if name == "":
        return user_state, "⚠️ অনুগ্ৰহ কৰি প্ৰথমে লগিনৰ বাবে এটা নাম লিখক।"

    uid = get_or_create_user(name)
    new_state = {"username": name, "user_id": uid}
    total, math_count = get_user_stats(uid)
    stat_lines = [
        f"👤 ব্যৱহাৰকাৰী: **{name}**\n\n",
        f"📊 মোট প্ৰশ্ন: **{total}**\n",
        f"🧮 গণিত প্ৰশ্ন: **{math_count}**",
    ]
    return new_state, "".join(stat_lines)
def _load_image_input(image_input):
    """Best-effort conversion of a Gradio image value into a PIL image, else None.

    Accepts a file path, a PIL image, or a file-like object with ``read()``.
    """
    if image_input is None:
        return None
    try:
        if isinstance(image_input, Image.Image):
            return image_input
        if isinstance(image_input, str):
            if not image_input:
                return None
            # Copy the pixels so the file handle is closed before returning.
            with Image.open(image_input) as opened:
                return opened.copy()
        reader = getattr(image_input, "read", None)
        if callable(reader):
            return Image.open(io.BytesIO(reader()))
    except Exception:
        # An unreadable image must not break the chat; log it and carry on.
        traceback.print_exc()
    return None


def chat_logic(
    username,
    text_input,
    image_input,
    audio_input,
    chat_history,
    user_state,
):
    """Handle one chat turn: gather the question, answer it, log it.

    Args:
        username: Value of the login textbox (not used by the logic itself).
        text_input: Typed question, may be empty or ``None``.
        image_input: Optional image (path, PIL image or file-like) to OCR.
        audio_input: Optional audio passed to the speech-to-text stub.
        chat_history: List of ``[user_message, bot_message]`` pairs, or ``None``.
        user_state: Session dict; must contain a truthy ``"user_id"``.

    Returns:
        ``(new_chat_history, user_state, audio_out)``. The input history list
        is not mutated; a new list with one extra pair is returned.
    """
    # Ensure chat_history is a list
    if chat_history is None:
        chat_history = []

    if not user_state or not user_state.get("user_id"):
        sys_msg = "⚠️ প্ৰথমে ওপৰত আপোনাৰ নাম লিখি **Login / লগিন** টিপক।"
        return chat_history + [[text_input or "", sys_msg]], user_state, None
    user_id = user_state["user_id"]

    # audio (stub)
    voice_text = speech_to_text(audio_input)

    # image -> OCR text (best-effort)
    ocr_text = ""
    img = _load_image_input(image_input)
    if img is not None:
        try:
            ocr_text = ocr_from_image(img) or ""
        except Exception:
            traceback.print_exc()
            ocr_text = ""

    # A whitespace-only message is treated as "nothing typed".
    typed_text = (text_input or "").strip()

    # Order matters: voice, then OCR, then typed text.
    final_query_parts = [part for part in (voice_text, ocr_text, typed_text) if part]
    if not final_query_parts:
        sys_msg = "⚠️ অনুগ্ৰহ কৰি প্ৰশ্ন লিখক, কিম্বা ছবি আপলোড কৰক।"
        return chat_history + [["", sys_msg]], user_state, None
    full_query = "\n".join(final_query_parts)

    # Flatten the [user, bot] pairs into (role, message) tuples for the prompt.
    conv = []
    for user_msg, bot_msg in chat_history:
        if user_msg:
            conv.append(("Student", user_msg))
        if bot_msg:
            conv.append(("Tutor", bot_msg))

    is_math = is_likely_math(full_query)
    question = full_query
    if is_math:
        # Give the LLM the symbolic result so it can explain it to the student.
        math_answer = solve_math_expression(full_query)
        question = (
            full_query
            + "\n\nগণিত প্ৰোগ্ৰামে এই ফলাফল দিছে:\n"
            + math_answer
            + "\n\nঅনুগ্ৰহ কৰি শ্রেণী ১০ ৰ শিক্ষাৰ্থীৰ বাবে সহজ ভাষাত ব্যাখ্যা কৰক।"
        )
    final_answer = llm_answer_with_rag(question, conv)

    # A logging failure must not cost the student their answer.
    try:
        log_interaction(user_id, full_query, final_answer, is_math)
    except sqlite3.Error:
        traceback.print_exc()

    audio_out = text_to_speech(final_answer)
    display_question = typed_text or voice_text or ocr_text or "(empty)"
    return chat_history + [[display_question, final_answer]], user_state, audio_out
# -------------------- Gradio UI --------------------
# NOTE(review): the original indentation was lost, so the nesting of the
# layout containers below is reconstructed -- confirm against the deployed UI.
with gr.Blocks(title=APP_NAME, css=None) as demo:
    gr.Markdown(
        """
# 🧭 জাজাবৰ – SEBA অসমীয়া ক্লাছ ১০ AI Tutor
- Upload your SEBA Class 10 PDFs to `pdfs/class10` in this repo (or when running locally, ensure folder exists)
- Text + Image (OCR) input
- Math step-by-step solutions
- User login + progress
"""
    )

    # Per-session state: filled with {"username", "user_id"} by login_user.
    user_state = gr.State({})

    with gr.Row():
        # Left column: login box, usage stats and usage tips.
        with gr.Column(scale=1):
            gr.Markdown("### 👤 লগিন")
            username_inp = gr.Textbox(
                label="নাম / ইউজাৰ আইডি",
                placeholder="উদাহৰণ: abu10, student01 ..."
            )
            login_btn = gr.Button("✅ Login / লগিন")
            stats_md = gr.Markdown("এতিয়ালৈকে লগিন হোৱা নাই।", elem_classes="stats-box")
            gr.Markdown(
                """
### 💡 টিপছ
- "ক্লাছ ১০ গণিত: উদাহৰণ ৩.১ প্ৰশ্ন ২" – এই ধৰণৰ প্ৰশ্ন ভাল
- ফটো আপলোড কৰিলে টেক্স্টটো OCR কৰি পঢ়িব চেষ্টা কৰা হয়
- সম্ভৱ হলে প্ৰশ্নটো অসমীয়াত সোধক 🙂
"""
            )

        # Right column: conversation plus the question inputs.
        with gr.Column(scale=3):
            chat = gr.Chatbot(label="জাজাবৰ সৈতে কথোপকথন", height=500)
            text_inp = gr.Textbox(
                label="আপোনাৰ প্ৰশ্ন লিখক",
                placeholder='উদাহৰণ: "ক্লাছ ১০ অসমীয়া: অনুচ্ছেদ পাঠ ১ ৰ মূল বিষয় কি?"',
                lines=2,
            )
            with gr.Row():
                # IMPORTANT: use type="filepath" so Gradio returns a local path string
                image_inp = gr.Image(label="📷 প্ৰশ্নৰ ছবি (Optional)", type="filepath")
                audio_inp = gr.Audio(label="🎙️ কণ্ঠস্বৰ প্ৰশ্ন (Stub — not used now)", type="numpy")
            with gr.Row():
                ask_btn = gr.Button("🤖 জাজাবৰক সোধক")
            audio_out = gr.Audio(label="🔊 উত্তৰৰ অডিঅ’ (TTS – future upgrade)", interactive=False)

    login_btn.click(
        login_user,
        inputs=[username_inp, user_state],
        outputs=[user_state, stats_md],
    )

    def wrapped_chat(text, image, audio, history, user_state_inner, username_inner):
        """Adapt the UI argument order to chat_logic, remembering the typed name."""
        state = {} if user_state_inner is None else user_state_inner
        # keep username in state if provided
        if username_inner and not state.get("username"):
            state["username"] = username_inner
        return chat_logic(username_inner, text, image, audio, history, state)

    # The button click and pressing Enter in the textbox run the same handler.
    for trigger in (ask_btn.click, text_inp.submit):
        trigger(
            wrapped_chat,
            inputs=[text_inp, image_inp, audio_inp, chat, user_state, username_inp],
            outputs=[chat, user_state, audio_out],
        )
# -------------------- Launch --------------------
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    # For Spaces, demo.launch() is fine. Locally you can set server_name to "0.0.0.0"
    demo.launch()