Spaces:

aimanathar
/

virtual_trainr

Sleeping

App Files Files Community

aimanathar committed on Sep 12, 2025

Commit

da2d95f

verified ·

1 Parent(s): 0cdf6e1

Upload app.py

Browse files

Files changed (1) hide show

app.py +71 -0

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import textwrap
+import warnings
+import faiss
+import numpy as np
+import torch
+warnings.filterwarnings("ignore")
+import gradio as gr
+import pytesseract
+from pdf2image import convert_from_path
+from pdfminer.high_level import extract_text
+from sentence_transformers import SentenceTransformer
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+# ================== PDF Handling Functions ==================
+def pdf_to_text(path):
+    try:
+        txt = extract_text(path) or ""
+    except Exception:
+        txt = ""
+    if len(txt.strip()) < 200:
+        try:
+            pages = convert_from_path(path, dpi=200)
+            ocr_all = [pytesseract.image_to_string(img) for img in pages]
+            txt = "\n".join(ocr_all)
+        except Exception:
+            txt = ""
+    return txt
+def chunk_text(text, max_chars=800):
+    paras = [p.strip() for p in text.split("\n") if p.strip()]
+    chunks, buf = [], ""
+    for p in paras:
+        if len(p) > max_chars:
+            for piece in textwrap.wrap(p, width=max_chars, break_long_words=False):
+                chunks.append(piece.strip())
+        else:
+            if len(buf) + len(p) + 1 <= max_chars:
+                buf = (buf + "\n" + p).strip()
+            else:
+                if buf: chunks.append(buf)
+                buf = p
+    if buf: chunks.append(buf)
+    return [c for c in chunks if len(c) > 80]
+# ================== Load Embeddings + Model ==================
+embed_model = SentenceTransformer("all-MiniLM-L6-v2")
+model_id = "google/flan-t5-base"
+tok = AutoTokenizer.from_pretrained(model_id)
+gen_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+gen_model.to(device)
+# ================== Chat Function ==================
+def chat_fn(message, history=None):
+    prompt = f"Answer clearly and exam-ready:\n\nQuestion:\n{message}"
+    inputs = tok(prompt, return_tensors="pt", truncation=True, padding=True, max_length=1024).to(device)
+    out = gen_model.generate(**inputs, max_new_tokens=120, num_beams=4, do_sample=False)
+    return tok.decode(out[0], skip_special_tokens=True).strip()
+# ================== Gradio Interface ==================
+iface = gr.ChatInterface(
+    fn=chat_fn,
+    title="💬 Practical Chatbot",
+    description="Ask about Physics & Chemistry Practicals (Class 9–10)."
+)
+iface.launch()