aimanathar commited on
Commit
0cdf6e1
·
verified ·
1 Parent(s): e47b438

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -71
app.py DELETED
@@ -1,71 +0,0 @@
1
- import textwrap
2
- import warnings
3
-
4
- import faiss
5
- import numpy as np
6
- import torch
7
-
8
- warnings.filterwarnings("ignore")
9
- import gradio as gr
10
- import pytesseract
11
- from pdf2image import convert_from_path
12
- from pdfminer.high_level import extract_text
13
- from sentence_transformers import SentenceTransformer
14
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
15
-
16
-
17
# ================== PDF Handling Functions ==================
def pdf_to_text(path):
    """Extract text from a PDF, falling back to OCR for scanned documents.

    Args:
        path: Filesystem path to the PDF file.

    Returns:
        The extracted text, or "" if both direct extraction and OCR fail.
    """
    try:
        extracted = extract_text(path) or ""
    except Exception:
        extracted = ""
    # A substantial direct extraction means the PDF has a text layer.
    if len(extracted.strip()) >= 200:
        return extracted
    # Near-empty result usually means a scanned PDF — retry page-by-page OCR.
    try:
        page_images = convert_from_path(path, dpi=200)
        return "\n".join(pytesseract.image_to_string(img) for img in page_images)
    except Exception:
        return ""
31
-
32
def chunk_text(text, max_chars=800):
    """Split text into retrieval-sized chunks of at most max_chars characters.

    Paragraphs (newline-separated) are greedily packed into a buffer until
    adding another would exceed max_chars. Paragraphs longer than max_chars
    are wrapped with textwrap (without breaking words, so a single unbroken
    word longer than max_chars is kept intact).

    Args:
        text: Raw document text.
        max_chars: Soft upper bound on chunk length.

    Returns:
        List of chunk strings, each longer than 80 characters, in the same
        order as they appear in the input text.
    """
    paras = [p.strip() for p in text.split("\n") if p.strip()]
    chunks, buf = [], ""
    for p in paras:
        if len(p) > max_chars:
            # BUG FIX: flush the pending buffer *before* emitting the wrapped
            # pieces, otherwise earlier text would appear after this paragraph.
            if buf:
                chunks.append(buf)
                buf = ""
            for piece in textwrap.wrap(p, width=max_chars, break_long_words=False):
                chunks.append(piece.strip())
        else:
            if len(buf) + len(p) + 1 <= max_chars:
                buf = (buf + "\n" + p).strip()
            else:
                if buf:
                    chunks.append(buf)
                buf = p
    if buf:
        chunks.append(buf)
    # Drop fragments too short to be meaningful retrieval context.
    return [c for c in chunks if len(c) > 80]
47
-
48
# ================== Load Embeddings + Model ==================
# Sentence embedder (presumably intended for a FAISS retrieval index;
# not used by the visible chat path).
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

# Seq2seq generator used to answer questions; moved to GPU when available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_id = "google/flan-t5-base"
tok = AutoTokenizer.from_pretrained(model_id)
gen_model = AutoModelForSeq2SeqLM.from_pretrained(model_id).to(device)
56
-
57
# ================== Chat Function ==================
def chat_fn(message, history=None):
    """Generate an exam-style answer for a user question.

    Args:
        message: The user's question text.
        history: Conversation history supplied by gr.ChatInterface; unused.

    Returns:
        The decoded, whitespace-stripped model answer.
    """
    prompt = f"Answer clearly and exam-ready:\n\nQuestion:\n{message}"
    encoded = tok(
        prompt,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=1024,
    ).to(device)
    # Deterministic beam search keeps answers stable across identical questions.
    output_ids = gen_model.generate(
        **encoded,
        max_new_tokens=120,
        num_beams=4,
        do_sample=False,
    )
    answer = tok.decode(output_ids[0], skip_special_tokens=True)
    return answer.strip()
63
-
64
# ================== Gradio Interface ==================
# gr.ChatInterface calls chat_fn with (message, history) on every turn.
chat_ui = gr.ChatInterface(
    fn=chat_fn,
    title="💬 Practical Chatbot",
    description="Ask about Physics & Chemistry Practicals (Class 9–10).",
)

chat_ui.launch()