Mahrukhh committed on
Commit
e9a0d05
·
verified ·
1 Parent(s): bf43c5b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py — Hugging Face deployable RAG Study Supervisor
2
+
3
+ import os
4
+ import gradio as gr
5
+ import fitz # PyMuPDF
6
+ import docx
7
+ import numpy as np
8
+ import faiss
9
+ import requests
10
+ from sentence_transformers import SentenceTransformer
11
+
12
# Groq API key, read from the environment (set it in the HF Space secrets).
# It is None when the variable is not defined.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Model identifier sent in the "model" field of the chat-completion request.
MODEL = "llama3-8b-8192"

# Prompt templates.
# system_template is sent verbatim as the system message; user_template is
# filled in through str.format with the `context` and `question` fields.
system_template = (
    "You are a helpful and knowledgeable study supervisor. "
    "You are given excerpts from a document, and your job is to answer "
    "student questions based on that information. "
    "Be precise and explain clearly like a teacher."
)
user_template = (
    "Context: {context}\n"
    "\n"
    "Question: {question}\n"
    "\n"
    "Answer like a teacher:"
)
23
+
24
# Embedding model.
# A single SentenceTransformer instance is created at import time and shared
# by embed_chunks (document side) and get_context (query side), so both are
# encoded by the same model.
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedder = SentenceTransformer(_EMBEDDING_MODEL_NAME)
26
+
27
def extract_text(file):
    """Return the plain text of an uploaded PDF or Word document.

    Args:
        file: The uploaded file. Either an object exposing the on-disk path
            as ``.name`` or the path itself as a string. ``None`` is
            accepted and treated as "nothing uploaded".

    Returns:
        The extracted text, with pages (PDF) or paragraphs (Word) joined by
        newlines. An empty string is returned for ``None`` and for any
        extension other than ``.pdf``, ``.docx`` or ``.doc``.
    """
    if file is None:
        return ""

    # Accept both file-like wrappers (path in `.name`) and bare path strings.
    path = getattr(file, "name", file)

    # splitext only reports a real extension, so a bare name such as "pdf"
    # is no longer mistaken for a PDF the way `split(".")[-1]` did.
    ext = os.path.splitext(path)[1].lower()

    if ext == ".pdf":
        # The context manager closes the document handle once the text has
        # been collected; the join is fully evaluated before the exit.
        with fitz.open(path) as pdf:
            return "\n".join(page.get_text() for page in pdf)

    if ext in (".docx", ".doc"):
        # NOTE(review): python-docx targets the .docx format; confirm that
        # legacy binary .doc uploads actually open here.
        document = docx.Document(path)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    return ""
36
+
37
def chunk_text(text, size=300, overlap=50):
    """Split *text* into overlapping chunks of whitespace-separated words.

    Args:
        text: The text to split.
        size: Maximum number of words per chunk. Must be positive.
        overlap: Number of words shared by consecutive chunks. Must satisfy
            ``0 <= overlap < size`` so that the window always advances.

    Returns:
        A list of chunk strings, each with its words re-joined by single
        spaces. Empty or whitespace-only text yields an empty list.

    Raises:
        ValueError: If ``size`` is not positive or ``overlap`` is outside
            ``[0, size)``.
    """
    if size <= 0:
        raise ValueError("size must be a positive integer")
    if not 0 <= overlap < size:
        # A non-positive step would either make range() fail or silently
        # produce no chunks at all.
        raise ValueError("overlap must satisfy 0 <= overlap < size")

    words = text.split()
    step = size - overlap
    chunks = []
    for start in range(0, len(words), step):
        chunks.append(" ".join(words[start:start + size]))
        # Stop once a window has reached the end of the text; any further
        # window would only repeat words already present in this chunk.
        if start + size >= len(words):
            break
    return chunks
40
+
41
def embed_chunks(chunks):
    """Encode text chunks with the shared embedder and cast to float32.

    Args:
        chunks: The chunk strings to embed.

    Returns:
        The result of ``embedder.encode(chunks)`` converted to ``float32``.
        Presumably one row per chunk; store_faiss reads ``shape[1]`` of it.
    """
    embeddings = embedder.encode(chunks)
    return embeddings.astype("float32")
43
+
44
def store_faiss(chunks, vectors):
    """Build a FAISS flat L2 index over *vectors* and bundle it with the chunks.

    Args:
        chunks: The chunk strings, in the same order as the rows of
            ``vectors``.
        vectors: 2-D array of embeddings; ``vectors.shape[1]`` is used as
            the index dimensionality.

    Returns:
        A dict with keys ``"documents"`` (the chunks), ``"vectors"`` (the
        array passed in) and ``"index"`` (the populated index).
    """
    dimension = vectors.shape[1]
    flat_index = faiss.IndexFlatL2(dimension)
    flat_index.add(vectors)
    return dict(documents=chunks, vectors=vectors, index=flat_index)
48
+
49
def get_context(query, state, k=3):
    """Retrieve the stored chunks closest to *query*.

    Args:
        query: The user's question.
        state: The dict produced by store_faiss (keys ``"documents"`` and
            ``"index"`` are read), or ``None`` if nothing was uploaded yet.
        k: Maximum number of chunks to return.

    Returns:
        A ``(context, state)`` tuple. On success ``context`` is a list of
        chunk strings and ``state`` is passed through unchanged. If no
        document has been indexed, ``context`` is a warning string and the
        returned state is ``None``.
    """
    if state is None or "index" not in state:
        return "⚠️ Please upload a document first.", None

    documents = state["documents"]
    # Never ask for more neighbours than there are chunks: FAISS pads the
    # missing results with label -1, which would index the last chunk.
    k = min(k, len(documents))

    q_vec = embedder.encode([query]).astype("float32")
    _, neighbours = state["index"].search(q_vec, k)

    # Drop any remaining -1 padding defensively.
    return [documents[i] for i in neighbours[0] if i >= 0], state
55
+
56
def query_llm(context, question, timeout=60):
    """Ask the Groq chat-completions endpoint to answer *question*.

    Args:
        context: Iterable of context strings; they are joined with newlines
            and inserted into ``user_template``.
        question: The user's question.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The assistant message content on success. Every failure (missing
        API key, network error, non-200 status, malformed body) is reported
        as a string starting with ``"❌ Error:"`` instead of raising, so the
        UI always has something to display.
    """
    if not GROQ_API_KEY:
        # Without this guard the request would be sent with "Bearer None".
        return "❌ Error: GROQ_API_KEY is not set."

    prompt = user_template.format(context="\n".join(context), question=question)

    try:
        response = requests.post(
            "https://api.groq.com/openai/v1/chat/completions",
            headers={"Authorization": f"Bearer {GROQ_API_KEY}"},
            json={
                "model": MODEL,
                "messages": [
                    {"role": "system", "content": system_template},
                    {"role": "user", "content": prompt},
                ],
                "temperature": 0.3,
            },
            # requests waits indefinitely unless a timeout is given.
            timeout=timeout,
        )
    except requests.RequestException as exc:
        return f"❌ Error: {exc}"

    try:
        payload = response.json()
    except ValueError:
        # Error responses are not guaranteed to carry a JSON body.
        return f"❌ Error: HTTP {response.status_code}: {response.text[:500]}"

    if response.status_code != 200:
        return f"❌ Error: {payload}"

    try:
        return payload["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError):
        return f"❌ Error: unexpected response: {payload}"
74
+
75
def upload_file(file):
    """Extract, chunk, embed and index an uploaded document.

    Args:
        file: The value of the upload component, or ``None`` when the
            button is pressed before a file has been chosen.

    Returns:
        A ``(status, state)`` tuple. ``status`` is a message for the status
        box. ``state`` is the dict built by store_faiss on success and
        ``None`` on any failure.
    """
    if file is None:
        return "⚠️ Please upload a document first.", None

    try:
        text = extract_text(file)
    except Exception as exc:
        # UI boundary: a corrupt or unsupported file should produce a
        # status message, not an unhandled traceback in the interface.
        return f"⚠️ File is empty or unreadable. ({exc})", None

    if not text.strip():
        return "⚠️ File is empty or unreadable.", None

    chunks = chunk_text(text)
    vectors = embed_chunks(chunks)
    state = store_faiss(chunks, vectors)
    return "✅ Document processed!", state
83
+
84
def ask_question(question, state):
    """Answer *question* from the indexed document held in *state*.

    Args:
        question: Text from the question box; ``None`` and blank strings
            are both treated as "no question".
        state: The dict produced by upload_file, or ``None``.

    Returns:
        An ``(answer, state)`` tuple. ``answer`` is the model's reply, or a
        prompt/warning string when there is no question or no document.
        ``state`` is whatever get_context hands back, or the incoming state
        when the question is empty.
    """
    if not question or not question.strip():
        return "❗ Please enter a question.", state

    context, state = get_context(question, state)
    # get_context signals "no document yet" by returning a plain string
    # instead of a list of chunks; forward that message as the answer.
    if isinstance(context, str):
        return context, state

    return query_llm(context, question), state
91
+
92
# 🎛️ Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# 📘 Study Supervisor Chatbot\nUpload a document and ask questions about it.")

    # Carries the value returned by upload_file into ask_question.
    state = gr.State()

    with gr.Row():
        file = gr.File(file_types=[".pdf", ".docx", ".doc"], label="📂 Upload Document")
        upload_btn = gr.Button("📤 Upload and Process")

    status = gr.Textbox(label="Status", interactive=False)
    question = gr.Textbox(label="❓ Ask a Question")
    answer = gr.Textbox(label="📚 Answer", lines=8)

    # Indexing is triggered by the button; a question is sent by submitting
    # the question box.
    upload_btn.click(upload_file, inputs=file, outputs=[status, state])
    question.submit(ask_question, inputs=[question, state], outputs=[answer, state])

# Only start the server when run as a script, so importing this module
# (tests, tooling) does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()