ST-THOMAS-OF-AQUINAS committed on
Commit
9529ffe
·
verified ·
1 Parent(s): 867a3c5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -161
app.py CHANGED
@@ -1,4 +1,5 @@
1
- # πŸŽ“ AI Auto-Grader with Groq API (Multilingual: English + Kiswahili)
 
2
 
3
  import gradio as gr
4
  import PyPDF2
@@ -10,27 +11,33 @@ import time
10
  import os
11
  from sentence_transformers import SentenceTransformer
12
  import faiss
 
13
 
14
- # ─────────────────────────────────────────
15
  # Groq API Configuration
16
- # ─────────────────────────────────────────
17
  GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
18
- GROQ_MODEL = "openai/gpt-oss-120b"
 
 
19
  GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()
 
 
20
 
21
- # ─────────────────────────────────────────
22
- # Embedding Model (MULTILINGUAL)
23
- # ─────────────────────────────────────────
24
- embed_model = SentenceTransformer("paraphrase-multilingual-mpnet-base-v2")
25
 
26
  vector_store = None
27
  chunks_store = None
 
28
  TOP_K = 3
29
  MAX_MARKS = 4
30
 
31
- # ─────────────────────────────────────────
32
  # Text Cleaner
33
- # ─────────────────────────────────────────
34
  def clean_text(text):
35
  if not text:
36
  return ""
@@ -38,170 +45,189 @@ def clean_text(text):
38
  text = re.sub(r'\s+', ' ', text)
39
  return text.strip()
40
 
41
- # ─────────────────────────────────────────
42
  # PDF Loader
43
- # ─────────────────────────────────────────
44
  def load_pdf(file_path):
45
- reader = PyPDF2.PdfReader(file_path)
46
- return [clean_text(p.extract_text()) for p in reader.pages]
47
-
48
- # ─────────────────────────────────────────
49
- # REGEX (ENGLISH + KISWAHILI)
50
- # ─────────────────────────────────────────
51
- QA_PATTERN = re.compile(
52
- r"(?:Question|Swali):\s*(.+?)\s*(?:Answer|Jibu):\s*(.+?)(?=(?:Question|Swali):|$)",
53
- re.DOTALL | re.IGNORECASE
54
- )
55
-
56
- # ─────────────────────────────────────────
57
- # Chunk Text
58
- # ─────────────────────────────────────────
59
- def chunk_text(pages):
60
- text = " ".join(pages)
61
- matches = QA_PATTERN.findall(text)
62
- return [f"Question: {q.strip()} Answer: {a.strip()}" for q, a in matches]
63
-
64
- # ─────────────────────────────────────────
65
  # Vectorize Marking Scheme
66
- # ─────────────────────────────────────────
67
- def vectorize_pdf(file):
68
- global vector_store, chunks_store
69
-
70
- if file is None:
71
- return {"error": "Upload PDF first"}
72
-
73
- pages = load_pdf(file)
74
- chunks = chunk_text(pages)
75
-
76
- if not chunks:
77
- return {"error": "No Q&A found. Check format."}
78
-
79
- embeddings = embed_model.encode(chunks, convert_to_numpy=True)
80
-
81
- vector_store = faiss.IndexFlatL2(embeddings.shape[1])
82
- vector_store.add(embeddings)
83
-
84
- chunks_store = chunks
85
-
86
- return {
87
- "status": "βœ… Ready",
88
- "chunks": len(chunks),
89
- "preview": chunks[:3]
90
- }
91
-
92
- # ─────────────────────────────────────────
93
- # Parse Student Answers
94
- # ─────────────────────────────────────────
95
- def parse_student_pdf_qna(file):
96
- pages = load_pdf(file)
97
- text = " ".join(pages)
98
- matches = QA_PATTERN.findall(text)
99
- return [(q.strip(), a.strip()) for q, a in matches]
100
-
101
- # ─────────────────────────────────────────
102
- # Call Groq
103
- # ─────────────────────────────────────────
104
- def call_groq(prompt):
105
- headers = {
106
- "Authorization": f"Bearer {GROQ_API_KEY}",
107
- "Content-Type": "application/json"
108
- }
109
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  payload = {
111
- "model": GROQ_MODEL,
112
  "messages": [{"role": "user", "content": prompt}],
113
  "temperature": 0.1,
114
- "response_format": {"type": "json_object"}
 
115
  }
116
 
117
- res = requests.post(GROQ_API_URL, headers=headers, json=payload)
118
- content = res.json()["choices"][0]["message"]["content"]
119
-
120
- if "```" in content:
121
- content = content.split("```")[1]
122
-
123
- return json.loads(content)
124
-
125
- # ─────────────────────────────────────────
126
- # MAIN GRADING FUNCTION
127
- # ─────────────────────────────────────────
128
- def grade(student_pdf):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
- if vector_store is None:
131
- return "❌ Upload marking scheme first"
 
132
 
133
- qas = parse_student_pdf_qna(student_pdf)
134
  results = []
135
- total = 0
136
-
137
- for i, (q, a) in enumerate(qas, 1):
138
-
139
- query = f"Question: {q} Answer: {a}"
140
- vec = embed_model.encode([query], convert_to_numpy=True)
141
-
142
- _, idx = vector_store.search(vec, TOP_K)
143
- context = "\n".join([chunks_store[j] for j in idx[0]])
144
 
145
- # πŸ”₯ BILINGUAL PROMPT
 
 
 
146
  prompt = f"""
147
- Maelekezo / Instruction:
148
- Wewe ni mtahini wa kitaifa. Linganisha jibu la mwanafunzi na mwongozo wa alama.
149
- Toa alama kwa usahihi. Ruhusu alama za sehemu.
150
-
151
- You are a national exam marker. Compare answer with marking scheme and grade fairly.
152
-
153
- Swali / Question:
154
- {q}
155
-
156
- Jibu la mwanafunzi / Student Answer:
157
- {a}
158
-
159
- Mwongozo / Marking Scheme:
160
- {context}
161
-
162
- Alama za juu / Maximum Marks: {MAX_MARKS}
163
-
164
- Toa JSON:
165
- {{
166
- "score": number,
167
- "rationale": "maelezo"
168
- }}
169
- """
170
-
171
- result = call_groq(prompt)
172
-
173
- score = float(result.get("score", 0))
174
- total += score
175
-
176
- results.append(f"""
177
- Q{i}: {q}
178
  Answer: {a}
179
- Score: {score}/{MAX_MARKS}
180
- Reason: {result.get("rationale")}
181
- """)
182
-
183
- return f"""
184
- TOTAL: {total}/{len(qas)*MAX_MARKS}
185
-
186
- """ + "\n".join(results)
187
-
188
- # ─────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  # Gradio UI
190
- # ─────────────────────────────────────────
191
  with gr.Blocks() as demo:
192
- gr.Markdown("## πŸŽ“ Multilingual AI Auto-Grader (English + Kiswahili)")
193
-
194
- ms_file = gr.File(label="Marking Scheme PDF")
195
- st_file = gr.File(label="Student Answers PDF")
196
-
197
- btn1 = gr.Button("Build Index")
198
- btn2 = gr.Button("Grade")
199
-
200
- out1 = gr.JSON()
201
- out2 = gr.Textbox(lines=20)
202
-
203
- btn1.click(vectorize_pdf, ms_file, out1)
204
- btn2.click(grade, st_file, out2)
205
-
206
- if __name__ == "__main__":
207
- demo.launch()
 
 
1
+ # 🎓 AI Auto-Grader with Language-Aware Rationale
2
+ # Supports English & Kiswahili, dynamic Groq model selection
3
 
4
  import gradio as gr
5
  import PyPDF2
 
11
  import os
12
  from sentence_transformers import SentenceTransformer
13
  import faiss
14
+ from langdetect import detect # to detect language
15
 
16
+ # ─────────────────────────────────────────────────────────────
17
  # Groq API Configuration
18
+ # ─────────────────────────────────────────────────────────────
19
  GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"
20
+ DEFAULT_MODEL = "llama-3.3-70b-versatile"
21
+ KISWAHILI_MODEL = "qwen/qwen3-32b"
22
+
23
  GROQ_API_KEY = os.getenv("GROQ_API_KEY", "").strip()
24
+ if not GROQ_API_KEY:
25
+ print("⚠️ GROQ_API_KEY not set. Add it in Space Secrets.")
26
 
27
+ # ─────────────────────────────────────────────────────────────
28
+ # Embedding Model & Global Storage
29
+ # ─────────────────────────────────────────────────────────────
30
+ embed_model = SentenceTransformer("all-mpnet-base-v2")
31
 
32
  vector_store = None
33
  chunks_store = None
34
+ embeddings_store = None
35
  TOP_K = 3
36
  MAX_MARKS = 4
37
 
38
+ # ─────────────────────────────────────────────────────────────
39
  # Text Cleaner
40
+ # ─────────────────────────────────────────────────────────────
41
  def clean_text(text):
42
  if not text:
43
  return ""
 
45
  text = re.sub(r'\s+', ' ', text)
46
  return text.strip()
47
 
48
+ # ─────────────────────────────────────────────────────────────
49
  # PDF Loader
50
+ # ─────────────────────────────────────────────────────────────
51
def load_pdf(file_path):
    """Read a PDF and return its text as a list with one cleaned string per page.

    Every page's extracted text is passed through ``clean_text`` before it is
    collected, and pages are returned in document order.
    """
    cleaned_pages = []
    for pdf_page in PyPDF2.PdfReader(file_path).pages:
        cleaned_pages.append(clean_text(pdf_page.extract_text()))
    return cleaned_pages
55
+
56
+ # ─────────────────────────────────────────────────────────────
57
+ # Chunk text by Q&A
58
+ # ─────────────────────────────────────────────────────────────
59
# Matches one "Question: ... Answer: ..." pair.  The Kiswahili labels
# "Swali:" / "Jibu:" are accepted as well, so Kiswahili marking schemes are
# not silently reduced to zero chunks.  Compiled once instead of per call.
_QA_PATTERN = re.compile(
    r"(?:Question|Swali):\s*(.+?)\s*(?:Answer|Jibu):\s*(.+?)(?=(?:Question|Swali):|$)",
    re.DOTALL | re.IGNORECASE,
)


def chunk_text(text_pages):
    """Split page texts into one chunk per question/answer pair.

    Args:
        text_pages: iterable of page strings; they are joined with a single
            space before matching, so a pair may span a page boundary.

    Returns:
        A list of strings of the form ``"Question: <q> Answer: <a>"`` with
        surrounding whitespace stripped from ``<q>`` and ``<a>``.  The English
        labels are always used in the output, whichever label language the
        input used.  Returns an empty list when no pair is found.
    """
    text = " ".join(text_pages)
    return [
        f"Question: {q.strip()} Answer: {a.strip()}"
        for q, a in _QA_PATTERN.findall(text)
    ]
67
+
68
+ # ─────────────────────────────────────────────────────────────
 
 
 
69
  # Vectorize Marking Scheme
70
+ # ─────────────────────────────────────────────────────────────
71
def vectorize_pdf(marking_scheme_file):
    """Build the FAISS index for an uploaded marking-scheme PDF.

    The PDF is loaded, split into Q&A chunks, embedded, and added to a new
    ``faiss.IndexFlatL2``.  The module-level ``vector_store``,
    ``chunks_store`` and ``embeddings_store`` are replaced only after every
    step has succeeded, so a failed rebuild never leaves an index that does
    not match the stored chunks.

    Args:
        marking_scheme_file: the uploaded PDF (as accepted by ``load_pdf``),
            or ``None`` when nothing was uploaded.

    Returns:
        On success, a dict with ``status``, ``chunks_found``,
        ``embedding_dim`` and a ``preview`` of up to five chunks (each
        truncated to 120 characters).  On failure, ``{"error": <message>}``.
    """
    global vector_store, chunks_store, embeddings_store

    if marking_scheme_file is None:
        return {"error": "Please upload a PDF."}

    try:
        chunks = chunk_text(load_pdf(marking_scheme_file))
        if not chunks:
            return {"error": "No Q&A found in PDF."}
        embeddings = embed_model.encode(chunks, convert_to_numpy=True)
        index = faiss.IndexFlatL2(embeddings.shape[1])
        index.add(embeddings)
    except Exception as e:
        # UI boundary: report the failure in the status panel instead of raising.
        return {"error": str(e)}

    # Publish the new state in one step, only once it is complete.
    vector_store = index
    chunks_store = chunks
    embeddings_store = embeddings

    preview = [
        {"id": i, "preview": chunk[:120] + "..." if len(chunk) > 120 else chunk}
        for i, chunk in enumerate(chunks[:5], 1)
    ]
    # The status text is compared verbatim by the UI callback that enables
    # the grade button, so it is kept exactly as it was.
    return {
        "status": "βœ… Success",
        "chunks_found": len(chunks),
        "embedding_dim": embeddings.shape[1],
        "preview": preview,
    }
89
+
90
+ # ─────────────────────────────────────────────────────────────
91
+ # Parse Student PDF
92
+ # ─────────────────────────────────────────────────────────────
93
def parse_student_pdf_qna(student_pdf_file):
    """Extract ``(question, answer)`` pairs from a student's answer PDF.

    Pairs are recognised by the labels ``Question:`` / ``Answer:`` or their
    Kiswahili equivalents ``Swali:`` / ``Jibu:`` (case-insensitive).

    Args:
        student_pdf_file: the uploaded PDF (as accepted by ``load_pdf``), or
            ``None`` when nothing was uploaded.

    Returns:
        A list of ``(question, answer)`` tuples with whitespace stripped;
        pairs whose question or answer is empty are dropped.  Returns ``[]``
        for ``None`` input or when the PDF cannot be read (the error is
        printed, not raised).
    """
    if student_pdf_file is None:
        return []

    qa_pattern = re.compile(
        r"(?:Question|Swali):\s*(.+?)\s*(?:Answer|Jibu):\s*(.+?)(?=(?:Question|Swali):|$)",
        re.DOTALL | re.IGNORECASE,
    )

    try:
        text = " ".join(load_pdf(student_pdf_file))
    except Exception as e:
        # Best-effort by design: an unreadable PDF is reported as "no answers".
        print(f"Error parsing student PDF: {e}")
        return []

    pairs = []
    for q, a in qa_pattern.findall(text):
        question, answer = q.strip(), a.strip()
        if question and answer:
            pairs.append((question, answer))
    return pairs
105
+
106
+ # ─────────────────────────────────────────────────────────────
107
+ # Detect language
108
+ # ─────────────────────────────────────────────────────────────
109
def detect_language(text):
    """Classify *text* as Kiswahili (``"sw"``) or English (``"en"``).

    Only two outcomes exist: ``"sw"`` when the detector's language code
    starts with ``"sw"``, and ``"en"`` for everything else, including empty
    or non-string input and any detection failure.
    """
    if not isinstance(text, str) or not text.strip():
        return "en"
    try:
        lang = detect(text)
    except Exception:
        # Best-effort: fall back to the default language.  ``Exception``
        # rather than a bare ``except`` so KeyboardInterrupt/SystemExit
        # still propagate.
        return "en"
    return "sw" if lang.startswith("sw") else "en"
117
+
118
+ # ─────────────────────────────────────────────────────────────
119
+ # Call Groq API with language-aware prompt
120
+ # ─────────────────────────────────────────────────────────────
121
def call_groq(prompt, question_text, max_retries=2):
    """Send a grading prompt to the Groq chat-completions API.

    The model is chosen from the detected language of *question_text*:
    ``KISWAHILI_MODEL`` for Kiswahili, ``DEFAULT_MODEL`` otherwise.

    Args:
        prompt: full prompt sent as the single user message.
        question_text: text used only for language detection.
        max_retries: number of additional attempts after the first failure.

    Returns:
        Always a dict with keys ``success``, ``score`` and ``rationale``.
        On success ``score`` is numeric.  On failure ``success`` is False,
        ``score`` is 0 and ``rationale`` carries the error message.
    """
    if not GROQ_API_KEY:
        return {"success": False, "score": 0, "rationale": "❌ GROQ_API_KEY not configured."}

    model = KISWAHILI_MODEL if detect_language(question_text) == "sw" else DEFAULT_MODEL

    headers = {"Authorization": f"Bearer {GROQ_API_KEY}", "Content-Type": "application/json"}
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.1,
        "response_format": {"type": "json_object"},
        "max_tokens": 500,
    }

    last_error = "no request attempted"
    for attempt in range(max_retries + 1):
        try:
            response = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=45)
            response.raise_for_status()
            content = response.json()["choices"][0]["message"]["content"].strip()

            # Some models wrap the JSON in a Markdown code fence; unwrap it.
            if "```json" in content:
                content = content.split("```json")[1].split("```")[0].strip()
            elif "```" in content:
                content = content.split("```")[1].split("```")[0].strip()

            grading = json.loads(content)
            score = grading["score"]
            # A score such as "3" is converted; a non-numeric one raises and
            # is retried like any other malformed response.
            if isinstance(score, bool) or not isinstance(score, (int, float)):
                score = float(score)
            return {"success": True, "score": score, "rationale": grading["rationale"]}
        except Exception as e:
            # External-service boundary: network, HTTP, JSON and schema
            # errors all become a retry, then a failure result.
            last_error = e
            if attempt < max_retries:
                time.sleep(1.5)

    # Reached after the last failed attempt, and also when max_retries < 0,
    # so the caller always gets a dict.
    return {"success": False, "score": 0, "rationale": f"❌ {last_error}"}
154
+
155
+ # ─────────────────────────────────────────────────────────────
156
+ # Grade student answers
157
+ # ─────────────────────────────────────────────────────────────
158
def _clamp_score(raw_score):
    """Coerce a model-supplied score to a float in ``[0, MAX_MARKS]`` (0.0 if not numeric)."""
    try:
        value = float(raw_score)
    except (TypeError, ValueError):
        return 0.0
    if value != value:  # NaN compares unequal to itself
        return 0.0
    return max(0.0, min(float(MAX_MARKS), value))


def grade_student_answers(student_pdf_file):
    """Grade every Q&A pair in a student's PDF against the indexed marking scheme.

    For each pair, the ``TOP_K`` nearest marking-scheme chunks are retrieved
    from ``vector_store`` and passed to ``call_groq`` together with the
    question and answer.  The returned score is clamped to
    ``[0, MAX_MARKS]`` before it is recorded and totalled.

    Args:
        student_pdf_file: the uploaded student PDF, or ``None``.

    Returns:
        An error message string when the index has not been built, no file
        was given, or no Q&A pair was found.  Otherwise a dict with
        ``summary`` (``total``, ``score``, ``max``, ``pct``) and ``results``
        (one dict per question with ``q_num``, ``question``, ``answer``,
        ``score``, ``max``, ``rationale``, ``ok``).
    """
    if vector_store is None or chunks_store is None:
        return "❌ Upload & vectorize marking scheme first."
    if student_pdf_file is None:
        return "❌ Upload student PDF."

    qas = parse_student_pdf_qna(student_pdf_file)
    if not qas:
        return "❌ No Q&A found in student PDF."

    n_chunks = len(chunks_store)
    top_k = min(TOP_K, n_chunks)
    results = []
    total_score = 0.0

    for idx, (q, a) in enumerate(qas, 1):
        query_vec = embed_model.encode([f"Question: {q} Answer: {a}"], convert_to_numpy=True)
        _, indices = vector_store.search(query_vec, top_k)
        # FAISS pads missing neighbours with -1; without the lower bound that
        # would index the last chunk.
        context = "\n".join(chunks_store[i] for i in indices[0] if 0 <= i < n_chunks)

        prompt = f"""
Instruction: You are a national exam marker.
Use the same language as the question to write the rationale.
Compare the student's answer with the marking scheme context and award marks. Output in JSON.
Question: {q}
Answer: {a}
Marking Scheme Context: {context}
Maximum Marks: {MAX_MARKS}
Output Format:
{{"score": <numeric>, "rationale": "<explanation>"}}
"""
        grading = call_groq(prompt, q)
        score = _clamp_score(grading.get("score", 0))
        total_score += score
        results.append({
            "q_num": idx,
            "question": q,
            "answer": a,
            "score": score,
            "max": MAX_MARKS,
            "rationale": grading.get("rationale", ""),
            "ok": grading.get("success", False),
        })
        # Short pause between consecutive API calls.
        time.sleep(0.3)

    n = len(results)
    max_total = n * MAX_MARKS
    summary = {
        "total": n,
        "score": round(total_score, 2),
        "max": max_total,
        "pct": round((total_score / max_total) * 100, 1) if max_total else 0,
    }
    return {"summary": summary, "results": results}
198
+
199
+ # ─────────────────────────────────────────────────────────────
200
+ # Format output HTML
201
+ # ─────────────────────────────────────────────────────────────
202
def format_output(data):
    """Render a grading result (or an error message) as an HTML fragment.

    All text that originates from the PDFs or from the model (question,
    answer, score, rationale) and any error message is HTML-escaped before
    being inserted, so markup inside it is shown literally.

    Args:
        data: either an error message string, or a dict with ``summary``
            (``total``, ``score``, ``max``, ``pct``) and ``results`` (dicts
            with ``q_num``, ``question``, ``answer``, ``score``, ``max``,
            ``rationale``, ``ok``).

    Returns:
        An HTML string: a red ``<div>`` for an error message, otherwise a
        summary line followed by one ``<p>`` block per question.
    """
    from html import escape  # function-scope import; only this block needs it

    if isinstance(data, str):
        return f"<div style='color:red'>{escape(data)}</div>"

    s = data["summary"]
    parts = [
        f"<h3>Summary</h3>Total Questions: {s['total']} | "
        f"Score: {s['score']}/{s['max']} | Percentage: {s['pct']}%<hr>"
    ]
    for item in data["results"]:
        icon = "✅" if item["ok"] else "❌"
        parts.append(
            f"<p>{icon} Q{item['q_num']}: {escape(str(item['question']))}<br>"
            f"Answer: {escape(str(item['answer']))}<br>"
            f"Score: {escape(str(item['score']))}/{item['max']}<br>"
            f"Rationale: {escape(str(item['rationale']))}</p><hr>"
        )
    return "".join(parts)
212
+
213
+ # ─────────────────────────────────────────────────────────────
214
  # Gradio UI
215
+ # ─────────────────────────────────────────────────────────────
216
def _grade_button_update(status):
    """Return a Gradio update that enables the grade button after a successful build.

    ``vectorize_pdf`` returns a dict containing ``"status"`` on success and
    ``"error"`` on failure, so the decision is made on those keys rather than
    on the exact status text.  Anything that is not a dict keeps the button
    disabled.
    """
    built = isinstance(status, dict) and "status" in status and "error" not in status
    return gr.update(interactive=built)


with gr.Blocks() as demo:
    gr.Markdown("## 🎓 AI Auto-Grader (English + Kiswahili)")

    # Step 1: upload the marking scheme and build the search index.
    marking_file = gr.File(label="📄 Marking Scheme PDF", file_types=[".pdf"])
    vector_status = gr.JSON(label="Vectorization Status")
    vector_btn = gr.Button("Build Index")

    # Step 2: upload the student's answers; grading stays disabled until
    # an index has been built.
    student_file = gr.File(label="📝 Student Answers PDF", file_types=[".pdf"])
    grade_btn = gr.Button("Grade Answers", interactive=False)
    output = gr.HTML(label="📋 Results")

    vector_btn.click(
        vectorize_pdf, inputs=[marking_file], outputs=[vector_status]
    ).then(
        _grade_button_update, inputs=[vector_status], outputs=[grade_btn]
    )
    grade_btn.click(
        lambda f: format_output(grade_student_answers(f)),
        inputs=[student_file],
        outputs=[output],
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, show_error=True)