mikaelJ46 committed on
Commit
b6ebf77
·
verified ·
1 Parent(s): 39ae272

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +162 -205
app.py CHANGED
@@ -1,7 +1,6 @@
1
  # --------------------------------------------------------------
2
- # IGCSE Language Platform — REAL Past-Paper Powered (2025)
3
- # Model: Groq + Llama-3.1-70B (fast & free tier)
4
- # PDF → Text → AI → Real Exam Questions
5
  # --------------------------------------------------------------
6
  import os
7
  import json
@@ -9,22 +8,19 @@ import uuid
9
  from datetime import datetime
10
  import gradio as gr
11
  from huggingface_hub import InferenceClient
12
- import PyPDF2
13
- from pdf2image import convert_from_bytes
14
- import pytesseract
15
- from io import BytesIO
16
 
17
  # ---------- 1. HF Inference (FREE) ----------
18
  HF_TOKEN = os.getenv("HF_TOKEN")
19
  if not HF_TOKEN:
20
- raise gr.Error("Add HF_TOKEN in Secrets!")
 
21
  client = InferenceClient(token=HF_TOKEN)
22
  MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
23
 
24
- # ---------- 2. Global Storage ----------
25
- papers_db = [] # Raw uploaded papers
26
  questions_db = {} # {topic: [questions]}
27
- user_progress = {} # {user_id: {topic: {correct, total, streak}}}
28
 
29
  # ---------- 3. Topics ----------
30
  TOPICS = {
@@ -34,294 +30,255 @@ TOPICS = {
34
  "Passé Composé", "Imparfait", "Future", "Subjunctive", "Pronouns"
35
  ],
36
  "EFL": [
37
- "Reading Comp", "Narrative Writing", "Descriptive", "Argumentative",
38
- "Formal Letter", "Informal Email", "Tenses", "Conditionals",
39
- "Passive", "Reported Speech", "Idioms", "Phrasal Verbs"
40
  ]
41
  }
42
 
43
- # ---------- 4. OCR + PDF Text Extract ----------
44
- def extract_text_from_pdf(file_obj):
 
 
45
  try:
46
- # Try direct text
47
- reader = PyPDF2.PdfReader(file_obj)
48
- text = "\n".join(page.extract_text() or "" for page in reader.pages)
49
- if len(text.strip()) > 200:
50
- return text
51
  except:
52
- pass
53
- # Fallback: OCR
54
- images = convert_from_bytes(file_obj.read())
55
- return "\n".join(pytesseract.image_to_string(img) for img in images)
 
 
 
56
 
57
- # ---------- 5. AI: Tag + Extract Questions ----------
58
- def process_past_paper(title, subject, pdf_file, insert_file=None):
59
- text = extract_text_from_pdf(pdf_file)
60
- insert_text = extract_text_from_pdf(insert_file) if insert_file else ""
61
 
62
  prompt = f"""
63
- You are an IGCSE {subject} examiner. Extract EVERY question from this past paper.
64
- For each question:
65
  - Exact question text
66
- - Topic (choose one: {', '.join(TOPICS[subject])})
67
- - Marks
68
- - Year & Paper code (if visible)
69
- - Type: Reading / Writing / Listening / Translation
70
 
71
- Return valid JSON only:
72
  {{
73
  "paper": "{title}",
74
  "questions": [
75
  {{
76
- "id": "auto",
77
- "text": "...",
78
  "topic": "Food",
79
  "marks": 5,
80
- "year": "2023",
81
- "type": "Reading"
82
  }}
83
  ]
84
  }}
85
- Paper text:
86
- {text[:12000]}
87
 
88
- Insert (if any):
89
- {insert_text[:4000]}
 
 
 
90
  """
91
- resp = client.chat.completions.create(
92
- model=MODEL,
93
- messages=[{"role": "user", "content": prompt}],
94
- max_tokens=4000,
95
- temperature=0.3
96
- ).choices[0].message.content.strip()
97
 
98
  try:
99
- data = json.loads(resp.replace("```json", "").replace("```", ""))
100
- paper_id = str(uuid.uuid4())[:8]
 
 
 
 
 
 
101
  added = 0
102
- for q in data["questions"]:
 
 
103
  qid = str(uuid.uuid4())[:6]
104
- q["id"] = qid
105
- q["paper_id"] = paper_id
106
- q["source"] = title
107
- topic = q["topic"]
108
  if topic not in questions_db:
109
  questions_db[topic] = []
110
- questions_db[topic].append(q)
 
 
 
 
 
 
 
111
  added += 1
112
 
113
  papers_db.append({
114
  "id": paper_id,
115
  "title": title,
116
  "subject": subject,
117
- "uploaded_at": datetime.now().strftime("%b %d, %Y"),
118
- "questions": added
119
  })
120
- return f"Success: {added} questions added from **{title}**!", gr.update(value=list_papers())
 
121
  except Exception as e:
122
- return f"Error: {e}", gr.update()
123
 
124
- # ---------- 6. Real Question Generator ----------
125
- def get_real_question(subject, topic):
126
  if topic not in questions_db or not questions_db[topic]:
127
- return "No real questions yet. Ask admin to upload papers!", "", ""
128
 
129
  import random
130
  q = random.choice(questions_db[topic])
131
  return (
132
- f"**[{q['year'] or 'Past'} | {q['marks']} marks]**\n\n{q['text']}",
133
- f"Expected: {q['type']} answer worth {q['marks']} marks.",
134
  q["id"]
135
  )
136
 
137
- # ---------- 7. Answer Checker ----------
138
- def check_real_answer(qid, user_answer, user_id):
139
- if not user_answer.strip():
140
- return "Type your answer!"
141
-
142
  # Find question
143
- question = None
144
  for qs in questions_db.values():
145
- for q in qs:
146
- if q["id"] == qid:
147
- question = q
148
  break
149
- if question: break
150
-
151
- if not question:
152
  return "Question not found."
153
 
154
  prompt = f"""
155
- IGCSE {question.get('type','')} Marking:
156
- Question: {question['text']}
157
- Marks: {question['marks']}
158
- Student answer: {user_answer}
159
 
160
  Reply JSON:
161
  {{
162
- "score": 0-{question['marks']},
163
- "feedback": "Clear strengths and errors",
164
- "band": "High/Mid/Low",
165
- "improvement": "One key tip"
166
  }}
167
  """
168
- resp = client.chat.completions.create(
169
- model=MODEL,
170
- messages=[{"role": "user", "content": prompt}],
171
- max_tokens=600
172
- ).choices[0].message.content.strip()
173
-
174
  try:
 
 
 
 
 
 
175
  fb = json.loads(resp.replace("```json", "").replace("```", ""))
176
  score = fb["score"]
177
- max_marks = question['marks']
178
- percent = int(100 * score / max_marks)
179
 
180
- # Update progress
181
  if user_id not in user_progress:
182
  user_progress[user_id] = {}
183
- if question["topic"] not in user_progress[user_id]:
184
- user_progress[user_id][question["topic"]] = {"correct": 0, "total": 0, "streak": 0}
185
 
186
- prog = user_progress[user_id][question["topic"]]
187
- prog["total"] += 1
188
- if score >= max_marks * 0.7:
189
- prog["correct"] += 1
190
- prog["streak"] += 1
191
  else:
192
- prog["streak"] = 0
193
-
194
- xp = 10 + (5 if prog["streak"] > 2 else 0)
195
- streak_emoji = "Fire" if prog["streak"] > 2 else "Checkmark"
196
 
197
  return f"""
198
- **Score: {score}/{max_marks} ({percent}%)**
199
- Band: **{fb['band']}**
200
 
201
  **Feedback:**
202
  {fb['feedback']}
203
 
204
- **Improvement:**
205
- {fb['improvement']}
206
 
207
- **+{xp} XP** | Streak: {prog['streak']} {streak_emoji}
208
  """
209
  except:
210
- return resp
211
-
212
- # ---------- 8. Dashboard ----------
213
- def get_progress(user_id):
214
- if not user_id or user_id not in user_progress:
215
- return "Start practicing to see progress!"
216
- lines = ["### Your Progress"]
217
- for topic, data in user_progress[user_id].items():
218
- acc = data["correct"]/data["total"] if data["total"] else 0
219
- lines.append(f"- **{topic}**: {data['correct']}/{data['total']} ({acc:.0%}) | Streak: {data['streak']}")
220
- return "\n".join(lines)
221
 
222
- # ---------- 9. Admin List ----------
223
- def list_papers():
224
  if not papers_db:
225
- return "No papers uploaded yet."
226
  return "\n".join(
227
- f"**{p['title']}** ({p['subject']}) {p['questions']} questions {p['uploaded_at']}"
228
- for p in papers_db[-10:]
229
  )
230
 
231
- # ---------- 10. UI ----------
 
 
 
 
 
 
 
 
 
232
  with gr.Blocks(theme=gr.themes.Soft(), title="IGCSE Language Pro") as app:
233
- gr.Markdown("""
234
- # IGCSE Language Pro
235
- **Real Past-Paper Questions AI Tutor • Track Progress**
236
- """)
237
- user_id = gr.Textbox(label="Your Name/ID", placeholder="e.g. Alex2025", value="guest")
238
 
239
  with gr.Tabs():
240
- # STUDENT
241
  with gr.Tab("Practice"):
242
- gr.Markdown("### Real IGCSE Questions")
243
  with gr.Row():
244
- subj = gr.Dropdown(["French", "EFL"], label="Subject", value="French")
245
- topic = gr.Dropdown(TOPICS["French"], label="Topic")
246
- def update_topics(s):
247
- return gr.Dropdown(choices=TOPICS[s], value=TOPICS[s][0])
248
- subj.change(update_topics, subj, topic)
249
-
250
- q_out = gr.Markdown()
251
- exp = gr.Textbox(visible=False)
252
- qid_hidden = gr.Textbox(visible=False)
253
- ans = gr.Textbox(lines=6, label="Your Answer", placeholder="Write in full sentences...")
254
  feedback = gr.Markdown()
255
 
256
- gr.Button("Generate Real Question").click(
257
- get_real_question, [subj, topic], [q_out, exp, qid_hidden]
258
- )
259
- gr.Button("Check Answer").click(
260
- check_real_answer, [qid_hidden, ans, user_id], feedback
261
- )
262
 
263
- gr.Markdown("### Your Dashboard")
264
- dash = gr.Markdown()
265
- gr.Button("Refresh Progress").click(get_progress, user_id, dash)
266
 
267
  with gr.Tab("AI Tutor"):
268
- chatbot = gr.Chatbot(height=500)
269
- msg = gr.Textbox(placeholder="Ask about passé composé, idioms, or essay structure...")
270
- def tutor_resp(message, history):
271
- system = f"You are a fun, expert IGCSE tutor. Use examples from real past papers."
272
- resp = client.chat.completions.create(
273
  model=MODEL,
274
- messages=[{"role": "system", "content": system}] +
275
- [{"role": "user" if i%2==0 else "assistant", "content": turn}
276
- for pair in history for i, turn in enumerate(pair)] +
277
- [{"role": "user", "content": message}],
278
- max_tokens=800,
279
- temperature=0.8
280
- ).choices[0].message.content
281
- return history + [[message, resp]]
282
- msg.submit(tutor_resp, [msg, chatbot], chatbot)
283
-
284
- with gr.Tab("Dictionary & Translator"):
285
- with gr.Tabs():
286
- with gr.Tab("Translate"):
287
- dir = gr.Radio(["EN to FR", "FR to EN"], value="EN to FR")
288
- txt = gr.Textbox(lines=3)
289
- out = gr.Textbox(lines=3)
290
- gr.Button("Translate").click(
291
- lambda t, d: client.translation(t, src_lang="en" if "EN" in d else "fr", tgt_lang="fr" if "EN" in d else "en").translation,
292
- [txt, dir], out
293
- )
294
- with gr.Tab("Dictionary"):
295
- word = gr.Textbox(placeholder="e.g. magnifique")
296
- defn = gr.Markdown()
297
- gr.Button("Lookup").click(
298
- lambda w: client.text_generation(
299
- f"French dictionary entry for '{w}':\n- Meaning\n- Gender\n- 2 sentences",
300
- model=MODEL, max_new_tokens=400
301
- ), word, defn
302
- )
303
-
304
- # ADMIN
305
  with gr.Tab("Admin Upload"):
306
- gr.Markdown("### Upload Past Paper + Insert")
307
- with gr.Row():
308
- title = gr.Textbox(label="Paper Title", placeholder="June 2023 Paper 2")
309
- sub = gr.Radio(["French", "EFL"], label="Subject", value="French")
310
- pdf = gr.File(label="Question Paper (PDF)", file_types=[".pdf"])
311
- insert = gr.File(label="Insert/Listening Script (Optional)", file_types=[".pdf"])
312
  status = gr.Markdown()
313
- papers_list = gr.Markdown(value=list_papers())
314
 
315
- gr.Button("Process Paper").click(
316
- process_past_paper,
317
- [title, sub, pdf, insert],
318
- [status, papers_list]
319
  )
320
 
321
  gr.Markdown("""
322
  ---
323
- **Deploy:** Fork Add `HF_TOKEN` → Done!
324
- **Tip:** Upload 2020–2024 papers → instant 1000+ real questions!
 
 
325
  """)
326
 
327
- app.launch(share=True)
 
1
  # --------------------------------------------------------------
2
+ # IGCSE Language Pro — ZERO Dependencies (Hugging Face Ready)
3
+ # Real Past Papers → Real Questions → AI Marking
 
4
  # --------------------------------------------------------------
5
  import os
6
  import json
 
8
  from datetime import datetime
9
  import gradio as gr
10
  from huggingface_hub import InferenceClient
 
 
 
 
11
 
12
  # ---------- 1. HF Inference (FREE) ----------
13
  HF_TOKEN = os.getenv("HF_TOKEN")
14
  if not HF_TOKEN:
15
+ raise gr.Error("Add HF_TOKEN in Secrets! (Settings → Secrets)")
16
+
17
  client = InferenceClient(token=HF_TOKEN)
18
  MODEL = "meta-llama/Meta-Llama-3.1-70B-Instruct"
19
 
20
# ---------- 2. Storage ----------
# Plain module-level containers: nothing here is written to disk.
papers_db: list = []        # one summary dict per uploaded paper
questions_db: dict = {}     # {topic: [questions]}
user_progress: dict = {}    # {user_id: {topic: stats}}
24
 
25
  # ---------- 3. Topics ----------
26
  TOPICS = {
 
30
  "Passé Composé", "Imparfait", "Future", "Subjunctive", "Pronouns"
31
  ],
32
  "EFL": [
33
+ "Reading", "Narrative", "Descriptive", "Argumentative",
34
+ "Formal Letter", "Tenses", "Conditionals", "Passive",
35
+ "Reported Speech", "Idioms", "Phrasal Verbs"
36
  ]
37
  }
38
 
39
+ # ---------- 4. Simple Text Extract (NO PyPDF2!) ----------
40
def extract_text(file_obj):
    """Return the textual content of an uploaded file as a string.

    Accepts a filesystem path, an object exposing its path as ``.name``,
    a readable stream, or raw bytes.  The payload is read exactly once and
    decoded as UTF-8 with undecodable bytes dropped.

    NOTE(review): this only recovers text stored uncompressed in the file.
    PDFs commonly keep page text in compressed streams, so a real PDF parser
    is needed for reliable question extraction.

    Args:
        file_obj: Path, file-like object, bytes, or a falsy value.

    Returns:
        The decoded text, or "" when nothing was supplied or readable.
    """
    if not file_obj:
        return ""

    if isinstance(file_obj, (bytes, bytearray)):
        raw = bytes(file_obj)
    else:
        # Prefer the on-disk path: an upload wrapper's own stream may be
        # closed or already positioned at end-of-file.
        if isinstance(file_obj, (str, os.PathLike)):
            path = file_obj
        else:
            path = getattr(file_obj, "name", None)

        if isinstance(path, (str, os.PathLike)) and os.path.isfile(path):
            with open(path, "rb") as handle:
                raw = handle.read()
        elif hasattr(file_obj, "read"):
            raw = file_obj.read()
        else:
            return ""

    if isinstance(raw, str):
        # Text-mode streams already yield str; nothing to decode.
        return raw
    # errors="ignore" never raises, so no second decoding attempt is needed.
    return raw.decode("utf-8", errors="ignore")
50
+
51
+ # ---------- 5. AI: Extract & Tag Questions ----------
52
def _parse_model_json(raw):
    """Decode the JSON value in a model reply, tolerating code fences and chatter."""
    cleaned = raw.replace("```json", "").replace("```", "").strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        # The model sometimes wraps the object in prose; retry on the
        # outermost {...} span before giving up.
        start, end = cleaned.find("{"), cleaned.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(cleaned[start:end + 1])


def _coerce_marks(value, default=1):
    """Turn a model-supplied marks value (5, 5.0, "5", "[5]") into a positive int."""
    if isinstance(value, bool):
        return default
    if isinstance(value, (int, float)):
        try:
            marks = int(value)
        except (ValueError, OverflowError):  # NaN / infinity
            return default
    else:
        digits = ""
        for ch in str(value):
            if ch in "0123456789":
                digits += ch
            elif digits:
                break  # keep only the first run of digits
        marks = int(digits) if digits else default
    return marks if marks > 0 else default


def process_past_paper(title, subject, paper_file, insert_file=None):
    """Extract questions from an uploaded paper via the LLM and store them.

    Each stored question is appended to ``questions_db[topic]``; one summary
    entry per successfully processed paper is appended to ``papers_db``.

    Args:
        title: Display title of the paper (blank becomes "Untitled paper").
        subject: Key of ``TOPICS`` selecting the allowed topic list.
        paper_file: Uploaded question paper.
        insert_file: Optional uploaded insert.

    Returns:
        A ``(status_markdown, papers_list_update)`` pair for the two output
        components; on failure the second item is a no-op ``gr.update()``.
    """
    if not paper_file:
        return "Upload a paper PDF!", gr.update()
    if subject not in TOPICS:
        return f"Error: unknown subject {subject!r}", gr.update()

    title = (title or "").strip() or "Untitled paper"

    try:
        # File reading lives inside the try so an unreadable upload is
        # reported in the status box instead of crashing the handler.
        text = extract_text(paper_file)
        insert_text = extract_text(insert_file) if insert_file else ""
        if not text.strip():
            return "Error: no readable text found in the uploaded paper.", gr.update()

        # json.dumps keeps a title containing quotes from breaking the
        # JSON template shown to the model.
        prompt = f"""
You are an IGCSE {subject} expert. Extract ALL questions from this paper.
For each:
- Exact question text
- Topic (only one from: {', '.join(TOPICS[subject])})
- Marks (e.g. [5])
- Year/code if visible

Return ONLY valid JSON:
{{
"paper": {json.dumps(title)},
"questions": [
{{
"text": "Question here...",
"topic": "Food",
"marks": 5,
"year": "2023"
}}
]
}}

PAPER TEXT:
{text[:15000]}

INSERT:
{insert_text[:5000]}
"""

        resp = client.chat.completions.create(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=4000,
            temperature=0.2,
        ).choices[0].message.content.strip()

        data = _parse_model_json(resp)
        raw_questions = data.get("questions", []) if isinstance(data, dict) else []

        paper_id = str(uuid.uuid4())[:8]
        added = 0
        for q in raw_questions:
            if not isinstance(q, dict):
                continue
            q_text = str(q.get("text") or "").strip()
            if not q_text:
                continue  # an empty question is useless to students
            topic = str(q.get("topic") or "General")
            questions_db.setdefault(topic, []).append({
                "id": str(uuid.uuid4())[:6],
                "text": q_text,
                # Stored as a positive int so later score maths cannot
                # divide by zero or hit a string.
                "marks": _coerce_marks(q.get("marks", 1)),
                "year": q.get("year") or "Unknown",
                "source": title,
                "topic": topic,
                "subject": subject,
                "paper_id": paper_id,
            })
            added += 1

        if not added:
            return f"Error: no questions could be extracted from **{title}**.", gr.update()

        papers_db.append({
            "id": paper_id,
            "title": title,
            "subject": subject,
            "questions": added,
            "date": datetime.now().strftime("%b %d"),
        })

        return f"Uploaded: {added} questions added from **{title}**!", refresh_papers()
    except Exception as e:  # UI boundary: surface any failure in the status box
        return f"Error: {e}", gr.update()
125
 
126
+ # ---------- 6. Real Question ----------
127
def get_question(subject, topic):
    """Pick a random stored question for ``topic`` within ``subject``.

    Args:
        subject: Subject currently selected in the UI.
        topic: Topic key into ``questions_db``.

    Returns:
        ``(question_markdown, hint_text, question_id)``; the last two are
        empty strings when no matching question is stored.
    """
    import random  # local import, as in the original block

    # Questions stored without a "subject" field are treated as matching,
    # so entries that lack the field remain selectable.
    pool = [
        q for q in questions_db.get(topic, [])
        if q.get("subject", subject) == subject
    ]
    if not pool:
        return "No questions yet! Ask admin to upload papers.", "", ""

    q = random.choice(pool)
    return (
        f"**{q['year']} | {q['marks']} marks**\n\n{q['text']}",
        f"Write a full answer worth {q['marks']} marks.",
        q["id"],
    )
138
 
139
+ # ---------- 7. AI Marking ----------
140
def check_answer(qid, answer, user_id):
    """Mark a student's answer with the LLM and update their progress.

    Args:
        qid: Id of a question previously stored in ``questions_db``.
        answer: The student's answer text.
        user_id: Key under which progress is tracked in ``user_progress``.

    Returns:
        Markdown with score, feedback, tip and streak, or a short message
        when the input is empty, the question is unknown, or marking failed.
    """
    if not answer or not answer.strip():
        return "Write your answer!"

    # Find question
    q = next(
        (item for qs in questions_db.values() for item in qs if item.get("id") == qid),
        None,
    )
    if not q:
        return "Question not found."

    # Marks come from model output at upload time; guard against strings and
    # zero so the percentage below cannot raise.
    try:
        max_marks = max(int(q["marks"]), 1)
    except (TypeError, ValueError):
        max_marks = 1

    prompt = f"""
IGCSE Marking:
Question: {q['text']}
Marks: {max_marks}
Student: {answer}

Reply with ONLY this JSON ("score" is an integer from 0 to {max_marks}):
{{
"score": 3,
"feedback": "You used good vocab but forgot accents.",
"tip": "Always check verb endings!"
}}
"""

    try:
        resp = client.chat.completions.create(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=500,
        ).choices[0].message.content.strip()
    except Exception:  # network / API failure at the UI boundary
        return "AI marking busy — try again!"

    try:
        cleaned = resp.replace("```json", "").replace("```", "").strip()
        try:
            fb = json.loads(cleaned)
        except json.JSONDecodeError:
            # Retry on the outermost {...} span if the model added prose.
            start, end = cleaned.find("{"), cleaned.rfind("}")
            if start == -1 or end <= start:
                raise
            fb = json.loads(cleaned[start:end + 1])
        score = int(round(float(fb["score"])))
    except (ValueError, TypeError, KeyError, OverflowError):
        return "AI marking busy — try again!"

    # Never trust the model to stay inside the valid range.
    score = min(max(score, 0), max_marks)
    percent = int(100 * score / max_marks)

    # Progress
    p = user_progress.setdefault(user_id, {}).setdefault(
        q["topic"], {"c": 0, "t": 0, "s": 0}
    )
    p["t"] += 1
    if score >= max_marks * 0.7:
        p["c"] += 1
        p["s"] += 1
    else:
        p["s"] = 0

    return f"""
**Score: {score}/{max_marks} ({percent}%)**

**Feedback:**
{fb.get('feedback', '')}

**Tip:** {fb.get('tip', '')}

**Streak: {p['s']}** {'Fire' if p['s'] > 2 else 'Checkmark'} | +10 XP
"""
 
 
 
 
 
 
 
 
 
 
206
 
207
+ # ---------- 8. UI Helpers ----------
208
def refresh_papers():
    """Render the eight most recent uploads as a Markdown bullet list."""
    recent = papers_db[-8:]
    if not recent:
        return "No papers uploaded."
    bullets = [
        f"- **{p['title']}** ({p['subject']}) {p['questions']} Qs {p['date']}"
        for p in recent
    ]
    return "\n".join(bullets)
215
 
216
def show_progress(uid):
    """Build a Markdown summary of per-topic results recorded for ``uid``."""
    stats = user_progress.get(uid)
    if not stats:
        return "Answer questions to see progress!"

    report = [f"### {uid}'s Progress"]
    for topic, d in stats.items():
        # Accuracy is 0 until at least one attempt has been recorded.
        acc = d["c"] / d["t"] if d["t"] else 0
        report.append(
            f"- **{topic}**: {d['c']}/{d['t']} ({acc:.0%}) | Streak: {d['s']}"
        )
    return "\n".join(report)
224
+
225
# ---------- 9. GRADIO APP ----------
def _topic_choices(subject):
    """Return a Dropdown update listing ``subject``'s topics, first one selected.

    Setting ``value`` as well as ``choices`` prevents a topic from the
    previously selected subject staying selected.
    """
    topics = TOPICS[subject]
    return gr.Dropdown(choices=topics, value=topics[0] if topics else None)


def _tutor_reply(message, history):
    """Send the whole conversation to the model; return (cleared input, new history).

    NOTE(review): assumes the Chatbot uses the [user, assistant] pair
    format that the original handler appended — confirm against the
    installed Gradio version.
    """
    history = list(history or [])
    if not message or not message.strip():
        return "", history

    messages = []
    for user_turn, bot_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if bot_turn:
            messages.append({"role": "assistant", "content": bot_turn})
    messages.append({"role": "user", "content": message})

    try:
        reply = client.chat.completions.create(
            model=MODEL,
            messages=messages,
            max_tokens=600,
        ).choices[0].message.content
    except Exception as exc:  # show the failure in the chat instead of crashing
        reply = f"Tutor unavailable: {exc}"
    return "", history + [[message, reply]]


with gr.Blocks(theme=gr.themes.Soft(), title="IGCSE Language Pro") as app:
    gr.Markdown("# IGCSE Language Pro\n**Real Past-Paper Questions • AI Tutor • Zero Install**")

    user = gr.Textbox(label="Your Name", placeholder="e.g. Sarah2025", value="student")

    with gr.Tabs():
        with gr.Tab("Practice"):
            gr.Markdown("### Get Real Exam Questions")
            with gr.Row():
                sub = gr.Dropdown(["French", "EFL"], label="Subject", value="French")
                # Start with a topic selected so "Generate Question" works
                # without the student having to touch the dropdown first.
                top = gr.Dropdown(TOPICS["French"], label="Topic", value=TOPICS["French"][0])
            sub.change(_topic_choices, sub, top)

            question = gr.Markdown()
            # A named component for get_question's second output; creating an
            # anonymous gr.Textbox() inside the outputs list rendered a stray
            # unlabeled box on the page.
            hint = gr.Textbox(label="Hint", interactive=False)
            hidden_qid = gr.Textbox(visible=False)
            answer = gr.Textbox(lines=7, label="Your Answer", placeholder="Write in full...")
            feedback = gr.Markdown()

            gr.Button("Generate Question").click(get_question, [sub, top], [question, hint, hidden_qid])
            gr.Button("Check Answer").click(check_answer, [hidden_qid, answer, user], feedback)

            gr.Markdown("### Your Stats")
            stats = gr.Markdown()
            gr.Button("Refresh").click(show_progress, user, stats)

        with gr.Tab("AI Tutor"):
            chat = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Ask: How to use subjunctive?")
            msg.submit(_tutor_reply, [msg, chat], [msg, chat])

        with gr.Tab("Admin Upload"):
            gr.Markdown("### Upload Past Paper (PDF)")
            title = gr.Textbox(label="Title", placeholder="June 2023 Paper 21")
            subj = gr.Radio(["French", "EFL"], value="French")
            paper = gr.File(label="Question Paper PDF", file_types=[".pdf"])
            insert = gr.File(label="Insert (Optional)", file_types=[".pdf"])
            status = gr.Markdown()
            list_box = gr.Markdown(value=refresh_papers())

            gr.Button("Upload & Extract").click(
                process_past_paper, [title, subj, paper, insert], [status, list_box]
            )

    gr.Markdown("""
---
**Deploy in 30 seconds:**
1. Click "Duplicate Space"
2. Settings → Secrets → Add `HF_TOKEN`
3. Restart → Done!
""")

app.launch()