Files changed (1) hide show
  1. app.py +373 -430
app.py CHANGED
@@ -1,430 +1,373 @@
1
- import gradio as gr
2
- import os
3
- import threading
4
- import pathlib
5
- import base64
6
- import urllib.parse
7
-
8
- # ──────────────────────────────────────────────
9
- # FILE TEXT EXTRACTION
10
- # ──────────────────────────────────────────────
11
-
12
- SUPPORTED_EXT = (".pdf", ".docx", ".doc", ".txt",
13
- ".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff")
14
-
15
- def extract_text_from_file(filepath: str) -> str:
16
- """Extract plain text from PDF, DOCX, TXT, or image files."""
17
- if not filepath:
18
- return ""
19
- ext = pathlib.Path(filepath).suffix.lower()
20
- try:
21
- # ── PDF ──
22
- if ext == ".pdf":
23
- import fitz # pymupdf
24
- doc = fitz.open(filepath)
25
- return "\n".join(page.get_text() for page in doc).strip()
26
-
27
- # ── Word (.docx / .doc) ──
28
- elif ext in (".docx", ".doc"):
29
- from docx import Document
30
- doc = Document(filepath)
31
- return "\n".join(p.text for p in doc.paragraphs if p.text.strip()).strip()
32
-
33
- # ── Plain text ──
34
- elif ext == ".txt":
35
- with open(filepath, "r", encoding="utf-8", errors="replace") as f:
36
- return f.read().strip()
37
-
38
- # ── Images (OCR via pytesseract) ──
39
- elif ext in (".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"):
40
- try:
41
- import pytesseract
42
- from PIL import Image
43
- img = Image.open(filepath)
44
- return pytesseract.image_to_string(img).strip()
45
- except Exception as ocr_err:
46
- return (
47
- f"⚠️ OCR failed: {ocr_err}\n"
48
- "Ensure Tesseract-OCR is installed: https://github.com/UB-Mannheim/tesseract/wiki"
49
- )
50
-
51
- else:
52
- return f"⚠️ Unsupported file type: {ext}"
53
-
54
- except Exception as e:
55
- return f"⚠️ Could not extract text: {e}"
56
-
57
-
58
- # ──────────────────────────────────────────────
59
- # MODEL CONFIGURATIONS (all run via transformers pipeline)
60
- # ──────────────────────────────────────────────
61
- #
62
- # Grouped by RAM tier so users can pick what fits their machine.
63
- # Models are downloaded from HF Hub on first use and cached locally.
64
-
65
- MODELS = {
66
- # ── Tier 1: Fast (<2 GB RAM) ───────────────────────────
67
- "⚑ Qwen2.5-0.5B [~1 GB | Fastest]": "Qwen/Qwen2.5-0.5B-Instruct",
68
- "πŸ’« Qwen2.5-1.5B [~2 GB | Fast]": "Qwen/Qwen2.5-1.5B-Instruct",
69
- # ── Tier 2: Balanced (2–4 GB RAM) ──────────────────────
70
- "πŸ”¬ DeepSeek-R1-Distill 1.5B [~2 GB]": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
71
- "🦩 Llama-3.2-1B-Instruct [~2 GB]": "meta-llama/Llama-3.2-1B-Instruct",
72
- "🦩 Llama-3.2-3B-Instruct [~4 GB]": "meta-llama/Llama-3.2-3B-Instruct",
73
- "πŸ’Ά Phi-3-mini-4k [~4 GB | Strong]": "microsoft/Phi-3-mini-4k-instruct",
74
- # ── Tier 3: Quality (4–8 GB RAM) ───────────────────────
75
- "πŸ’Ž Gemma-2-2B-it [~3 GB | Google]": "google/gemma-2-2b-it",
76
- "πŸ”₯ Qwen2.5-3B [~4 GB | Balanced]": "Qwen/Qwen2.5-3B-Instruct",
77
- "πŸ₯‡ Llama-3.1-8B-Instruct [~8 GB]": "meta-llama/Llama-3.1-8B-Instruct",
78
- "πŸ₯‡ Qwen2.5-7B [~8 GB | Best quality]": "Qwen/Qwen2.5-7B-Instruct",
79
- }
80
-
81
- ALL_MODEL_NAMES = list(MODELS.keys())
82
-
83
- # ──────────────────────────────────────────────
84
- # PIPELINE CACHE (lazy-loaded, thread-safe)
85
- # ──────────────────────────────────────────────
86
- _pipeline_cache: dict = {}
87
- _pipeline_lock = threading.Lock()
88
-
89
-
90
- def get_pipeline(model_id: str, hf_token: str = ""):
91
- """Download (on first use) and cache a transformers text-generation pipeline."""
92
- with _pipeline_lock:
93
- if model_id not in _pipeline_cache:
94
- try:
95
- from transformers import pipeline, AutoTokenizer
96
- token = hf_token.strip() if hf_token else None
97
- tok = AutoTokenizer.from_pretrained(model_id, token=token)
98
- pipe = pipeline(
99
- "text-generation",
100
- model=model_id,
101
- tokenizer=tok,
102
- device_map="cpu",
103
- dtype="auto",
104
- trust_remote_code=True,
105
- token=token,
106
- )
107
- # Avoid conflict with max_length=20 default in some models
108
- pipe.model.generation_config.max_length = None
109
- _pipeline_cache[model_id] = pipe
110
- except Exception as e:
111
- return None, str(e)
112
- return _pipeline_cache[model_id], None
113
-
114
-
115
- # ──────────────────────────────────────────────
116
- # INFERENCE
117
- # ──────────────────────────────────────────────
118
-
119
- SYSTEM_MSG = (
120
- "You are an expert educational assistant. "
121
- "Always respond with clean, well-structured Markdown text."
122
- )
123
-
124
-
125
- def ask_llm(model_label: str, prompt: str, hf_token: str = "") -> str:
126
- """Run the prompt through the transformers pipeline for the selected model."""
127
- model_id = MODELS[model_label]
128
- pipe, err = get_pipeline(model_id, hf_token)
129
- if err:
130
- return (
131
- f"❌ **Failed to load `{model_id}`:**\n```\n{err}\n```\n\n"
132
- "*Tip: Check your internet connection or choose a smaller model.*"
133
- )
134
- try:
135
- messages = [
136
- {"role": "system", "content": SYSTEM_MSG},
137
- {"role": "user", "content": prompt},
138
- ]
139
- if pipe is None:
140
- return "❌ **Pipeline error: Pipeline object is None.**"
141
-
142
- # Pass generation params to the call to avoid constructor deprecation
143
- out = pipe(
144
- messages,
145
- max_new_tokens=1024,
146
- pad_token_id=pipe.tokenizer.eos_token_id if (pipe.tokenizer and pipe.tokenizer.eos_token_id is not None) else 50256
147
- )
148
- generated = out[0]["generated_text"]
149
- if isinstance(generated, list):
150
- # Chat-template output β€” last element is the assistant reply
151
- return generated[-1]["content"]
152
- # Plain-string fallback β€” strip the echoed prompt
153
- return generated[len(str(messages)):].strip()
154
- except Exception as e:
155
- return f"❌ **Inference error:**\n```\n{str(e)}\n```"
156
-
157
-
158
- # ──────────────────────────────────────────────
159
- # PROMPTS
160
- # ──────────────────────────────────────────────
161
-
162
- def make_prompts(syllabus: str) -> dict:
163
- return {
164
- "lesson": (
165
- f"Create comprehensive, engaging lesson materials for the following syllabus/topic. "
166
- f"Use clear ## headings, bullet points, bold key terms, and concise explanations "
167
- f"suitable for a student.\n\nSyllabus/Topic:\n{syllabus}"
168
- ),
169
- "qa": (
170
- f"Generate 8 important exam-style questions with detailed model answers based on "
171
- f"this syllabus/topic. Number each Q&A pair clearly.\n\nSyllabus/Topic:\n{syllabus}"
172
- ),
173
- "mcq": (
174
- f"Generate 8 multiple-choice questions based on this syllabus/topic. "
175
- f"Each question must have 4 options (A–D). After all questions, list the correct "
176
- f"answers with a brief explanation.\n\nSyllabus/Topic:\n{syllabus}"
177
- ),
178
- "mindmap": (
179
- f"Create a high-level Flowchart or Mindmap for the following syllabus/topic using Mermaid.js syntax.\n"
180
- f"STRICT RULES:\n"
181
- f"- Output ONLY the mermaid code block (```mermaid ... ```).\n"
182
- f"- Use 'graph TD' (for flowcharts) or 'mindmap' structure.\n"
183
- f"- This will be converted into a static picture, so keep labels clear.\n"
184
- f"- No introductory text, no explanation outside the block.\n"
185
- f"- Avoid special characters in node labels.\n\n"
186
- f"Syllabus/Topic:\n{syllabus}"
187
- ),
188
- "infographic": (
189
- f"Create a highly visual text-based cheat sheet / infographic for this syllabus/topic. "
190
- f"Use emojis, ASCII section dividers, tables, bullet points, and bold highlights "
191
- f"to make it easy to scan, remember, and share.\n\nSyllabus/Topic:\n{syllabus}"
192
- ),
193
- }
194
-
195
-
196
- # ──────────────────────────────────────────────
197
- # MAIN GENERATION FUNCTION (progressive generator)
198
- # ──────────────────────────────────────────────
199
-
200
- def render_mermaid_as_image(text: str) -> str:
201
- """Extract Mermaid code block and convert it to a mermaid.ink image URL."""
202
- import re
203
- import json
204
- # Look for ```mermaid ... ``` block
205
- match = re.search(r'```mermaid\s+(.*?)\s+```', text, re.DOTALL)
206
- if not match:
207
- return text # Return raw text if no block is found
208
-
209
- mermaid_code = match.group(1).strip()
210
-
211
- # Base64 encode the code for mermaid.ink (requires JSON wrapping for the best compatibility)
212
- try:
213
- data = {
214
- "code": mermaid_code,
215
- "mermaid": {"theme": "default"},
216
- "updateEditor": False,
217
- "autoSync": True,
218
- "updateDiagram": True
219
- }
220
- json_str = json.dumps(data)
221
- encoded = base64.b64encode(json_str.encode('utf-8')).decode('utf-8')
222
- image_url = f"https://mermaid.ink/img/{encoded}?type=webp"
223
-
224
- # Return ONLY the image tag as requested ("picture only")
225
- return f"![Flowchart/Mindmap]({image_url})"
226
- except Exception as e:
227
- return f"*⚠️ Failed to render flowchart as image: {e}*\n\n```mermaid\n{mermaid_code}\n```"
228
-
229
- def generate_content(syllabus_text: str, uploaded_file, model_label: str, hf_token: str):
230
- # Merge pasted text + uploaded file text
231
- file_text = extract_text_from_file(uploaded_file) if uploaded_file else ""
232
- syllabus = (syllabus_text.strip() + "\n\n" + file_text).strip()
233
-
234
- if not syllabus:
235
- yield ("⚠️ Please paste a syllabus/topic **or** upload a file.", "", "", "", "")
236
- return
237
-
238
- model_id = MODELS[model_label]
239
- mode_note = f"*Model: **`{model_id}`***"
240
- prompts = make_prompts(syllabus)
241
-
242
- WAIT = "⏳ Waiting…"
243
- steps = [
244
- ("πŸ“– Generating Lesson Material… (1/5)", "lesson"),
245
- ("❓ Generating Q&A… (2/5)", "qa"),
246
- ("βœ… Generating MCQs… (3/5)", "mcq"),
247
- ("🧠 Generating Mindmap… (4/5)", "mindmap"),
248
- ("πŸ“Š Generating Cheat Sheet… (5/5)", "infographic"),
249
- ]
250
-
251
- results = [mode_note + "\n\n" + steps[0][0], WAIT, WAIT, WAIT, WAIT]
252
- yield tuple(results)
253
-
254
- for i, (status_msg, key) in enumerate(steps):
255
- result = ask_llm(model_label, prompts[key], hf_token)
256
-
257
- # Post-process mindmap to purely render as an image URL
258
- if key == "mindmap":
259
- result = render_mermaid_as_image(result)
260
-
261
- results[i] = mode_note + "\n\n" + result
262
- if i + 1 < len(steps):
263
- results[i + 1] = steps[i + 1][0]
264
- yield tuple(results)
265
-
266
-
267
- # ──────────────────────────────────────────────
268
- # GRADIO UI
269
- # ──────────────────────────────────────────────
270
-
271
- CSS = """
272
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
273
-
274
- body, .gradio-container {
275
- font-family: 'Inter', sans-serif !important;
276
- }
277
-
278
- .app-header {
279
- background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
280
- border-radius: 16px;
281
- padding: 28px 32px;
282
- margin-bottom: 8px;
283
- border: 1px solid rgba(99,102,241,0.3);
284
- }
285
-
286
- .app-header h1 {
287
- font-size: 2rem !important;
288
- font-weight: 700 !important;
289
- background: linear-gradient(90deg, #818cf8, #c084fc, #38bdf8);
290
- -webkit-background-clip: text;
291
- -webkit-text-fill-color: transparent;
292
- margin-bottom: 6px !important;
293
- }
294
-
295
- .app-header p {
296
- color: #94a3b8 !important;
297
- font-size: 0.95rem;
298
- }
299
-
300
- .generate-btn {
301
- background: linear-gradient(135deg, #6366f1, #8b5cf6) !important;
302
- border: none !important;
303
- border-radius: 12px !important;
304
- font-weight: 600 !important;
305
- font-size: 1rem !important;
306
- transition: all 0.2s ease !important;
307
- width: 100% !important;
308
- }
309
-
310
- .generate-btn:hover {
311
- transform: translateY(-2px) !important;
312
- box-shadow: 0 8px 25px rgba(99,102,241,0.4) !important;
313
- }
314
- """
315
-
316
- with gr.Blocks() as demo:
317
-
318
- # ── Header ──
319
- gr.HTML("""
320
- <div class="app-header">
321
- <h1>πŸŽ“ AI Study Material Generator</h1>
322
- <p>Generate lesson notes, Q&amp;A, MCQs, a Mindmap, and a Cheat Sheet from any topic
323
- or syllabus β€” using SOTA open-source LLMs running entirely on your CPU via
324
- <code>transformers.pipeline</code>. No API key needed.</p>
325
- </div>
326
- """)
327
-
328
- # ── Input Row ──
329
- with gr.Row(equal_height=False):
330
-
331
- # Left: syllabus input (paste OR upload)
332
- with gr.Column(scale=4):
333
- with gr.Tabs():
334
- with gr.TabItem("✏️ Paste Text"):
335
- syllabus_input = gr.Textbox(
336
- show_label=False,
337
- placeholder=(
338
- "Paste your syllabus, topic, or any content here…\n"
339
- "e.g. The Water Cycle, Neural Networks, World War II, Photosynthesis"
340
- ),
341
- lines=7,
342
- )
343
- with gr.TabItem("πŸ“‚ Upload File"):
344
- gr.Markdown(
345
- "Upload a **PDF**, **Word (.docx)**, **plain text (.txt)**, "
346
- "or **image** (PNG / JPG / WEBP) β€” text is extracted automatically."
347
- )
348
- file_input = gr.File(
349
- label="Upload syllabus file",
350
- file_types=[".pdf", ".docx", ".doc", ".txt",
351
- ".png", ".jpg", ".jpeg", ".webp", ".bmp"],
352
- file_count="single",
353
- )
354
- file_preview = gr.Textbox(
355
- label="Extracted text preview",
356
- lines=4,
357
- interactive=False,
358
- placeholder="Text extracted from the file will appear here…",
359
- )
360
- # Live preview when file is uploaded
361
- file_input.change(
362
- fn=lambda f: extract_text_from_file(f) if f else "",
363
- inputs=file_input,
364
- outputs=file_preview,
365
- )
366
-
367
- # Right: model selector + generate button
368
- with gr.Column(scale=2):
369
- model_selector = gr.Dropdown(
370
- choices=ALL_MODEL_NAMES,
371
- value=ALL_MODEL_NAMES[0],
372
- label="πŸ€– Model (all run locally via pipeline)",
373
- info=(
374
- "Tier 1 = fastest / least RAM. "
375
- "Tier 3 = best quality / needs 6–8 GB RAM. "
376
- "Models download on first use."
377
- ),
378
- )
379
- gr.Markdown(
380
- "<small>πŸ’‘ **Llama** & **Gemma** models may require a Hugging Face login "
381
- "(`huggingface-cli login`) or a Token to download.</small>"
382
- )
383
- hf_token_input = gr.Textbox(
384
- label="πŸ”‘ HF Token (optional)",
385
- info="Required for gated models. Your token stays private.",
386
- type="password",
387
- placeholder="hf_...",
388
- )
389
- generate_btn = gr.Button(
390
- "⚑ Generate Study Materials",
391
- variant="primary",
392
- size="lg",
393
- elem_classes=["generate-btn"],
394
- )
395
-
396
- gr.HTML("<hr style='margin:8px 0; border-color:rgba(99,102,241,0.2)'>")
397
-
398
- # ── Output Tabs ──
399
- with gr.Tabs():
400
- with gr.TabItem("πŸ“– Lesson Material"):
401
- lesson_output = gr.Markdown(value="*Results will appear here after generation.*")
402
- with gr.TabItem("❓ Q & A"):
403
- qa_output = gr.Markdown(value="*Results will appear here after generation.*")
404
- with gr.TabItem("βœ… MCQs"):
405
- mcq_output = gr.Markdown(value="*Results will appear here after generation.*")
406
- with gr.TabItem("🧠 Mindmap"):
407
- gr.Markdown("*The diagram is generated as an image (powered by mermaid.ink).*")
408
- mindmap_output = gr.Markdown(value="*Results will appear here after generation.*")
409
- with gr.TabItem("πŸ“Š Cheat Sheet"):
410
- infographic_output = gr.Markdown(value="*Results will appear here after generation.*")
411
-
412
- # ── Footer ──
413
- gr.HTML("""
414
- <div style='text-align:center; color:#64748b; font-size:0.8rem; margin-top:12px;'>
415
- Built with πŸ€— Gradio Β· Hugging Face Transformers β€” 100% open-source Β· runs offline on CPU
416
- </div>
417
- """)
418
-
419
- # ── Wire up button ──
420
- generate_btn.click(
421
- fn=generate_content,
422
- inputs=[syllabus_input, file_input, model_selector, hf_token_input],
423
- outputs=[lesson_output, qa_output, mcq_output, mindmap_output, infographic_output],
424
- )
425
-
426
- if __name__ == "__main__":
427
- demo.launch(
428
- theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
429
- css=CSS,
430
- )
 
1
+ from docx import Document
2
+ import pytesseract
3
+ from PIL import Image
4
+ import fitz
5
+ import gradio as gr
6
+ import threading
7
+ import pathlib
8
+
9
+ # --------------------------------------------------
10
+ # FILE TEXT EXTRACTION
11
+ # --------------------------------------------------
12
+
13
# Extensions the extractor knows how to handle (kept in sync with the
# gr.File picker in the UI).
SUPPORTED_EXT = (".pdf", ".docx", ".doc", ".txt",
                 ".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff")

def extract_text_from_file(filepath):
    """Extract plain text from a PDF, Word, TXT, or image file.

    Args:
        filepath: Path string, or a Gradio upload object exposing ``.name``.

    Returns:
        The extracted text, or a human-readable error/warning string.
        Never raises — callers feed the result straight into the UI.

    NOTE(review): ``fitz``, ``Document``, ``Image`` and ``pytesseract`` are
    imported at module top level, so a missing OCR/PDF stack crashes the app
    at startup rather than here — consider lazy imports.
    """
    if not filepath:
        return ""

    # Gradio may hand us a tempfile wrapper instead of a plain path.
    if hasattr(filepath, "name"):
        filepath = filepath.name

    ext = pathlib.Path(filepath).suffix.lower()

    try:
        if ext == ".pdf":
            doc = fitz.open(filepath)
            try:
                return "\n".join(page.get_text() for page in doc).strip()
            finally:
                # Release the file handle explicitly (was leaked before).
                doc.close()

        elif ext in (".docx", ".doc"):
            # The UI file picker offers .doc as well; python-docx only truly
            # supports .docx, so a legacy .doc falls through to the error
            # message below if parsing fails.
            doc = Document(filepath)
            return "\n".join(p.text for p in doc.paragraphs).strip()

        elif ext == ".txt":
            with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                return f.read().strip()

        elif ext in (".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"):
            try:
                img = Image.open(filepath)
                return pytesseract.image_to_string(img).strip()
            except Exception as e:
                # Typically means the Tesseract binary itself is missing.
                return "OCR failed: " + str(e)

        else:
            return "Unsupported file type: " + ext

    except Exception as e:
        return "Could not read file: " + str(e)
54
+
55
+
56
+ # --------------------------------------------------
57
+ # MODELS (verified HuggingFace IDs)
58
+ # --------------------------------------------------
59
+
60
# Model catalogue shown in the UI dropdown, grouped by approximate RAM need.
# Keys are the human-readable labels; values are Hugging Face Hub model ids.
# NOTE(review): despite the "verified" claim, several ids (the "Qwen 3.5"
# family, "DeepSeek-V3-Lite", "Mistral-Small-3-Instruct") do not look like
# published Hub repos — verify each id exists, since a typo only surfaces as
# a load error at runtime.
# NOTE(review): a GGUF repo (Qwen3-0.6B-GGUF) cannot be loaded through
# AutoModelForCausalLM as get_pipeline does — confirm this entry works.
MODELS = {
    # < 1GB: Quick slide summaries or vocab lists
    "Gemma 3 270M [0.6GB | Lightning-fast Edge]": "google/gemma-3-270m-it",
    "Qwen 3 0.6B GGUF [0.5GB | Classroom Assistant]": "Qwen/Qwen3-0.6B-GGUF",
    "TinyLlama 1.1B [0.5GB]": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",

    # 1-3GB: Smart mobile & basic assistants
    "Qwen 3.5 2B [2.4GB | The Student Tutor]": "Qwen/Qwen3.5-2B",
    "Phi-4 Mini [1.8GB | Logical Powerhouse]": "microsoft/Phi-4-mini-instruct",
    "Gemma 3 1B [2.1GB | Stable & Coherent]": "google/gemma-3-1b-it",

    # 3-8GB: The "Daily Driver" sweet spot
    "Qwen 3.5 9B [7.8GB | BEST FOR LESSON PLANS]": "Qwen/Qwen3.5-9B",
    "Llama 3.1 8B [5.2GB | Industry Standard]": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "Mistral Small 3 [7.1GB | Concise & Accurate]": "mistralai/Mistral-Small-3-Instruct",
    "Gemma 3 9B [6.3GB | Creative & Safe]": "google/gemma-3-9b-it",

    # 8-12GB: Enhanced reasoning for complex curricula
    "Qwen 3.5 35B-A3B [11.5GB | Elite Pedagogy MoE]": "Qwen/Qwen3.5-35B-A3B",
    "Mistral Small 12B [9.5GB | Perfect VRAM Balance]": "mistralai/Mistral-Nemo-Instruct-2407",

    # 12-20GB: Professional grade logic
    "Qwen 3.5 27B [18GB | Dense Curriculum Architect]": "Qwen/Qwen3.5-27B",
    "DeepSeek V3 Lite 21B [16.0GB | Academic Beast]": "deepseek-ai/DeepSeek-V3-Lite",

    # > 20GB: The Frontier models
    "Qwen 3.5 397B-A17B [75GB+ | Full Textbook Author]": "Qwen/Qwen3.5-397B-A17B",
    "GPT-OSS 120B [72GB+ | SOTA Logic & Coding]": "openai/gpt-oss-120b"
}

# Dropdown choices, in MODELS insertion order (first entry is the default).
ALL_MODEL_NAMES = list(MODELS.keys())
91
+
92
+
93
+ # --------------------------------------------------
94
+ # PIPELINE CACHE
95
+ # --------------------------------------------------
96
+
97
+ _pipeline_cache = {}
98
+ _pipeline_lock = threading.Lock()
99
+
100
+
101
+ def get_pipeline(model_id, hf_token=""):
102
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
103
+
104
+ with _pipeline_lock:
105
+ if model_id not in _pipeline_cache:
106
+ try:
107
+ token = hf_token.strip() if hf_token else None
108
+
109
+ tokenizer = AutoTokenizer.from_pretrained(
110
+ model_id,
111
+ token=token,
112
+ trust_remote_code=True
113
+ )
114
+
115
+ model = AutoModelForCausalLM.from_pretrained(
116
+ model_id,
117
+ token=token,
118
+ trust_remote_code=True,
119
+ device_map="cpu"
120
+ )
121
+
122
+ pipe = pipeline(
123
+ "text-generation",
124
+ model=model,
125
+ tokenizer=tokenizer
126
+ )
127
+
128
+ _pipeline_cache[model_id] = pipe
129
+
130
+ except Exception as e:
131
+ return None, str(e)
132
+
133
+ return _pipeline_cache[model_id], None
134
+
135
+
136
+ # --------------------------------------------------
137
+ # INFERENCE
138
+ # --------------------------------------------------
139
+
140
# Prepended to every prompt so each section comes back as markdown.
SYSTEM_MSG = "You are an expert educational assistant. Use markdown."


def ask_llm(model_label, prompt, hf_token=""):
    """Generate a markdown answer for *prompt* with the selected model.

    Args:
        model_label: A key of MODELS (the dropdown label).
        prompt: The user prompt for one study-material section.
        hf_token: Optional HF access token for gated models.

    Returns:
        The model's reply, or an error message string if the pipeline
        could not be loaded or inference failed.
    """
    pipe, load_error = get_pipeline(MODELS[model_label], hf_token)
    if load_error:
        return "Model load error:\n" + load_error

    try:
        full_prompt = SYSTEM_MSG + "\n\n" + prompt
        generations = pipe(
            full_prompt,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
        )
        reply = generations[0]["generated_text"]

        # Some models echo the prompt back verbatim; drop it if present.
        if reply[:len(full_prompt)] == full_prompt:
            reply = reply[len(full_prompt):]

        return reply.strip()

    except Exception as e:
        return "Inference error:\n" + str(e)
170
+
171
+
172
+ # --------------------------------------------------
173
+ # PROMPTS
174
+ # --------------------------------------------------
175
+
176
def make_prompts(topic):
    """Build the five generation prompts for *topic*, keyed by section name."""
    instructions = {
        "lesson": "Create lesson notes with headings and bullet points.",
        "qa": "Generate 8 exam questions with answers.",
        "mcq": "Generate 8 MCQs with 4 options and answers.",
        "summary": "Summarize the topic in 200 words.",
        "infographic": "Create a cheat sheet using tables and bullet points.",
    }
    return {
        section: f"{task}\n\nTopic:\n{topic}"
        for section, task in instructions.items()
    }
184
+
185
+
186
def generate_content(text, file, model_label, token):
    """Progressively generate all five study-material sections.

    A generator wired to the Generate button: it yields a 5-tuple
    (lesson, qa, mcq, summary, cheat-sheet) after each section finishes
    so the UI updates incrementally.

    Args:
        text: Pasted syllabus text (may be None if the box was untouched).
        file: Uploaded file (or None).
        model_label: Selected MODELS key.
        token: Optional HF token, forwarded to the model loader.
    """
    file_text = extract_text_from_file(file) if file else ""

    # Gradio can pass None for an untouched textbox; guard before concat.
    syllabus = ((text or "") + "\n\n" + file_text).strip()

    if not syllabus:
        yield ("Provide topic or file", "", "", "", "")
        return

    prompts = make_prompts(syllabus)

    WAIT = "Generating..."
    results = [WAIT] * 5

    yield tuple(results)

    order = ["lesson", "qa", "mcq", "summary", "infographic"]

    for i, key in enumerate(order):
        results[i] = ask_llm(model_label, prompts[key], token)
        yield tuple(results)
210
+
211
+
212
+ # --------------------------------------------------
213
+ # UI
214
+ # --------------------------------------------------
215
+
216
+ CSS = """
217
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
218
+
219
+ body, .gradio-container {
220
+ font-family: 'Inter', sans-serif !important;
221
+ }
222
+
223
+ .app-header {
224
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
225
+ border-radius: 16px;
226
+ padding: 28px 32px;
227
+ margin-bottom: 8px;
228
+ border: 1px solid rgba(99,102,241,0.3);
229
+ }
230
+
231
+ .app-header h1 {
232
+ font-size: 2rem !important;
233
+ font-weight: 700 !important;
234
+ background: linear-gradient(90deg, #818cf8, #c084fc, #38bdf8);
235
+ -webkit-background-clip: text;
236
+ -webkit-text-fill-color: transparent;
237
+ margin-bottom: 6px !important;
238
+ }
239
+
240
+ .app-header p {
241
+ color: #94a3b8 !important;
242
+ font-size: 0.95rem;
243
+ }
244
+
245
+ .generate-btn {
246
+ background: linear-gradient(135deg, #6366f1, #8b5cf6) !important;
247
+ border: none !important;
248
+ border-radius: 12px !important;
249
+ font-weight: 600 !important;
250
+ font-size: 1rem !important;
251
+ transition: all 0.2s ease !important;
252
+ width: 100% !important;
253
+ }
254
+
255
+ .generate-btn:hover {
256
+ transform: translateY(-2px) !important;
257
+ box-shadow: 0 8px 25px rgba(99,102,241,0.4) !important;
258
+ }
259
+ """
260
+
261
# Theme and CSS must be passed to the Blocks constructor —
# Blocks.launch() accepts neither and would raise a TypeError.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
    css=CSS,
) as demo:

    # ── Header ──
    gr.HTML("""
    <div class="app-header">
        <h1>🎓 AI Study Material Generator</h1>
        <p>Generate lesson notes, Q&amp;A, MCQs, a Summary, and a Cheat Sheet from any topic
        or syllabus — using SOTA open-source LLMs running entirely on your CPU via
        <code>transformers.pipeline</code>. No API key needed.</p>
    </div>
    """)

    # ── Input Row ──
    with gr.Row(equal_height=False):

        # Left: syllabus input (paste OR upload)
        with gr.Column(scale=4):
            with gr.Tabs():
                with gr.TabItem("✏️ Paste Text"):
                    text_input = gr.Textbox(
                        show_label=False,
                        placeholder=(
                            "Paste your syllabus, topic, or any content here…\n"
                            "e.g. The Water Cycle, Neural Networks, World War II, Photosynthesis"
                        ),
                        lines=7,
                    )
                with gr.TabItem("📂 Upload File"):
                    gr.Markdown(
                        "Upload a **PDF**, **Word (.docx)**, **plain text (.txt)**, "
                        "or **image** (PNG / JPG / WEBP) — text is extracted automatically."
                    )
                    file_input = gr.File(
                        label="Upload syllabus file",
                        file_types=[".pdf", ".docx", ".doc", ".txt",
                                    ".png", ".jpg", ".jpeg", ".webp", ".bmp"],
                        file_count="single",
                    )
                    file_preview = gr.Textbox(
                        label="Extracted text preview",
                        lines=4,
                        interactive=False,
                        placeholder="Text extracted from the file will appear here…",
                    )
                    # Live preview when a file is uploaded
                    file_input.change(
                        fn=lambda f: extract_text_from_file(f) if f else "",
                        inputs=file_input,
                        outputs=file_preview,
                    )

        # Right: model selector + generate button
        with gr.Column(scale=2):
            model_selector = gr.Dropdown(
                choices=ALL_MODEL_NAMES,
                value=ALL_MODEL_NAMES[0],
                label="🤖 Model (all run locally via pipeline)",
                info=(
                    "Smaller models are fastest and need the least RAM; "
                    "larger ones give the best quality (RAM needs shown in each label). "
                    "Models download on first use."
                ),
            )
            gr.Markdown(
                "<small>💡 **Llama** & **Gemma** models may require a Hugging Face login "
                "(`huggingface-cli login`) or a Token to download.</small>"
            )
            token_box = gr.Textbox(
                label="🔑 HF Token (optional)",
                info="Required for gated models. Your token stays private.",
                type="password",
                placeholder="hf_...",
            )
            btn = gr.Button(
                "⚡ Generate Study Materials",
                variant="primary",
                size="lg",
                elem_classes=["generate-btn"],
            )

    gr.HTML("<hr style='margin:8px 0; border-color:rgba(99,102,241,0.2)'>")

    # ── Output Tabs (one per generated section, in generation order) ──
    with gr.Tabs():
        with gr.TabItem("📖 Lesson Material"):
            lesson = gr.Markdown(value="*Results will appear here after generation.*")
        with gr.TabItem("❓ Q & A"):
            qa = gr.Markdown(value="*Results will appear here after generation.*")
        with gr.TabItem("✅ MCQs"):
            mcq = gr.Markdown(value="*Results will appear here after generation.*")
        with gr.TabItem("📝 Summary"):
            summary = gr.Markdown(value="*Results will appear here after generation.*")
        with gr.TabItem("📊 Cheat Sheet"):
            cheat = gr.Markdown(value="*Results will appear here after generation.*")

    # ── Footer ──
    gr.HTML("""
    <div style='text-align:center; color:#64748b; font-size:0.8rem; margin-top:12px;'>
        Built with 🤗 Gradio · Hugging Face Transformers — 100% open-source · runs offline on CPU
    </div>
    """)

    # ── Wire up button ── (generate_content is a generator, so outputs
    # stream section by section)
    btn.click(
        fn=generate_content,
        inputs=[text_input, file_input, model_selector, token_box],
        outputs=[lesson, qa, mcq, summary, cheat],
    )

# Guard so importing this module (e.g. for tests) doesn't start the server.
if __name__ == "__main__":
    demo.launch()