Files changed (1) hide show
  1. app.py +373 -430
app.py CHANGED
@@ -1,430 +1,373 @@
1
- import gradio as gr
2
- import os
3
- import threading
4
- import pathlib
5
- import base64
6
- import urllib.parse
7
-
8
- # ──────────────────────────────────────────────
9
- # FILE TEXT EXTRACTION
10
- # ──────────────────────────────────────────────
11
-
12
- SUPPORTED_EXT = (".pdf", ".docx", ".doc", ".txt",
13
- ".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff")
14
-
15
- def extract_text_from_file(filepath: str) -> str:
16
- """Extract plain text from PDF, DOCX, TXT, or image files."""
17
- if not filepath:
18
- return ""
19
- ext = pathlib.Path(filepath).suffix.lower()
20
- try:
21
- # ── PDF ──
22
- if ext == ".pdf":
23
- import fitz # pymupdf
24
- doc = fitz.open(filepath)
25
- return "\n".join(page.get_text() for page in doc).strip()
26
-
27
- # ── Word (.docx / .doc) ──
28
- elif ext in (".docx", ".doc"):
29
- from docx import Document
30
- doc = Document(filepath)
31
- return "\n".join(p.text for p in doc.paragraphs if p.text.strip()).strip()
32
-
33
- # ── Plain text ──
34
- elif ext == ".txt":
35
- with open(filepath, "r", encoding="utf-8", errors="replace") as f:
36
- return f.read().strip()
37
-
38
- # ── Images (OCR via pytesseract) ──
39
- elif ext in (".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"):
40
- try:
41
- import pytesseract
42
- from PIL import Image
43
- img = Image.open(filepath)
44
- return pytesseract.image_to_string(img).strip()
45
- except Exception as ocr_err:
46
- return (
47
- f"⚠️ OCR failed: {ocr_err}\n"
48
- "Ensure Tesseract-OCR is installed: https://github.com/UB-Mannheim/tesseract/wiki"
49
- )
50
-
51
- else:
52
- return f"⚠️ Unsupported file type: {ext}"
53
-
54
- except Exception as e:
55
- return f"⚠️ Could not extract text: {e}"
56
-
57
-
58
- # ──────────────────────────────────────────────
59
- # MODEL CONFIGURATIONS (all run via transformers pipeline)
60
- # ──────────────────────────────────────────────
61
- #
62
- # Grouped by RAM tier so users can pick what fits their machine.
63
- # Models are downloaded from HF Hub on first use and cached locally.
64
-
65
- MODELS = {
66
- # ── Tier 1: Fast (<2 GB RAM) ───────────────────────────
67
- "⚑ Qwen2.5-0.5B [~1 GB | Fastest]": "Qwen/Qwen2.5-0.5B-Instruct",
68
- "πŸ’« Qwen2.5-1.5B [~2 GB | Fast]": "Qwen/Qwen2.5-1.5B-Instruct",
69
- # ── Tier 2: Balanced (2–4 GB RAM) ──────────────────────
70
- "πŸ”¬ DeepSeek-R1-Distill 1.5B [~2 GB]": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
71
- "🦩 Llama-3.2-1B-Instruct [~2 GB]": "meta-llama/Llama-3.2-1B-Instruct",
72
- "🦩 Llama-3.2-3B-Instruct [~4 GB]": "meta-llama/Llama-3.2-3B-Instruct",
73
- "πŸ’Ά Phi-3-mini-4k [~4 GB | Strong]": "microsoft/Phi-3-mini-4k-instruct",
74
- # ── Tier 3: Quality (4–8 GB RAM) ───────────────────────
75
- "πŸ’Ž Gemma-2-2B-it [~3 GB | Google]": "google/gemma-2-2b-it",
76
- "πŸ”₯ Qwen2.5-3B [~4 GB | Balanced]": "Qwen/Qwen2.5-3B-Instruct",
77
- "πŸ₯‡ Llama-3.1-8B-Instruct [~8 GB]": "meta-llama/Llama-3.1-8B-Instruct",
78
- "πŸ₯‡ Qwen2.5-7B [~8 GB | Best quality]": "Qwen/Qwen2.5-7B-Instruct",
79
- }
80
-
81
- ALL_MODEL_NAMES = list(MODELS.keys())
82
-
83
- # ──────────────────────────────────────────────
84
- # PIPELINE CACHE (lazy-loaded, thread-safe)
85
- # ──────────────────────────────────────────────
86
- _pipeline_cache: dict = {}
87
- _pipeline_lock = threading.Lock()
88
-
89
-
90
- def get_pipeline(model_id: str, hf_token: str = ""):
91
- """Download (on first use) and cache a transformers text-generation pipeline."""
92
- with _pipeline_lock:
93
- if model_id not in _pipeline_cache:
94
- try:
95
- from transformers import pipeline, AutoTokenizer
96
- token = hf_token.strip() if hf_token else None
97
- tok = AutoTokenizer.from_pretrained(model_id, token=token)
98
- pipe = pipeline(
99
- "text-generation",
100
- model=model_id,
101
- tokenizer=tok,
102
- device_map="cpu",
103
- dtype="auto",
104
- trust_remote_code=True,
105
- token=token,
106
- )
107
- # Avoid conflict with max_length=20 default in some models
108
- pipe.model.generation_config.max_length = None
109
- _pipeline_cache[model_id] = pipe
110
- except Exception as e:
111
- return None, str(e)
112
- return _pipeline_cache[model_id], None
113
-
114
-
115
- # ──────────────────────────────────────────────
116
- # INFERENCE
117
- # ──────────────────────────────────────────────
118
-
119
- SYSTEM_MSG = (
120
- "You are an expert educational assistant. "
121
- "Always respond with clean, well-structured Markdown text."
122
- )
123
-
124
-
125
- def ask_llm(model_label: str, prompt: str, hf_token: str = "") -> str:
126
- """Run the prompt through the transformers pipeline for the selected model."""
127
- model_id = MODELS[model_label]
128
- pipe, err = get_pipeline(model_id, hf_token)
129
- if err:
130
- return (
131
- f"❌ **Failed to load `{model_id}`:**\n```\n{err}\n```\n\n"
132
- "*Tip: Check your internet connection or choose a smaller model.*"
133
- )
134
- try:
135
- messages = [
136
- {"role": "system", "content": SYSTEM_MSG},
137
- {"role": "user", "content": prompt},
138
- ]
139
- if pipe is None:
140
- return "❌ **Pipeline error: Pipeline object is None.**"
141
-
142
- # Pass generation params to the call to avoid constructor deprecation
143
- out = pipe(
144
- messages,
145
- max_new_tokens=1024,
146
- pad_token_id=pipe.tokenizer.eos_token_id if (pipe.tokenizer and pipe.tokenizer.eos_token_id is not None) else 50256
147
- )
148
- generated = out[0]["generated_text"]
149
- if isinstance(generated, list):
150
- # Chat-template output β€” last element is the assistant reply
151
- return generated[-1]["content"]
152
- # Plain-string fallback β€” strip the echoed prompt
153
- return generated[len(str(messages)):].strip()
154
- except Exception as e:
155
- return f"❌ **Inference error:**\n```\n{str(e)}\n```"
156
-
157
-
158
- # ──────────────────────────────────────────────
159
- # PROMPTS
160
- # ──────────────────────────────────────────────
161
-
162
- def make_prompts(syllabus: str) -> dict:
163
- return {
164
- "lesson": (
165
- f"Create comprehensive, engaging lesson materials for the following syllabus/topic. "
166
- f"Use clear ## headings, bullet points, bold key terms, and concise explanations "
167
- f"suitable for a student.\n\nSyllabus/Topic:\n{syllabus}"
168
- ),
169
- "qa": (
170
- f"Generate 8 important exam-style questions with detailed model answers based on "
171
- f"this syllabus/topic. Number each Q&A pair clearly.\n\nSyllabus/Topic:\n{syllabus}"
172
- ),
173
- "mcq": (
174
- f"Generate 8 multiple-choice questions based on this syllabus/topic. "
175
- f"Each question must have 4 options (A–D). After all questions, list the correct "
176
- f"answers with a brief explanation.\n\nSyllabus/Topic:\n{syllabus}"
177
- ),
178
- "mindmap": (
179
- f"Create a high-level Flowchart or Mindmap for the following syllabus/topic using Mermaid.js syntax.\n"
180
- f"STRICT RULES:\n"
181
- f"- Output ONLY the mermaid code block (```mermaid ... ```).\n"
182
- f"- Use 'graph TD' (for flowcharts) or 'mindmap' structure.\n"
183
- f"- This will be converted into a static picture, so keep labels clear.\n"
184
- f"- No introductory text, no explanation outside the block.\n"
185
- f"- Avoid special characters in node labels.\n\n"
186
- f"Syllabus/Topic:\n{syllabus}"
187
- ),
188
- "infographic": (
189
- f"Create a highly visual text-based cheat sheet / infographic for this syllabus/topic. "
190
- f"Use emojis, ASCII section dividers, tables, bullet points, and bold highlights "
191
- f"to make it easy to scan, remember, and share.\n\nSyllabus/Topic:\n{syllabus}"
192
- ),
193
- }
194
-
195
-
196
- # ──────────────────────────────────────────────
197
- # MAIN GENERATION FUNCTION (progressive generator)
198
- # ──────────────────────────────────────────────
199
-
200
- def render_mermaid_as_image(text: str) -> str:
201
- """Extract Mermaid code block and convert it to a mermaid.ink image URL."""
202
- import re
203
- import json
204
- # Look for ```mermaid ... ``` block
205
- match = re.search(r'```mermaid\s+(.*?)\s+```', text, re.DOTALL)
206
- if not match:
207
- return text # Return raw text if no block is found
208
-
209
- mermaid_code = match.group(1).strip()
210
-
211
- # Base64 encode the code for mermaid.ink (requires JSON wrapping for the best compatibility)
212
- try:
213
- data = {
214
- "code": mermaid_code,
215
- "mermaid": {"theme": "default"},
216
- "updateEditor": False,
217
- "autoSync": True,
218
- "updateDiagram": True
219
- }
220
- json_str = json.dumps(data)
221
- encoded = base64.b64encode(json_str.encode('utf-8')).decode('utf-8')
222
- image_url = f"https://mermaid.ink/img/{encoded}?type=webp"
223
-
224
- # Return ONLY the image tag as requested ("picture only")
225
- return f"![Flowchart/Mindmap]({image_url})"
226
- except Exception as e:
227
- return f"*⚠️ Failed to render flowchart as image: {e}*\n\n```mermaid\n{mermaid_code}\n```"
228
-
229
- def generate_content(syllabus_text: str, uploaded_file, model_label: str, hf_token: str):
230
- # Merge pasted text + uploaded file text
231
- file_text = extract_text_from_file(uploaded_file) if uploaded_file else ""
232
- syllabus = (syllabus_text.strip() + "\n\n" + file_text).strip()
233
-
234
- if not syllabus:
235
- yield ("⚠️ Please paste a syllabus/topic **or** upload a file.", "", "", "", "")
236
- return
237
-
238
- model_id = MODELS[model_label]
239
- mode_note = f"*Model: **`{model_id}`***"
240
- prompts = make_prompts(syllabus)
241
-
242
- WAIT = "⏳ Waiting…"
243
- steps = [
244
- ("πŸ“– Generating Lesson Material… (1/5)", "lesson"),
245
- ("❓ Generating Q&A… (2/5)", "qa"),
246
- ("βœ… Generating MCQs… (3/5)", "mcq"),
247
- ("🧠 Generating Mindmap… (4/5)", "mindmap"),
248
- ("πŸ“Š Generating Cheat Sheet… (5/5)", "infographic"),
249
- ]
250
-
251
- results = [mode_note + "\n\n" + steps[0][0], WAIT, WAIT, WAIT, WAIT]
252
- yield tuple(results)
253
-
254
- for i, (status_msg, key) in enumerate(steps):
255
- result = ask_llm(model_label, prompts[key], hf_token)
256
-
257
- # Post-process mindmap to purely render as an image URL
258
- if key == "mindmap":
259
- result = render_mermaid_as_image(result)
260
-
261
- results[i] = mode_note + "\n\n" + result
262
- if i + 1 < len(steps):
263
- results[i + 1] = steps[i + 1][0]
264
- yield tuple(results)
265
-
266
-
267
- # ──────────────────────────────────────────────
268
- # GRADIO UI
269
- # ──────────────────────────────────────────────
270
-
271
- CSS = """
272
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
273
-
274
- body, .gradio-container {
275
- font-family: 'Inter', sans-serif !important;
276
- }
277
-
278
- .app-header {
279
- background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
280
- border-radius: 16px;
281
- padding: 28px 32px;
282
- margin-bottom: 8px;
283
- border: 1px solid rgba(99,102,241,0.3);
284
- }
285
-
286
- .app-header h1 {
287
- font-size: 2rem !important;
288
- font-weight: 700 !important;
289
- background: linear-gradient(90deg, #818cf8, #c084fc, #38bdf8);
290
- -webkit-background-clip: text;
291
- -webkit-text-fill-color: transparent;
292
- margin-bottom: 6px !important;
293
- }
294
-
295
- .app-header p {
296
- color: #94a3b8 !important;
297
- font-size: 0.95rem;
298
- }
299
-
300
- .generate-btn {
301
- background: linear-gradient(135deg, #6366f1, #8b5cf6) !important;
302
- border: none !important;
303
- border-radius: 12px !important;
304
- font-weight: 600 !important;
305
- font-size: 1rem !important;
306
- transition: all 0.2s ease !important;
307
- width: 100% !important;
308
- }
309
-
310
- .generate-btn:hover {
311
- transform: translateY(-2px) !important;
312
- box-shadow: 0 8px 25px rgba(99,102,241,0.4) !important;
313
- }
314
- """
315
-
316
- with gr.Blocks() as demo:
317
-
318
- # ── Header ──
319
- gr.HTML("""
320
- <div class="app-header">
321
- <h1>πŸŽ“ AI Study Material Generator</h1>
322
- <p>Generate lesson notes, Q&amp;A, MCQs, a Mindmap, and a Cheat Sheet from any topic
323
- or syllabus β€” using SOTA open-source LLMs running entirely on your CPU via
324
- <code>transformers.pipeline</code>. No API key needed.</p>
325
- </div>
326
- """)
327
-
328
- # ── Input Row ──
329
- with gr.Row(equal_height=False):
330
-
331
- # Left: syllabus input (paste OR upload)
332
- with gr.Column(scale=4):
333
- with gr.Tabs():
334
- with gr.TabItem("✏️ Paste Text"):
335
- syllabus_input = gr.Textbox(
336
- show_label=False,
337
- placeholder=(
338
- "Paste your syllabus, topic, or any content here…\n"
339
- "e.g. The Water Cycle, Neural Networks, World War II, Photosynthesis"
340
- ),
341
- lines=7,
342
- )
343
- with gr.TabItem("πŸ“‚ Upload File"):
344
- gr.Markdown(
345
- "Upload a **PDF**, **Word (.docx)**, **plain text (.txt)**, "
346
- "or **image** (PNG / JPG / WEBP) β€” text is extracted automatically."
347
- )
348
- file_input = gr.File(
349
- label="Upload syllabus file",
350
- file_types=[".pdf", ".docx", ".doc", ".txt",
351
- ".png", ".jpg", ".jpeg", ".webp", ".bmp"],
352
- file_count="single",
353
- )
354
- file_preview = gr.Textbox(
355
- label="Extracted text preview",
356
- lines=4,
357
- interactive=False,
358
- placeholder="Text extracted from the file will appear here…",
359
- )
360
- # Live preview when file is uploaded
361
- file_input.change(
362
- fn=lambda f: extract_text_from_file(f) if f else "",
363
- inputs=file_input,
364
- outputs=file_preview,
365
- )
366
-
367
- # Right: model selector + generate button
368
- with gr.Column(scale=2):
369
- model_selector = gr.Dropdown(
370
- choices=ALL_MODEL_NAMES,
371
- value=ALL_MODEL_NAMES[0],
372
- label="πŸ€– Model (all run locally via pipeline)",
373
- info=(
374
- "Tier 1 = fastest / least RAM. "
375
- "Tier 3 = best quality / needs 6–8 GB RAM. "
376
- "Models download on first use."
377
- ),
378
- )
379
- gr.Markdown(
380
- "<small>πŸ’‘ **Llama** & **Gemma** models may require a Hugging Face login "
381
- "(`huggingface-cli login`) or a Token to download.</small>"
382
- )
383
- hf_token_input = gr.Textbox(
384
- label="πŸ”‘ HF Token (optional)",
385
- info="Required for gated models. Your token stays private.",
386
- type="password",
387
- placeholder="hf_...",
388
- )
389
- generate_btn = gr.Button(
390
- "⚑ Generate Study Materials",
391
- variant="primary",
392
- size="lg",
393
- elem_classes=["generate-btn"],
394
- )
395
-
396
- gr.HTML("<hr style='margin:8px 0; border-color:rgba(99,102,241,0.2)'>")
397
-
398
- # ── Output Tabs ──
399
- with gr.Tabs():
400
- with gr.TabItem("πŸ“– Lesson Material"):
401
- lesson_output = gr.Markdown(value="*Results will appear here after generation.*")
402
- with gr.TabItem("❓ Q & A"):
403
- qa_output = gr.Markdown(value="*Results will appear here after generation.*")
404
- with gr.TabItem("βœ… MCQs"):
405
- mcq_output = gr.Markdown(value="*Results will appear here after generation.*")
406
- with gr.TabItem("🧠 Mindmap"):
407
- gr.Markdown("*The diagram is generated as an image (powered by mermaid.ink).*")
408
- mindmap_output = gr.Markdown(value="*Results will appear here after generation.*")
409
- with gr.TabItem("πŸ“Š Cheat Sheet"):
410
- infographic_output = gr.Markdown(value="*Results will appear here after generation.*")
411
-
412
- # ── Footer ──
413
- gr.HTML("""
414
- <div style='text-align:center; color:#64748b; font-size:0.8rem; margin-top:12px;'>
415
- Built with πŸ€— Gradio Β· Hugging Face Transformers β€” 100% open-source Β· runs offline on CPU
416
- </div>
417
- """)
418
-
419
- # ── Wire up button ──
420
- generate_btn.click(
421
- fn=generate_content,
422
- inputs=[syllabus_input, file_input, model_selector, hf_token_input],
423
- outputs=[lesson_output, qa_output, mcq_output, mindmap_output, infographic_output],
424
- )
425
-
426
- if __name__ == "__main__":
427
- demo.launch(
428
- theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
429
- css=CSS,
430
- )
 
1
+ from docx import Document
2
+ import pytesseract
3
+ from PIL import Image
4
+ import fitz
5
+ import gradio as gr
6
+ import threading
7
+ import pathlib
8
+
9
+ # --------------------------------------------------
10
+ # FILE TEXT EXTRACTION
11
+ # --------------------------------------------------
12
+
13
# Extensions the extractor knows how to handle (kept in sync with the
# gr.File picker in the UI).
SUPPORTED_EXT = (".pdf", ".docx", ".doc", ".txt",
                 ".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff")

def extract_text_from_file(filepath):
    """Extract plain text from a PDF, Word, TXT, or image file.

    Args:
        filepath: Path string, or a Gradio upload object exposing ``.name``.

    Returns:
        The extracted text, or a human-readable error/warning string.
        Never raises — callers feed the result straight into the UI.

    NOTE(review): ``fitz``, ``Document``, ``Image`` and ``pytesseract`` are
    imported at module top level, so a missing OCR/PDF stack crashes the app
    at startup rather than here — consider lazy imports.
    """
    if not filepath:
        return ""

    # Gradio may hand us a tempfile wrapper instead of a plain path.
    if hasattr(filepath, "name"):
        filepath = filepath.name

    ext = pathlib.Path(filepath).suffix.lower()

    try:
        if ext == ".pdf":
            doc = fitz.open(filepath)
            try:
                return "\n".join(page.get_text() for page in doc).strip()
            finally:
                # Release the file handle explicitly (was leaked before).
                doc.close()

        elif ext in (".docx", ".doc"):
            # The UI file picker offers .doc as well; python-docx only truly
            # supports .docx, so a legacy .doc falls through to the error
            # message below if parsing fails.
            doc = Document(filepath)
            return "\n".join(p.text for p in doc.paragraphs).strip()

        elif ext == ".txt":
            with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                return f.read().strip()

        elif ext in (".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"):
            try:
                img = Image.open(filepath)
                return pytesseract.image_to_string(img).strip()
            except Exception as e:
                # Typically means the Tesseract binary itself is missing.
                return "OCR failed: " + str(e)

        else:
            return "Unsupported file type: " + ext

    except Exception as e:
        return "Could not read file: " + str(e)
54
+
55
+
56
+ # --------------------------------------------------
57
+ # MODELS (verified HuggingFace IDs)
58
+ # --------------------------------------------------
59
+
60
# Model catalogue shown in the UI dropdown, grouped by approximate RAM need.
# Keys are the human-readable labels; values are Hugging Face Hub model ids.
# NOTE(review): despite the "verified" claim, several ids (the "Qwen 3.5"
# family, "DeepSeek-V3-Lite", "Mistral-Small-3-Instruct") do not look like
# published Hub repos — verify each id exists, since a typo only surfaces as
# a load error at runtime.
# NOTE(review): a GGUF repo (Qwen3-0.6B-GGUF) cannot be loaded through
# AutoModelForCausalLM as get_pipeline does — confirm this entry works.
MODELS = {
    # < 1GB: Quick slide summaries or vocab lists
    "Gemma 3 270M [0.6GB | Lightning-fast Edge]": "google/gemma-3-270m-it",
    "Qwen 3 0.6B GGUF [0.5GB | Classroom Assistant]": "Qwen/Qwen3-0.6B-GGUF",
    "TinyLlama 1.1B [0.5GB]": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",

    # 1-3GB: Smart mobile & basic assistants
    "Qwen 3.5 2B [2.4GB | The Student Tutor]": "Qwen/Qwen3.5-2B",
    "Phi-4 Mini [1.8GB | Logical Powerhouse]": "microsoft/Phi-4-mini-instruct",
    "Gemma 3 1B [2.1GB | Stable & Coherent]": "google/gemma-3-1b-it",

    # 3-8GB: The "Daily Driver" sweet spot
    "Qwen 3.5 9B [7.8GB | BEST FOR LESSON PLANS]": "Qwen/Qwen3.5-9B",
    "Llama 3.1 8B [5.2GB | Industry Standard]": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "Mistral Small 3 [7.1GB | Concise & Accurate]": "mistralai/Mistral-Small-3-Instruct",
    "Gemma 3 9B [6.3GB | Creative & Safe]": "google/gemma-3-9b-it",

    # 8-12GB: Enhanced reasoning for complex curricula
    "Qwen 3.5 35B-A3B [11.5GB | Elite Pedagogy MoE]": "Qwen/Qwen3.5-35B-A3B",
    "Mistral Small 12B [9.5GB | Perfect VRAM Balance]": "mistralai/Mistral-Nemo-Instruct-2407",

    # 12-20GB: Professional grade logic
    "Qwen 3.5 27B [18GB | Dense Curriculum Architect]": "Qwen/Qwen3.5-27B",
    "DeepSeek V3 Lite 21B [16.0GB | Academic Beast]": "deepseek-ai/DeepSeek-V3-Lite",

    # > 20GB: The Frontier models
    "Qwen 3.5 397B-A17B [75GB+ | Full Textbook Author]": "Qwen/Qwen3.5-397B-A17B",
    "GPT-OSS 120B [72GB+ | SOTA Logic & Coding]": "openai/gpt-oss-120b"
}

# Dropdown choices, in MODELS insertion order (first entry is the default).
ALL_MODEL_NAMES = list(MODELS.keys())
91
+
92
+
93
+ # --------------------------------------------------
94
+ # PIPELINE CACHE
95
+ # --------------------------------------------------
96
+
97
+ _pipeline_cache = {}
98
+ _pipeline_lock = threading.Lock()
99
+
100
+
101
+ def get_pipeline(model_id, hf_token=""):
102
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
103
+
104
+ with _pipeline_lock:
105
+ if model_id not in _pipeline_cache:
106
+ try:
107
+ token = hf_token.strip() if hf_token else None
108
+
109
+ tokenizer = AutoTokenizer.from_pretrained(
110
+ model_id,
111
+ token=token,
112
+ trust_remote_code=True
113
+ )
114
+
115
+ model = AutoModelForCausalLM.from_pretrained(
116
+ model_id,
117
+ token=token,
118
+ trust_remote_code=True,
119
+ device_map="cpu"
120
+ )
121
+
122
+ pipe = pipeline(
123
+ "text-generation",
124
+ model=model,
125
+ tokenizer=tokenizer
126
+ )
127
+
128
+ _pipeline_cache[model_id] = pipe
129
+
130
+ except Exception as e:
131
+ return None, str(e)
132
+
133
+ return _pipeline_cache[model_id], None
134
+
135
+
136
+ # --------------------------------------------------
137
+ # INFERENCE
138
+ # --------------------------------------------------
139
+
140
# Prepended to every prompt so each section comes back as markdown.
SYSTEM_MSG = "You are an expert educational assistant. Use markdown."


def ask_llm(model_label, prompt, hf_token=""):
    """Generate a markdown answer for *prompt* with the selected model.

    Args:
        model_label: A key of MODELS (the dropdown label).
        prompt: The user prompt for one study-material section.
        hf_token: Optional HF access token for gated models.

    Returns:
        The model's reply, or an error message string if the pipeline
        could not be loaded or inference failed.
    """
    pipe, load_error = get_pipeline(MODELS[model_label], hf_token)
    if load_error:
        return "Model load error:\n" + load_error

    try:
        full_prompt = SYSTEM_MSG + "\n\n" + prompt
        generations = pipe(
            full_prompt,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
        )
        reply = generations[0]["generated_text"]

        # Some models echo the prompt back verbatim; drop it if present.
        if reply[:len(full_prompt)] == full_prompt:
            reply = reply[len(full_prompt):]

        return reply.strip()

    except Exception as e:
        return "Inference error:\n" + str(e)
170
+
171
+
172
+ # --------------------------------------------------
173
+ # PROMPTS
174
+ # --------------------------------------------------
175
+
176
def make_prompts(topic):
    """Build the five generation prompts for *topic*, keyed by section name."""
    instructions = {
        "lesson": "Create lesson notes with headings and bullet points.",
        "qa": "Generate 8 exam questions with answers.",
        "mcq": "Generate 8 MCQs with 4 options and answers.",
        "summary": "Summarize the topic in 200 words.",
        "infographic": "Create a cheat sheet using tables and bullet points.",
    }
    return {
        section: f"{task}\n\nTopic:\n{topic}"
        for section, task in instructions.items()
    }
184
+
185
+
186
def generate_content(text, file, model_label, token):
    """Progressively generate all five study-material sections.

    A generator wired to the Generate button: it yields a 5-tuple
    (lesson, qa, mcq, summary, cheat-sheet) after each section finishes
    so the UI updates incrementally.

    Args:
        text: Pasted syllabus text (may be None if the box was untouched).
        file: Uploaded file (or None).
        model_label: Selected MODELS key.
        token: Optional HF token, forwarded to the model loader.
    """
    file_text = extract_text_from_file(file) if file else ""

    # Gradio can pass None for an untouched textbox; guard before concat.
    syllabus = ((text or "") + "\n\n" + file_text).strip()

    if not syllabus:
        yield ("Provide topic or file", "", "", "", "")
        return

    prompts = make_prompts(syllabus)

    WAIT = "Generating..."
    results = [WAIT] * 5

    yield tuple(results)

    order = ["lesson", "qa", "mcq", "summary", "infographic"]

    for i, key in enumerate(order):
        results[i] = ask_llm(model_label, prompts[key], token)
        yield tuple(results)
210
+
211
+
212
+ # --------------------------------------------------
213
+ # UI
214
+ # --------------------------------------------------
215
+
216
+ CSS = """
217
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
218
+
219
+ body, .gradio-container {
220
+ font-family: 'Inter', sans-serif !important;
221
+ }
222
+
223
+ .app-header {
224
+ background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%);
225
+ border-radius: 16px;
226
+ padding: 28px 32px;
227
+ margin-bottom: 8px;
228
+ border: 1px solid rgba(99,102,241,0.3);
229
+ }
230
+
231
+ .app-header h1 {
232
+ font-size: 2rem !important;
233
+ font-weight: 700 !important;
234
+ background: linear-gradient(90deg, #818cf8, #c084fc, #38bdf8);
235
+ -webkit-background-clip: text;
236
+ -webkit-text-fill-color: transparent;
237
+ margin-bottom: 6px !important;
238
+ }
239
+
240
+ .app-header p {
241
+ color: #94a3b8 !important;
242
+ font-size: 0.95rem;
243
+ }
244
+
245
+ .generate-btn {
246
+ background: linear-gradient(135deg, #6366f1, #8b5cf6) !important;
247
+ border: none !important;
248
+ border-radius: 12px !important;
249
+ font-weight: 600 !important;
250
+ font-size: 1rem !important;
251
+ transition: all 0.2s ease !important;
252
+ width: 100% !important;
253
+ }
254
+
255
+ .generate-btn:hover {
256
+ transform: translateY(-2px) !important;
257
+ box-shadow: 0 8px 25px rgba(99,102,241,0.4) !important;
258
+ }
259
+ """
260
+
261
# Theme and CSS must be passed to the Blocks constructor —
# Blocks.launch() accepts neither and would raise a TypeError.
with gr.Blocks(
    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
    css=CSS,
) as demo:

    # ── Header ──
    gr.HTML("""
    <div class="app-header">
        <h1>🎓 AI Study Material Generator</h1>
        <p>Generate lesson notes, Q&amp;A, MCQs, a Summary, and a Cheat Sheet from any topic
        or syllabus — using SOTA open-source LLMs running entirely on your CPU via
        <code>transformers.pipeline</code>. No API key needed.</p>
    </div>
    """)

    # ── Input Row ──
    with gr.Row(equal_height=False):

        # Left: syllabus input (paste OR upload)
        with gr.Column(scale=4):
            with gr.Tabs():
                with gr.TabItem("✏️ Paste Text"):
                    text_input = gr.Textbox(
                        show_label=False,
                        placeholder=(
                            "Paste your syllabus, topic, or any content here…\n"
                            "e.g. The Water Cycle, Neural Networks, World War II, Photosynthesis"
                        ),
                        lines=7,
                    )
                with gr.TabItem("📂 Upload File"):
                    gr.Markdown(
                        "Upload a **PDF**, **Word (.docx)**, **plain text (.txt)**, "
                        "or **image** (PNG / JPG / WEBP) — text is extracted automatically."
                    )
                    file_input = gr.File(
                        label="Upload syllabus file",
                        file_types=[".pdf", ".docx", ".doc", ".txt",
                                    ".png", ".jpg", ".jpeg", ".webp", ".bmp"],
                        file_count="single",
                    )
                    file_preview = gr.Textbox(
                        label="Extracted text preview",
                        lines=4,
                        interactive=False,
                        placeholder="Text extracted from the file will appear here…",
                    )
                    # Live preview when a file is uploaded
                    file_input.change(
                        fn=lambda f: extract_text_from_file(f) if f else "",
                        inputs=file_input,
                        outputs=file_preview,
                    )

        # Right: model selector + generate button
        with gr.Column(scale=2):
            model_selector = gr.Dropdown(
                choices=ALL_MODEL_NAMES,
                value=ALL_MODEL_NAMES[0],
                label="🤖 Model (all run locally via pipeline)",
                info=(
                    "Smaller models are fastest and need the least RAM; "
                    "larger ones give the best quality (RAM needs shown in each label). "
                    "Models download on first use."
                ),
            )
            gr.Markdown(
                "<small>💡 **Llama** & **Gemma** models may require a Hugging Face login "
                "(`huggingface-cli login`) or a Token to download.</small>"
            )
            token_box = gr.Textbox(
                label="🔑 HF Token (optional)",
                info="Required for gated models. Your token stays private.",
                type="password",
                placeholder="hf_...",
            )
            btn = gr.Button(
                "⚡ Generate Study Materials",
                variant="primary",
                size="lg",
                elem_classes=["generate-btn"],
            )

    gr.HTML("<hr style='margin:8px 0; border-color:rgba(99,102,241,0.2)'>")

    # ── Output Tabs (one per generated section, in generation order) ──
    with gr.Tabs():
        with gr.TabItem("📖 Lesson Material"):
            lesson = gr.Markdown(value="*Results will appear here after generation.*")
        with gr.TabItem("❓ Q & A"):
            qa = gr.Markdown(value="*Results will appear here after generation.*")
        with gr.TabItem("✅ MCQs"):
            mcq = gr.Markdown(value="*Results will appear here after generation.*")
        with gr.TabItem("📝 Summary"):
            summary = gr.Markdown(value="*Results will appear here after generation.*")
        with gr.TabItem("📊 Cheat Sheet"):
            cheat = gr.Markdown(value="*Results will appear here after generation.*")

    # ── Footer ──
    gr.HTML("""
    <div style='text-align:center; color:#64748b; font-size:0.8rem; margin-top:12px;'>
        Built with 🤗 Gradio · Hugging Face Transformers — 100% open-source · runs offline on CPU
    </div>
    """)

    # ── Wire up button ── (generate_content is a generator, so outputs
    # stream section by section)
    btn.click(
        fn=generate_content,
        inputs=[text_input, file_input, model_selector, token_box],
        outputs=[lesson, qa, mcq, summary, cheat],
    )

# Guard so importing this module (e.g. for tests) doesn't start the server.
if __name__ == "__main__":
    demo.launch()