Files changed (1) hide show
  1. app.py +121 -165
app.py CHANGED
@@ -7,13 +7,32 @@ import threading
7
  import pathlib
8
  import os
9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  # --------------------------------------------------
11
  # FILE TEXT EXTRACTION
12
  # --------------------------------------------------
13
 
14
- SUPPORTED_EXT = (".pdf",".docx",".txt",".png",".jpg",".jpeg",".webp",".bmp",".tiff")
 
 
15
 
16
  def extract_text_from_file(filepath):
 
17
  if not filepath:
18
  return ""
19
 
@@ -23,6 +42,7 @@ def extract_text_from_file(filepath):
23
  ext = pathlib.Path(filepath).suffix.lower()
24
 
25
  try:
 
26
  if ext == ".pdf":
27
  doc = fitz.open(filepath)
28
  text = []
@@ -39,9 +59,11 @@ def extract_text_from_file(filepath):
39
  return f.read()
40
 
41
  elif ext in (".png",".jpg",".jpeg",".webp",".bmp",".tiff"):
 
42
  try:
43
  img = Image.open(filepath)
44
  return pytesseract.image_to_string(img)
 
45
  except Exception as e:
46
  return "OCR failed: " + str(e)
47
 
@@ -51,42 +73,34 @@ def extract_text_from_file(filepath):
51
  except Exception as e:
52
  return "Could not read file: " + str(e)
53
 
 
54
  # --------------------------------------------------
55
- # MODELS (verified HuggingFace IDs)
56
  # --------------------------------------------------
57
 
58
  MODELS = {
59
- # < 1GB: Quick slide summaries or vocab lists
60
  "Gemma 3 270M [0.6GB | Lightning-fast Edge]": "google/gemma-3-270m-it",
61
  "Qwen 3 0.6B GGUF [0.5GB | Classroom Assistant]": "Qwen/Qwen3-0.6B-GGUF",
62
  "TinyLlama 1.1B [0.5GB]": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
63
 
64
- # 1-3GB: Smart mobile & basic assistants
65
  "Qwen 3.5 2B [2.4GB | The Student Tutor]": "Qwen/Qwen3.5-2B",
66
  "Phi-4 Mini [1.8GB | Logical Powerhouse]": "microsoft/Phi-4-mini-instruct",
67
  "Gemma 3 1B [2.1GB | Stable & Coherent]": "google/gemma-3-1b-it",
68
 
69
- # 3-8GB: The "Daily Driver" sweet spot
70
  "Qwen 3.5 9B [7.8GB | BEST FOR LESSON PLANS]": "Qwen/Qwen3.5-9B",
71
  "Llama 3.1 8B [5.2GB | Industry Standard]": "meta-llama/Meta-Llama-3.1-8B-Instruct",
72
  "Mistral Small 3 [7.1GB | Concise & Accurate]": "mistralai/Mistral-Small-3-Instruct",
73
  "Gemma 3 9B [6.3GB | Creative & Safe]": "google/gemma-3-9b-it",
74
 
75
- # 8-12GB: Enhanced reasoning for complex curricula
76
- "Qwen 3.5 35B-A3B [11.5GB | Elite Pedagogy MoE]": "Qwen/Qwen3.5-35B-A3B",
77
  "Mistral Small 12B [9.5GB | Perfect VRAM Balance]": "mistralai/Mistral-Nemo-Instruct-2407",
78
 
79
- # 12-20GB: Professional grade logic
80
  "Qwen 3.5 27B [18GB | Dense Curriculum Architect]": "Qwen/Qwen3.5-27B",
81
- "DeepSeek V3 Lite 21B [16.0GB | Academic Beast]": "deepseek-ai/DeepSeek-V3-Lite",
82
-
83
- # > 20GB: The Frontier models
84
- "Qwen 3.5 397B-A17B [75GB+ | Full Textbook Author]": "Qwen/Qwen3.5-397B-A17B",
85
- "GPT-OSS 120B [72GB+ | SOTA Logic & Coding]": "openai/gpt-oss-120b"
86
  }
87
 
88
  ALL_MODEL_NAMES = list(MODELS.keys())
89
 
 
90
  # --------------------------------------------------
91
  # PIPELINE CACHE
92
  # --------------------------------------------------
@@ -94,25 +108,33 @@ ALL_MODEL_NAMES = list(MODELS.keys())
94
  _pipeline_cache = {}
95
  _pipeline_lock = threading.Lock()
96
 
 
97
  def get_pipeline(model_id, hf_token):
 
98
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
99
 
100
  with _pipeline_lock:
 
101
  if model_id not in _pipeline_cache:
 
102
  try:
103
- token = hf_token.strip() if hf_token else None
 
 
 
 
 
 
104
 
105
  tokenizer = AutoTokenizer.from_pretrained(
106
  model_id,
107
- token=token,
108
- trust_remote_code=True
109
  )
110
 
111
  model = AutoModelForCausalLM.from_pretrained(
112
  model_id,
113
- token=token,
114
- trust_remote_code=True,
115
- device_map="cpu"
116
  )
117
 
118
  pipe = pipeline(
@@ -128,21 +150,28 @@ def get_pipeline(model_id, hf_token):
128
 
129
  return _pipeline_cache[model_id], None
130
 
 
131
  # --------------------------------------------------
132
  # INFERENCE
133
  # --------------------------------------------------
134
 
135
  SYSTEM_MSG = "You are an expert educational assistant. Use markdown."
136
 
137
- def ask_llm(model_label, prompt, hf_token=os.getenv("hgface_tok")):
 
 
 
138
  model_id = MODELS[model_label]
139
- pipe,err = get_pipeline(model_id, hf_token)
 
140
 
141
  if err:
142
  return "Model load error:\n" + err
143
 
144
  try:
 
145
  combined = SYSTEM_MSG + "\n\n" + prompt
 
146
  out = pipe(
147
  combined,
148
  max_new_tokens=2048,
@@ -153,7 +182,6 @@ def ask_llm(model_label, prompt, hf_token=os.getenv("hgface_tok")):
153
 
154
  text = out[0]["generated_text"]
155
 
156
- # Remove prompt from output if echoed
157
  if text.startswith(combined):
158
  text = text[len(combined):]
159
 
@@ -162,19 +190,32 @@ def ask_llm(model_label, prompt, hf_token=os.getenv("hgface_tok")):
162
  except Exception as e:
163
  return "Inference error:\n" + str(e)
164
 
 
165
  # --------------------------------------------------
166
  # PROMPTS
167
  # --------------------------------------------------
168
 
169
  def make_prompts(topic):
 
170
  return {
171
- "lesson": "Create a lesson plan with headings and bullet points.\n\nTopic:\n"+topic,
172
- "qa": "Generate 10 exam questions with answers.\n\nTopic:\n"+topic,
173
- "mcq": "Generate 10 MCQs with 4 options and answers.\n\nTopic:\n"+topic,
174
- "summary": "Summarize the topic in 250-300 words.\n\nTopic:\n"+topic,
175
- "infographic": "Create a cheat sheet using tables and bullet points.\n\nTopic:\n"+topic
 
 
 
 
 
 
 
 
 
 
176
  }
177
 
 
178
  def generate_content(text, file, model_label, token):
179
  file_text = extract_text_from_file(file) if file else ""
180
 
@@ -185,175 +226,90 @@ def generate_content(text, file, model_label, token):
185
  return
186
 
187
  prompts = make_prompts(syllabus)
 
188
  WAIT = "Generating..."
189
- results = [WAIT, WAIT, WAIT, WAIT, WAIT]
 
190
 
191
  yield tuple(results)
192
 
193
  order = ["lesson","qa","mcq","summary","infographic"]
194
 
195
- for i,key in enumerate(order):
 
196
  res = ask_llm(model_label, prompts[key], token)
 
197
  results[i] = res
 
198
  yield tuple(results)
199
 
 
200
  # --------------------------------------------------
201
  # UI
202
  # --------------------------------------------------
203
 
204
  CSS = """
205
- @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
206
- body, .gradio-container {
207
- font-family: 'Inter', sans-serif !important;
208
- }
209
- .app-header {
210
- background: linear-gradient(135deg,
211
- #1a1a2e 0%,
212
- #16213e 50%,
213
- #0f3460 100%);
214
- border-radius: 16px;
215
- padding: 28px 32px;
216
- margin-bottom: 8px;
217
- border: 1px solid rgba(99,102,241,0.3);
218
- }
219
- .app-header h1 {
220
- font-size: 2rem !important;
221
- font-weight: 700 !important;
222
- background: linear-gradient(90deg,
223
- #818cf8,
224
- #c084fc,
225
- #38bdf8);
226
- -webkit-background-clip: text;
227
- -webkit-text-fill-color: transparent;
228
- margin-bottom: 6px !important;
229
- }
230
- .app-header p {
231
- color:
232
- #94a3b8 !important;
233
- font-size: 0.95rem;
234
- }
235
- .generate-btn {
236
- background: linear-gradient(135deg,
237
- #6366f1,
238
- #8b5cf6) !important;
239
- border: none !important;
240
- border-radius: 12px !important;
241
- font-weight: 600 !important;
242
- font-size: 1rem !important;
243
- transition: all 0.2s ease !important;
244
- width: 100% !important;
245
- }
246
- .generate-btn:hover {
247
- transform: translateY(-2px) !important;
248
- box-shadow: 0 8px 25px rgba(99,102,241,0.4) !important;
249
  }
250
  """
251
 
 
252
  with gr.Blocks() as demo:
253
- # ── Header ──
254
- gr.HTML("""
255
- <div class="app-header">
256
- <h1>🎓 AI Study Material Generator</h1>
257
- <p>Generate lesson notes, Q&amp;A, MCQs, a Mindmap, and a Cheat Sheet from any topic
258
- or syllabus using SOTA open-source LLMs running entirely on your CPU via transformers.pipeline. No API key needed.</p>
259
- </div>
260
- """)
261
-
262
- # ── Input Row ──
263
- with gr.Row(equal_height=False):
264
-
265
- # Left: syllabus input (paste OR upload)
266
- with gr.Column(scale=4):
267
- with gr.Tabs():
268
- with gr.TabItem("✏️ Paste Text"):
269
- text_input = gr.Textbox(
270
- show_label=False,
271
- placeholder=(
272
- "Paste your syllabus, topic, or any content here…\n"
273
- "e.g. The Water Cycle, Neural Networks, World War II, Photosynthesis"
274
- ),
275
- lines=7,
276
- )
277
- with gr.TabItem("📂 Upload File"):
278
- gr.Markdown(
279
- "Upload a **PDF**, **Word (.docx)**, **plain text (.txt)**, "
280
- "or **image** (PNG / JPG / WEBP) — text is extracted automatically."
281
- )
282
- file_input = gr.File(
283
- label="Upload syllabus file",
284
- file_types=[".pdf", ".docx", ".doc", ".txt",
285
- ".png", ".jpg", ".jpeg", ".webp", ".bmp"],
286
- file_count="single",
287
- )
288
- file_preview = gr.Textbox(
289
- label="Extracted text preview",
290
- lines=4,
291
- interactive=False,
292
- placeholder="Text extracted from the file will appear here…",
293
- )
294
- # Live preview when file is uploaded
295
- file_input.change(
296
- fn=lambda f: extract_text_from_file(f) if f else "",
297
- inputs=file_input,
298
- outputs=file_preview,
299
- )
300
-
301
- # Right: model selector + generate button
302
- with gr.Column(scale=2):
303
  model_selector = gr.Dropdown(
304
  choices=ALL_MODEL_NAMES,
305
  value=ALL_MODEL_NAMES[0],
306
- label="🤖 Model (all run locally via pipeline)",
307
- info=(
308
- "Tier 1 = fastest / least RAM. "
309
- "Tier 3 = best quality / needs 6–8 GB RAM. "
310
- "Models download on first use."
311
- ),
312
  )
 
313
  token_box = gr.Textbox(
314
- label="🔑 HF Token (optional)",
315
- info="Required for gated models. Your token stays private.",
316
- type="password",
317
- placeholder="hf_...",
318
- )
319
- btn = gr.Button(
320
- "⚡ Generate Study Materials",
321
- variant="primary",
322
- size="lg",
323
- elem_classes=["generate-btn"],
324
  )
325
 
326
- gr.HTML("<hr style='margin:8px 0; border-color:rgba(99,102,241,0.2)'>")
327
 
328
- # ── Output Tabs ──
329
  with gr.Tabs():
330
- with gr.TabItem("📖 Lesson Plan"):
331
- lesson = gr.Markdown(value="*Results will appear here after generation.*")
332
- with gr.TabItem("❓ Q & A"):
333
- qa = gr.Markdown(value="*Results will appear here after generation.*")
334
- with gr.TabItem("✅ MCQs"):
335
- mcq = gr.Markdown(value="*Results will appear here after generation.*")
336
- with gr.TabItem("📝 Summary"):
337
- summary = gr.Markdown(value="*Results will appear here after generation.*")
338
- with gr.TabItem("📊 Cheat Sheet"):
339
- cheat = gr.Markdown(value="*Results will appear here after generation.*")
340
-
341
- # ── Footer ──
342
- gr.HTML("""
343
- <div style='text-align:center; color:
344
- #64748b; font-size:0.8rem; margin-top:12px;'>
345
- Built with 🤗 Gradio · Hugging Face Transformers — 100% open-source · runs offline on CPU
346
- </div>
347
- """)
348
-
349
- # ── Wire up button ──
350
  btn.click(
351
  fn=generate_content,
352
- inputs=[text_input, file_input, model_selector, token_box],
353
- outputs=[lesson, qa, mcq, summary, cheat],
354
  )
355
 
 
356
  demo.launch(
357
- theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="purple"),
358
- css=CSS,
 
 
 
359
  )
 
7
  import pathlib
8
  import os
9
 
10
+
11
+ # --------------------------------------------------
12
+ # TOKEN RESOLUTION
13
+ # --------------------------------------------------
14
+
15
def resolve_token(ui_token):
    """Resolve the Hugging Face token to use for model downloads.

    Preference order:
      1. the token typed into the UI (stripped),
      2. the ``hgface_tok`` environment variable (stripped),
      3. empty string (anonymous access).

    Args:
        ui_token: Raw value from the UI textbox; may be ``None`` when the
            field was never touched (Gradio can pass ``None``), empty, or
            padded with whitespace.

    Returns:
        A stripped token string, or ``""`` when no token is available.
    """
    # Guard against None before calling .strip(); strip once and reuse.
    if ui_token:
        cleaned = ui_token.strip()
        if cleaned:
            return cleaned

    env_token = os.getenv("hgface_tok")
    if env_token:
        return env_token.strip()

    return ""
24
+
25
+
26
  # --------------------------------------------------
27
  # FILE TEXT EXTRACTION
28
  # --------------------------------------------------
29
 
30
# Extensions the text extractor knows how to handle: documents first,
# then the OCR-able image formats.
SUPPORTED_EXT = (
    ".pdf", ".docx", ".txt",
    ".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff",
)
33
 
34
  def extract_text_from_file(filepath):
35
+
36
  if not filepath:
37
  return ""
38
 
 
42
  ext = pathlib.Path(filepath).suffix.lower()
43
 
44
  try:
45
+
46
  if ext == ".pdf":
47
  doc = fitz.open(filepath)
48
  text = []
 
59
  return f.read()
60
 
61
  elif ext in (".png",".jpg",".jpeg",".webp",".bmp",".tiff"):
62
+
63
  try:
64
  img = Image.open(filepath)
65
  return pytesseract.image_to_string(img)
66
+
67
  except Exception as e:
68
  return "OCR failed: " + str(e)
69
 
 
73
  except Exception as e:
74
  return "Could not read file: " + str(e)
75
 
76
+
77
  # --------------------------------------------------
78
+ # MODELS
79
  # --------------------------------------------------
80
 
81
# Registry of selectable models: display label -> Hugging Face repo id.
# Labels carry the approximate download size so users can pick by RAM budget.
MODELS = {
    # Sub-1GB tier.
    "Gemma 3 270M [0.6GB | Lightning-fast Edge]": "google/gemma-3-270m-it",
    "Qwen 3 0.6B GGUF [0.5GB | Classroom Assistant]": "Qwen/Qwen3-0.6B-GGUF",
    "TinyLlama 1.1B [0.5GB]": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",

    # 1-3GB tier.
    "Qwen 3.5 2B [2.4GB | The Student Tutor]": "Qwen/Qwen3.5-2B",
    "Phi-4 Mini [1.8GB | Logical Powerhouse]": "microsoft/Phi-4-mini-instruct",
    "Gemma 3 1B [2.1GB | Stable & Coherent]": "google/gemma-3-1b-it",

    # 5-8GB tier.
    "Qwen 3.5 9B [7.8GB | BEST FOR LESSON PLANS]": "Qwen/Qwen3.5-9B",
    "Llama 3.1 8B [5.2GB | Industry Standard]": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "Mistral Small 3 [7.1GB | Concise & Accurate]": "mistralai/Mistral-Small-3-Instruct",
    "Gemma 3 9B [6.3GB | Creative & Safe]": "google/gemma-3-9b-it",

    # Large tier.
    "Mistral Small 12B [9.5GB | Perfect VRAM Balance]": "mistralai/Mistral-Nemo-Instruct-2407",
    "Qwen 3.5 27B [18GB | Dense Curriculum Architect]": "Qwen/Qwen3.5-27B",
}

# Dropdown choices are derived straight from the registry keys,
# preserving insertion order.
ALL_MODEL_NAMES = list(MODELS.keys())
102
 
103
+
104
  # --------------------------------------------------
105
  # PIPELINE CACHE
106
  # --------------------------------------------------
 
108
  _pipeline_cache = {}
109
  _pipeline_lock = threading.Lock()
110
 
111
+
112
  def get_pipeline(model_id, hf_token):
113
+
114
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
115
 
116
  with _pipeline_lock:
117
+
118
  if model_id not in _pipeline_cache:
119
+
120
  try:
121
+
122
+ kwargs = {
123
+ "trust_remote_code": True
124
+ }
125
+
126
+ if hf_token:
127
+ kwargs["token"] = hf_token
128
 
129
  tokenizer = AutoTokenizer.from_pretrained(
130
  model_id,
131
+ **kwargs
 
132
  )
133
 
134
  model = AutoModelForCausalLM.from_pretrained(
135
  model_id,
136
+ device_map="cpu",
137
+ **kwargs
 
138
  )
139
 
140
  pipe = pipeline(
 
150
 
151
  return _pipeline_cache[model_id], None
152
 
153
+
154
  # --------------------------------------------------
155
  # INFERENCE
156
  # --------------------------------------------------
157
 
158
  SYSTEM_MSG = "You are an expert educational assistant. Use markdown."
159
 
160
+ def ask_llm(model_label, prompt, hf_token=""):
161
+
162
+ token = resolve_token(hf_token)
163
+
164
  model_id = MODELS[model_label]
165
+
166
+ pipe, err = get_pipeline(model_id, token)
167
 
168
  if err:
169
  return "Model load error:\n" + err
170
 
171
  try:
172
+
173
  combined = SYSTEM_MSG + "\n\n" + prompt
174
+
175
  out = pipe(
176
  combined,
177
  max_new_tokens=2048,
 
182
 
183
  text = out[0]["generated_text"]
184
 
 
185
  if text.startswith(combined):
186
  text = text[len(combined):]
187
 
 
190
  except Exception as e:
191
  return "Inference error:\n" + str(e)
192
 
193
+
194
  # --------------------------------------------------
195
  # PROMPTS
196
  # --------------------------------------------------
197
 
198
def make_prompts(topic):
    """Build the five study-material prompts for *topic*.

    Returns a dict keyed by output type ("lesson", "qa", "mcq",
    "summary", "infographic"); every prompt ends with the topic text
    appended after a "Topic:" marker.
    """
    # All prompts share the same trailing topic section.
    suffix = "\n\nTopic:\n" + topic

    instructions = {
        "lesson": "Create a lesson plan with headings and bullet points.",
        "qa": "Generate 10 exam questions with answers.",
        "mcq": "Generate 10 MCQs with 4 options and answers.",
        "summary": "Summarize the topic in 250-300 words.",
        "infographic": "Create a cheat sheet using tables and bullet points.",
    }

    return {key: text + suffix for key, text in instructions.items()}
217
 
218
+
219
  def generate_content(text, file, model_label, token):
220
  file_text = extract_text_from_file(file) if file else ""
221
 
 
226
  return
227
 
228
  prompts = make_prompts(syllabus)
229
+
230
  WAIT = "Generating..."
231
+
232
+ results = [WAIT,WAIT,WAIT,WAIT,WAIT]
233
 
234
  yield tuple(results)
235
 
236
  order = ["lesson","qa","mcq","summary","infographic"]
237
 
238
+ for i, key in enumerate(order):
239
+
240
  res = ask_llm(model_label, prompts[key], token)
241
+
242
  results[i] = res
243
+
244
  yield tuple(results)
245
 
246
+
247
  # --------------------------------------------------
248
  # UI
249
  # --------------------------------------------------
250
 
251
  CSS = """
252
+ body,.gradio-container{
253
+ font-family:Inter,sans-serif!important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  }
255
  """
256
 
257
+
258
  with gr.Blocks() as demo:
259
+ gr.Markdown("# 🎓 AI Study Material Generator")
260
+
261
+ with gr.Row():
262
+ with gr.Column():
263
+ text_input = gr.Textbox(
264
+ placeholder="Paste syllabus or topic",
265
+ lines=6
266
+ )
267
+
268
+ file_input = gr.File(
269
+ label="Upload syllabus file"
270
+ )
271
+
272
+ with gr.Column():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  model_selector = gr.Dropdown(
274
  choices=ALL_MODEL_NAMES,
275
  value=ALL_MODEL_NAMES[0],
276
+ label="Model"
 
 
 
 
 
277
  )
278
+
279
  token_box = gr.Textbox(
280
+ label="HF Token (optional)",
281
+ type="password"
 
 
 
 
 
 
 
 
282
  )
283
 
284
+ btn = gr.Button("Generate")
285
 
 
286
  with gr.Tabs():
287
+ with gr.TabItem("Lesson"):
288
+ lesson = gr.Markdown()
289
+
290
+ with gr.TabItem("Q&A"):
291
+ qa = gr.Markdown()
292
+
293
+ with gr.TabItem("MCQ"):
294
+ mcq = gr.Markdown()
295
+
296
+ with gr.TabItem("Summary"):
297
+ summary = gr.Markdown()
298
+
299
+ with gr.TabItem("Cheat Sheet"):
300
+ cheat = gr.Markdown()
301
+
 
 
 
 
 
302
  btn.click(
303
  fn=generate_content,
304
+ inputs=[text_input,file_input,model_selector,token_box],
305
+ outputs=[lesson,qa,mcq,summary,cheat]
306
  )
307
 
308
+
309
# Launch the app with the indigo/purple Soft theme and the custom CSS above.
_theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="purple")
demo.launch(theme=_theme, css=CSS)