# Hugging Face Space: AI Study Material Generator
| from docx import Document | |
| import pytesseract | |
| from PIL import Image | |
| import fitz | |
| import gradio as gr | |
| import threading | |
| import pathlib | |
| import os | |
# --------------------------------------------------
# TOKEN RESOLUTION
# --------------------------------------------------
def resolve_token(ui_token):
    """Return the Hugging Face token to use for model downloads.

    Precedence: a non-blank token typed in the UI wins, then the
    ``hgface_tok`` environment variable, then the empty string
    (anonymous access).

    Args:
        ui_token: Token string from the UI textbox; may be None or blank
            (Gradio can pass None when the field is untouched).

    Returns:
        The stripped token, or "" when none is available.
    """
    # Bug fix: guard against None before calling .strip() — the original
    # raised AttributeError when the UI passed None.
    if ui_token and ui_token.strip():
        return ui_token.strip()
    env_token = os.getenv("hgface_tok")
    if env_token and env_token.strip():
        return env_token.strip()
    return ""
# --------------------------------------------------
# FILE TEXT EXTRACTION
# --------------------------------------------------
# File extensions this app advertises support for.
# NOTE(review): extract_text_from_file() below hardcodes its own image
# extension tuple instead of reading this constant — this appears to be
# informational only; keep the two lists in sync if either changes.
SUPPORTED_EXT = (
    ".pdf", ".docx", ".txt", ".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"
)
def extract_text_from_file(filepath):
    """Extract plain text from an uploaded file (PDF, DOCX, TXT, or image).

    Args:
        filepath: Path string, or a Gradio file wrapper exposing ``.name``.
            Falsy values yield "".

    Returns:
        The extracted text, or a human-readable error string — this function
        never raises; failures are reported in-band so the UI can show them.
    """
    if not filepath:
        return ""
    # Gradio may hand us a tempfile wrapper; unwrap to its filesystem path.
    if hasattr(filepath, "name"):
        filepath = filepath.name
    ext = pathlib.Path(filepath).suffix.lower()
    try:
        if ext == ".pdf":
            # Bug fix: the original never closed the PyMuPDF document,
            # leaking a file handle per upload. The context manager closes it.
            with fitz.open(filepath) as doc:
                return "\n".join(page.get_text() for page in doc)
        elif ext == ".docx":
            doc = Document(filepath)
            return "\n".join(p.text for p in doc.paragraphs)
        elif ext == ".txt":
            with open(filepath, "r", encoding="utf-8", errors="ignore") as f:
                return f.read()
        elif ext in (".png", ".jpg", ".jpeg", ".webp", ".bmp", ".tiff"):
            # OCR failures get their own message so users can distinguish a
            # bad scan from an unreadable file.
            try:
                img = Image.open(filepath)
                return pytesseract.image_to_string(img)
            except Exception as e:
                return "OCR failed: " + str(e)
        else:
            return "Unsupported file type: " + ext
    except Exception as e:
        return "Could not read file: " + str(e)
# --------------------------------------------------
# MODELS
# --------------------------------------------------
# Dropdown label -> Hugging Face model id. Labels embed an approximate
# download size and an intended-use hint shown to the user.
# NOTE(review): several ids (e.g. "Qwen/Qwen3.5-2B", "Qwen/Qwen3.5-9B",
# "mistralai/Mistral-Small-3-Instruct") could not be verified against the
# Hub from this file alone — confirm they exist under these exact names.
# NOTE(review): the GGUF entry is loaded via AutoModelForCausalLM below,
# which normally needs a gguf_file argument — verify it actually loads.
MODELS = {
    "Gemma 3 270M [0.6GB | Lightning-fast Edge]": "google/gemma-3-270m-it",
    "Qwen 3 0.6B GGUF [0.5GB | Classroom Assistant]": "Qwen/Qwen3-0.6B-GGUF",
    "TinyLlama 1.1B [0.5GB]": "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Qwen 3.5 2B [2.4GB | The Student Tutor]": "Qwen/Qwen3.5-2B",
    "Phi-4 Mini [1.8GB | Logical Powerhouse]": "microsoft/Phi-4-mini-instruct",
    "Gemma 3 1B [2.1GB | Stable & Coherent]": "google/gemma-3-1b-it",
    "Qwen 3.5 9B [7.8GB | BEST FOR LESSON PLANS]": "Qwen/Qwen3.5-9B",
    "Llama 3.1 8B [5.2GB | Industry Standard]": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "Mistral Small 3 [7.1GB | Concise & Accurate]": "mistralai/Mistral-Small-3-Instruct",
    "Gemma 3 9B [6.3GB | Creative & Safe]": "google/gemma-3-9b-it",
    "Mistral Small 12B [9.5GB | Perfect VRAM Balance]": "mistralai/Mistral-Nemo-Instruct-2407",
    "Qwen 3.5 27B [18GB | Dense Curriculum Architect]": "Qwen/Qwen3.5-27B",
}
# Dropdown choices, in MODELS declaration order.
ALL_MODEL_NAMES = list(MODELS.keys())
# --------------------------------------------------
# PIPELINE CACHE
# --------------------------------------------------
# Loaded pipelines keyed by model id, guarded by a lock so concurrent
# requests never load the same model twice.
_pipeline_cache = {}
_pipeline_lock = threading.Lock()


def get_pipeline(model_id, hf_token):
    """Return a cached CPU text-generation pipeline for *model_id*.

    Loads tokenizer + model on first use and memoizes the pipeline.

    Args:
        model_id: Hugging Face model id.
        hf_token: Access token; passed to the Hub only when non-empty.

    Returns:
        (pipeline, None) on success, or (None, error_message) when loading
        fails. Failed loads are not cached, so a later call may retry.
    """
    # Imported lazily so the app can start before transformers is needed.
    from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

    with _pipeline_lock:
        cached = _pipeline_cache.get(model_id)
        if cached is not None:
            return cached, None
        try:
            load_kwargs = {"trust_remote_code": True}
            if hf_token:
                load_kwargs["token"] = hf_token
            tok = AutoTokenizer.from_pretrained(model_id, **load_kwargs)
            mdl = AutoModelForCausalLM.from_pretrained(
                model_id, device_map="cpu", **load_kwargs
            )
            _pipeline_cache[model_id] = pipeline(
                "text-generation", model=mdl, tokenizer=tok
            )
        except Exception as exc:
            return None, str(exc)
        return _pipeline_cache[model_id], None
# --------------------------------------------------
# INFERENCE
# --------------------------------------------------
SYSTEM_MSG = "You are an expert educational assistant. Use markdown."


def ask_llm(model_label, prompt, hf_token=""):
    """Run one prompt through the selected model and return its answer.

    Args:
        model_label: A key of MODELS (the dropdown label).
        prompt: The task prompt; SYSTEM_MSG is prepended before generation.
        hf_token: Optional HF token from the UI (env fallback applies).

    Returns:
        The generated text, or an in-band "Model load error:"/"Inference
        error:" message — this function never raises for expected failures.
    """
    token = resolve_token(hf_token)
    pipe, load_err = get_pipeline(MODELS[model_label], token)
    if load_err:
        return "Model load error:\n" + load_err
    try:
        full_prompt = SYSTEM_MSG + "\n\n" + prompt
        generations = pipe(
            full_prompt,
            max_new_tokens=2048,
            do_sample=True,
            temperature=0.6,
            top_p=0.9,
            repetition_penalty=1.15,
            no_repeat_ngram_size=3,
        )
        answer = generations[0]["generated_text"]
        # text-generation pipelines echo the prompt; drop it when present.
        if answer.startswith(full_prompt):
            answer = answer[len(full_prompt):]
        return answer.strip()
    except Exception as exc:
        return "Inference error:\n" + str(exc)
# --------------------------------------------------
# PROMPTS
# --------------------------------------------------
def make_prompts(topic):
    """Build the four generation prompts for *topic*.

    Returns:
        Dict with keys "lesson", "qa", "mcq", "summary", each a complete
        prompt string ending with the topic text.
    """
    topic_suffix = "\n\nTopic:\n" + topic
    lesson_sections = [
        "Learning objectives",
        "Introduction",
        "Concept explanation",
        "Examples",
        "Case study",
        "Classroom activity",
        "Assessment",
    ]
    lesson_prompt = (
        "Create a structured lesson plan for classroom teaching.\n"
        "Include:\n"
        + "\n".join("- " + section for section in lesson_sections)
    )
    return {
        "lesson": lesson_prompt + topic_suffix,
        "qa": "Generate 10 exam questions with answers." + topic_suffix,
        "mcq": "Generate 10 MCQs with 4 options and answers." + topic_suffix,
        "summary": "Summarize the topic in 250-300 words." + topic_suffix,
    }
def generate_content(text, file, model_label, token):
    """Stream the four study-material sections for a topic and/or file.

    Generator wired to btn.click: each yield is a 4-tuple
    (lesson, qa, mcq, summary) matching the four Markdown outputs,
    progressively replacing "Generating..." placeholders.

    Args:
        text: Topic/syllabus text from the textbox.
        file: Optional uploaded file (passed to extract_text_from_file).
        model_label: Selected MODELS key.
        token: HF token from the UI.
    """
    file_text = extract_text_from_file(file) if file else ""
    syllabus = (text + "\n\n" + file_text).strip()
    if not syllabus:
        # Bug fix: must yield exactly 4 values to match the 4 UI outputs —
        # the original yielded a 5-tuple here, which Gradio rejects.
        yield ("Provide topic or file", "", "", "")
        return
    prompts = make_prompts(syllabus)
    WAIT = "Generating..."
    # Bug fix: 4 placeholders, not 5, to match the 4 outputs.
    results = [WAIT, WAIT, WAIT, WAIT]
    yield tuple(results)
    # Generate sections in tab order, re-yielding after each completes.
    for i, key in enumerate(("lesson", "qa", "mcq", "summary")):
        results[i] = ask_llm(model_label, prompts[key], token)
        yield tuple(results)
# --------------------------------------------------
# UI
# --------------------------------------------------
CSS = """
body,.gradio-container{
font-family:Inter,sans-serif!important;
}
"""

# Bug fix: `theme` and `css` are gr.Blocks() constructor arguments, not
# demo.launch() arguments — the original passed them to launch(), where
# they are rejected (or silently ignored, depending on Gradio version).
with gr.Blocks(
    theme=gr.themes.Soft(
        primary_hue="indigo",
        secondary_hue="purple"
    ),
    css=CSS,
) as demo:
    gr.Markdown("# 🎓 AI Study Material Generator")
    with gr.Row():
        with gr.Column():
            # Free-text syllabus/topic entry.
            text_input = gr.Textbox(
                placeholder="Paste syllabus or topic",
                lines=6
            )
            file_input = gr.File(
                label="Upload syllabus file"
            )
        with gr.Column():
            model_selector = gr.Dropdown(
                choices=ALL_MODEL_NAMES,
                value=ALL_MODEL_NAMES[0],
                label="Model"
            )
            token_box = gr.Textbox(
                label="HF Token (optional)",
                type="password"
            )
    btn = gr.Button("Generate")
    with gr.Tabs():
        with gr.TabItem("Lesson Plan"):
            lesson = gr.Markdown()
        with gr.TabItem("Q&A"):
            qa = gr.Markdown()
        with gr.TabItem("MCQ"):
            mcq = gr.Markdown()
        with gr.TabItem("Summary"):
            summary = gr.Markdown()
    # generate_content is a generator, so the four tabs stream updates
    # as each section finishes.
    btn.click(
        fn=generate_content,
        inputs=[text_input, file_input, model_selector, token_box],
        outputs=[lesson, qa, mcq, summary]
    )

demo.launch()