"""Gradio chat app serving a Mistral-7B model fine-tuned on CCISD TEKS data.

Loads the base model plus a LoRA adapter at import time (a deliberate
module-level side effect, as in the original script) and exposes a single-turn
ChatInterface UI.
"""

import gradio as gr
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

print("🔄 Loading CCISD TEKS Educational Assistant...")
print("   This may take 1-2 minutes on first launch...")

# Hugging Face Hub repo IDs for the base model and the LoRA adapter.
BASE_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
ADAPTER_MODEL = "robworks-software/ccisd-teks-educator-mistral7b"

print(f"📥 Loading base model: {BASE_MODEL}...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

# Prefer GPU when available; fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"   Using device: {device}")

# bfloat16 on CPU (float16 kernels are poorly supported on CPU),
# float16 on GPU for speed/memory.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.bfloat16 if device == "cpu" else torch.float16,
    low_cpu_mem_usage=True,
)

print(f"🔧 Loading LoRA adapter: {ADAPTER_MODEL}...")
model = PeftModel.from_pretrained(model, ADAPTER_MODEL)
model = model.to(device)
model.eval()  # inference only — disables dropout etc.

print("✅ Model loaded successfully!")


def chat(message, history):
    """Generate a response for one user message using the Mistral chat format.

    Args:
        message: Current user message (str).
        history: List of [user_msg, bot_msg] pairs supplied by Gradio.
            Currently unused — each turn is answered independently.

    Returns:
        Generated response string.
    """
    # Single-turn prompt in Mistral's [INST] ... [/INST] format.
    # BUGFIX: the original f-string was split across a raw newline
    # (a SyntaxError); reconstructed here as adjacent string literals.
    prompt = (
        "[INST] You are a Texas TEKS educational expert.\n"
        f"Answer this question clearly and helpfully:\n\n{message} [/INST]"
    )

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_len = inputs["input_ids"].shape[1]

    # Generation is sampled (do_sample=True), so outputs vary run to run.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=300,
            min_new_tokens=50,
            temperature=0.8,
            top_p=0.95,
            top_k=50,
            do_sample=True,
            repetition_penalty=1.1,
            # Mistral's tokenizer has no pad token; reuse EOS for padding.
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

    # BUGFIX: decode only the newly generated tokens instead of decoding the
    # whole sequence and string-splitting on "[/INST]" — robust even if the
    # model emits "[/INST]" inside its answer.
    response = tokenizer.decode(
        outputs[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # Defensive cleanup kept from the original: drop an echoed question.
    if response.startswith(message):
        response = response[len(message):].strip()

    return response


# Gradio ChatInterface wiring chat() to a web UI.
demo = gr.ChatInterface(
    fn=chat,
    title="🎓 CCISD TEKS Educational Assistant",
    description="""
    **Powered by Mistral-7B fine-tuned on 4,224 enhanced CCISD TEKS examples**

    Ask questions about:
    - 📚 TEKS standard definitions and explanations
    - 👨‍🏫 Teaching strategies and lesson planning
    - 🌎 Real-world applications
    - ✅ Assessment and quiz generation
    - 🎯 Learning objectives and prerequisites
    - 💡 Common student misconceptions

    *Trained on Texas TEKS standards with real-world educational data.*
    """,
    examples=[
        "What is TEKS standard MATH.A1.3.B?",
        "How should I teach the scientific method to 7th grade students?",
        "What are real-world applications of linear equations?",
        "Create a quiz question to assess understanding of photosynthesis",
        "What are common misconceptions students have about fractions?",
        "Explain ELAR.9.1.A and suggest a lesson activity",
    ],
    cache_examples=False,  # don't pre-compute example responses (too slow on CPU)
    theme="soft",
    chatbot=gr.Chatbot(height=500),
    textbox=gr.Textbox(
        placeholder="Ask me about Texas TEKS standards...",
        container=False,
        scale=7,
    ),
    # NOTE(review): these button-label kwargs were removed in Gradio 5 —
    # confirm the pinned gradio version is < 5 or drop them.
    retry_btn="🔄 Retry",
    undo_btn="↩️ Undo",
    clear_btn="🗑️ Clear",
)

# Launch the app only when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()