import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import spaces

MODEL_NAME = "ubiodee/Plutus_Tutor_new"

# ------------ Tokenizer cache ------------
_TOKENIZER = None


def get_tokenizer():
    global _TOKENIZER
    if _TOKENIZER is None:
        tok = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
        # Ensure pad/eos exist to avoid generation crashes
        if tok.pad_token_id is None:
            if tok.eos_token_id is not None:
                tok.pad_token = tok.eos_token
            elif tok.bos_token_id is not None:
                tok.pad_token = tok.bos_token
            else:
                # Note: this grows the vocab; the model would need
                # resize_token_embeddings() before the new token is usable.
                tok.add_special_tokens({"pad_token": "[PAD]"})
        _TOKENIZER = tok
    return _TOKENIZER


# ------------ Prompt builder ------------
def build_instructions(personality, level, topic):
    return (
        f"You are a friendly Plutus AI tutor for a {personality} learner at {level} level.\n"
        f"Topic: {topic}\n\n"
        "Explain in a conversational, easy tone with concrete examples.\n"
        "Keep it complete and around 120–160 words.\n"
        "End with a one-line takeaway starting with 'Takeaway:'."
    )


def build_model_input(tokenizer, personality, level, topic):
    user_msg = build_instructions(personality, level, topic)
    # hasattr(tokenizer, "apply_chat_template") is always True on modern
    # tokenizers; check for an actual chat template instead, since calling
    # apply_chat_template without one can raise.
    if getattr(tokenizer, "chat_template", None):
        messages = [
            {"role": "system", "content": "You are a helpful Cardano Plutus tutor."},
            {"role": "user", "content": user_msg},
        ]
        return tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
    return (
        "System: You are a helpful Cardano Plutus tutor.\n\n"
        f"User: {user_msg}\n\nAssistant:"
    )


# ------------ GPU/CPU generation ------------
@spaces.GPU
def generate_on_gpu(personality, level, topic, max_new_tokens=220, min_new_tokens=32):
    # max_new_tokens=220 leaves headroom for the prompt's requested
    # 120–160 words plus the takeaway line (100 tokens would truncate it).

    # Log GPU availability for debugging
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU device: {torch.cuda.get_device_name(0)}")

    tokenizer = get_tokenizer()
    prompt = build_model_input(tokenizer, personality, level, topic)

    try:
        # Try loading the model on GPU with 4-bit quantization
        # (quantization_config is the current API; the bare load_in_4bit
        # kwarg is deprecated).
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
            device_map="auto",
        )
        device = next(model.parameters()).device
    except Exception as e:
        print(f"GPU loading failed: {e}. Falling back to CPU.")
        # Fall back to CPU in FP32: FP16 matmuls are unsupported on most
        # CPUs and make generate() crash.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float32,
            device_map="cpu",
        )
        device = torch.device("cpu")

    model.eval()

    inputs = tokenizer(prompt, return_tensors="pt")
    input_len = inputs["input_ids"].shape[1]
    inputs = {k: v.to(device) for k, v in inputs.items()}

    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            min_new_tokens=min_new_tokens,
            temperature=0.5,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.05,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

    # Decode only the newly generated tokens; fall back to decoding the
    # full sequence (minus the prompt) if that comes back empty
    gen_ids = outputs[0][input_len:]
    text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
    if not text:
        text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        if text.startswith(prompt):
            text = text[len(prompt):].lstrip()

    # Cleanup: free GPU memory so repeated ZeroGPU calls don't accumulate
    try:
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except Exception:
        pass

    return text if text else "Generation failed. Try regenerating or adjusting parameters."


# ------------ Orchestrator with retry logic ------------
def orchestrator(personality, level, topic, max_retries=3):
    if not personality or not level or not topic:
        return "Select your personality, expertise, and topic to get a tailored explanation."
    for attempt in range(max_retries):
        try:
            return generate_on_gpu(personality, level, topic)
        except Exception as e:
            print(f"[Attempt {attempt + 1}/{max_retries}] ZeroGPU error: {type(e).__name__}: {e}")
            if attempt == max_retries - 1:
                return (
                    "GPU was not available after multiple attempts. "
                    "Click **Regenerate** or try again later."
                )


# ------------ Gradio UI ------------
with gr.Blocks(theme="default") as iface:
    gr.Markdown(
        "## Cardano Plutus AI Assistant\n"
        "Pick your **Learning Personality**, **Expertise Level**, and **Topic**, then click **Generate**."
    )

    with gr.Row():
        personality = gr.Dropdown(
            choices=["Dyslexic", "Autistic", "Expressive"],
            label="Learning Personality",
            value=None,
            allow_custom_value=False,
            scale=1,
        )
        level = gr.Dropdown(
            choices=["Beginner", "Intermediate", "Advanced"],
            label="Expertise Level",
            value=None,
            allow_custom_value=False,
            scale=1,
        )
        topic = gr.Dropdown(
            choices=[
                "Plutus Basics",
                "Smart Contracts",
                "Cardano Blockchain",
                "Validator Scripts",
                "Plutus Tx",
                "Datum and Redeemer",
                "Time Handling in Plutus",
                "Off-Chain Code",
                "On-Chain Constraints",
                "Plutus Core",
                "Transaction Validation",
                "Cardano Node Integration",
            ],
            label="Topic",
            value=None,
            allow_custom_value=False,
            scale=2,
        )

    with gr.Row():
        generate_btn = gr.Button("Generate")
        regen = gr.Button("🔁 Regenerate")

    output = gr.Textbox(
        label="Model Response",
        lines=12,
        interactive=False,
        show_copy_button=True,
        placeholder="Your tailored explanation will appear here…",
    )

    generate_btn.click(orchestrator, [personality, level, topic], output, queue=True)
    regen.click(orchestrator, [personality, level, topic], output, queue=True)

# Enable queue so concurrent requests are serialized through ZeroGPU
iface.queue()

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860)
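
# Local usage sketch (assumptions: this file is saved as app.py and is run
# outside a Space, where @spaces.GPU is a no-op, so generation falls back to
# whatever hardware is available):
#   from app import orchestrator
#   print(orchestrator("Expressive", "Beginner", "Plutus Basics"))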