# app.py — Cardano Plutus AI tutor (Hugging Face Space).
# (Header residue from the HF web UI removed; last commit: d5aec37, verified.)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces
# Hugging Face Hub repo id of the fine-tuned Plutus tutor model.
MODEL_NAME = "ubiodee/Plutus_Tutor_new"
# ------------ Tokenizer cache ------------
# Module-level cache: the tokenizer is downloaded/built once per process.
_TOKENIZER = None
def get_tokenizer():
    """Return the process-wide tokenizer, loading it on first use.

    Guarantees a pad token is configured — reusing EOS, then BOS, and only
    adding a new "[PAD]" special token as a last resort — so that
    generation never crashes on a missing ``pad_token_id``.
    """
    global _TOKENIZER
    if _TOKENIZER is not None:
        return _TOKENIZER

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
    if tokenizer.pad_token_id is None:
        # Prefer reusing an existing special token over growing the vocab.
        if tokenizer.eos_token_id is not None:
            tokenizer.pad_token = tokenizer.eos_token
        elif tokenizer.bos_token_id is not None:
            tokenizer.pad_token = tokenizer.bos_token
        else:
            tokenizer.add_special_tokens({"pad_token": "[PAD]"})
    _TOKENIZER = tokenizer
    return _TOKENIZER
# ------------ Prompt builder ------------
def build_instructions(personality, level, topic):
    """Compose the user-facing tutoring request for the given learner profile."""
    lines = [
        f"You are a friendly Plutus AI tutor for a {personality} learner at {level} level.",
        f"Topic: {topic}",
        "",  # blank line separating the header from the style guidance
        "Explain in a conversational, easy tone with concrete examples.",
        "Keep it complete and around 120–160 words.",
        "End with a one-line takeaway starting with 'Takeaway:'.",
    ]
    return "\n".join(lines)
def build_model_input(tokenizer, personality, level, topic):
    """Render the final prompt string for the model.

    Uses the tokenizer's chat template when one is available; otherwise
    falls back to a plain System/User/Assistant transcript.
    """
    user_msg = build_instructions(personality, level, topic)

    # Guard clause: tokenizers without a chat template get the plain format.
    if not hasattr(tokenizer, "apply_chat_template"):
        return (
            "System: You are a helpful Cardano Plutus tutor.\n\n"
            f"User: {user_msg}\n\nAssistant:"
        )

    chat = [
        {"role": "system", "content": "You are a helpful Cardano Plutus tutor."},
        {"role": "user", "content": user_msg},
    ]
    return tokenizer.apply_chat_template(
        chat,
        tokenize=False,
        add_generation_prompt=True,
    )
# ------------ GPU/CPU generation ------------
@spaces.GPU
def generate_on_gpu(personality, level, topic, max_new_tokens=100, min_new_tokens=32):
    """Generate one tutoring response for the selected learner profile.

    Runs under the ZeroGPU ``@spaces.GPU`` decorator: the model is loaded
    fresh on every call and deleted afterwards, trading reload latency for
    releasing GPU memory between requests.

    Args:
        personality: Learner-personality label interpolated into the prompt.
        level: Expertise-level label.
        topic: Topic label.
        max_new_tokens: Upper bound on generated tokens.
        min_new_tokens: Lower bound on generated tokens.

    Returns:
        The decoded continuation text, or a fallback error string when
        decoding yields nothing.
    """
    # Log GPU availability for debugging
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU device: {torch.cuda.get_device_name(0)}")
    tokenizer = get_tokenizer()
    prompt = build_model_input(tokenizer, personality, level, topic)
    try:
        # Try loading model on GPU with 4-bit quantization
        # NOTE(review): `load_in_4bit=` is deprecated in recent transformers
        # in favor of `quantization_config=BitsAndBytesConfig(...)` — confirm
        # the pinned transformers version still accepts it.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            load_in_4bit=True,
            device_map="auto",
        )
        device = next(model.parameters()).device
    except Exception as e:
        print(f"GPU loading failed: {e}. Falling back to CPU.")
        # Fallback to CPU with FP16
        # NOTE(review): float16 on CPU is slow and unsupported for some ops;
        # float32 is the usual CPU choice — verify this path actually works.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="cpu",
        )
        device = torch.device("cpu")
    model.eval()
    inputs = tokenizer(prompt, return_tensors="pt")
    # Remember the prompt length so only the continuation is decoded below.
    input_len = inputs["input_ids"].shape[1]
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            min_new_tokens=min_new_tokens,
            temperature=0.5,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.05,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )
    # Decode and clean up
    gen_ids = outputs[0][input_len:]
    text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
    if not text:
        # Continuation decoded to nothing: decode the full sequence and
        # strip the echoed prompt prefix instead.
        text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        if text.startswith(prompt):
            text = text[len(prompt):].lstrip()
    # Cleanup: drop the model reference and free cached CUDA memory so the
    # next ZeroGPU allocation starts clean.
    try:
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except Exception:
        pass
    return text if text else "Generation failed. Try regenerating or adjusting parameters."
# ------------ Orchestrator with retry logic ------------
def orchestrator(personality, level, topic, max_retries=3):
    """Validate the user's selections and run generation with retries.

    Returns a guidance message when any selection is missing, and an
    apology message when every attempt fails (e.g. ZeroGPU unavailable).
    """
    if not (personality and level and topic):
        return "Select your personality, expertise, and topic to get a tailored explanation."

    final_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            return generate_on_gpu(personality, level, topic)
        except Exception as exc:
            # Surface the failure in the Space logs before deciding to retry.
            print(f"[Attempt {attempt + 1}/{max_retries}] ZeroGPU error: {type(exc).__name__}: {exc}")
            if attempt == final_attempt:
                return (
                    "GPU was not available after multiple attempts. "
                    "Click **Regenerate** or try again later."
                )
# ------------ Gradio UI ------------
# Build the UI. Component creation order inside the Blocks context
# determines layout, so statement order here is significant.
with gr.Blocks(theme="default") as iface:
    # Header / instructions shown above the controls.
    gr.Markdown(
        "## Cardano Plutus AI Assistant\n"
        "Pick your **Learning Personality**, **Expertise Level**, and **Topic**, then click **Generate**."
    )
    # The three dropdowns feeding orchestrator(); all start unset so the
    # guard message is shown until the user makes every selection.
    with gr.Row():
        personality = gr.Dropdown(
            choices=["Dyslexic", "Autistic", "Expressive"],
            label="Learning Personality",
            value=None,
            allow_custom_value=False,
            scale=1,
        )
        level = gr.Dropdown(
            choices=["Beginner", "Intermediate", "Advanced"],
            label="Expertise Level",
            value=None,
            allow_custom_value=False,
            scale=1,
        )
        topic = gr.Dropdown(
            choices=[
                "Plutus Basics",
                "Smart Contracts",
                "Cardano Blockchain",
                "Validator Scripts",
                "Plutus Tx",
                "Datum and Redeemer",
                "Time Handling in Plutus",
                "Off-Chain Code",
                "On-Chain Constraints",
                "Plutus Core",
                "Transaction Validation",
                "Cardano Node Integration",
            ],
            label="Topic",
            value=None,
            allow_custom_value=False,
            scale=2,
        )
    with gr.Row():
        generate_btn = gr.Button("Generate")
        regen = gr.Button("🔁 Regenerate")
    output = gr.Textbox(
        label="Model Response",
        lines=12,
        interactive=False,
        show_copy_button=True,
        placeholder="Your tailored explanation will appear here…",
    )
    # Both buttons run the same pipeline; Regenerate simply re-submits the
    # current selections.
    generate_btn.click(orchestrator, [personality, level, topic], output, queue=True)
    regen.click(orchestrator, [personality, level, topic], output, queue=True)
# Enable queue — serializes requests, required for ZeroGPU Spaces.
iface.queue()
if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard bind address/port for HF Spaces.
    iface.launch(server_name="0.0.0.0", server_port=7860)