# app.py — Cardano Plutus AI tutor (Hugging Face Space).
# (Header residue from the HF web UI removed; last commit: d5aec37, verified.)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces
# Hugging Face Hub repo id of the fine-tuned Plutus tutor model.
MODEL_NAME = "ubiodee/Plutus_Tutor_new"
# ------------ Tokenizer cache ------------
# Module-level cache: the tokenizer is downloaded/built once per process.
_TOKENIZER = None
def get_tokenizer():
    """Return the process-wide tokenizer, loading it on first use.

    Guarantees a pad token is configured — reusing EOS, then BOS, and only
    adding a new "[PAD]" special token as a last resort — so that
    generation never crashes on a missing ``pad_token_id``.
    """
    global _TOKENIZER
    if _TOKENIZER is not None:
        return _TOKENIZER

    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
    if tokenizer.pad_token_id is None:
        # Prefer reusing an existing special token over growing the vocab.
        if tokenizer.eos_token_id is not None:
            tokenizer.pad_token = tokenizer.eos_token
        elif tokenizer.bos_token_id is not None:
            tokenizer.pad_token = tokenizer.bos_token
        else:
            tokenizer.add_special_tokens({"pad_token": "[PAD]"})
    _TOKENIZER = tokenizer
    return _TOKENIZER
# ------------ Prompt builder ------------
def build_instructions(personality, level, topic):
    """Compose the user-facing tutoring request for the given learner profile."""
    lines = [
        f"You are a friendly Plutus AI tutor for a {personality} learner at {level} level.",
        f"Topic: {topic}",
        "",  # blank line separating the header from the style guidance
        "Explain in a conversational, easy tone with concrete examples.",
        "Keep it complete and around 120–160 words.",
        "End with a one-line takeaway starting with 'Takeaway:'.",
    ]
    return "\n".join(lines)
def build_model_input(tokenizer, personality, level, topic):
    """Render the final prompt string for the model.

    Uses the tokenizer's chat template when one is available; otherwise
    falls back to a plain System/User/Assistant transcript.
    """
    user_msg = build_instructions(personality, level, topic)

    # Guard clause: tokenizers without a chat template get the plain format.
    if not hasattr(tokenizer, "apply_chat_template"):
        return (
            "System: You are a helpful Cardano Plutus tutor.\n\n"
            f"User: {user_msg}\n\nAssistant:"
        )

    chat = [
        {"role": "system", "content": "You are a helpful Cardano Plutus tutor."},
        {"role": "user", "content": user_msg},
    ]
    return tokenizer.apply_chat_template(
        chat,
        tokenize=False,
        add_generation_prompt=True,
    )
# ------------ GPU/CPU generation ------------
@spaces.GPU
def generate_on_gpu(personality, level, topic, max_new_tokens=100, min_new_tokens=32):
    """Generate one tutoring response for the selected learner profile.

    Runs under the ZeroGPU ``@spaces.GPU`` decorator: the model is loaded
    fresh on every call and deleted afterwards, trading reload latency for
    releasing GPU memory between requests.

    Args:
        personality: Learner-personality label interpolated into the prompt.
        level: Expertise-level label.
        topic: Topic label.
        max_new_tokens: Upper bound on generated tokens.
        min_new_tokens: Lower bound on generated tokens.

    Returns:
        The decoded continuation text, or a fallback error string when
        decoding yields nothing.
    """
    # Log GPU availability for debugging
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"GPU device: {torch.cuda.get_device_name(0)}")
    tokenizer = get_tokenizer()
    prompt = build_model_input(tokenizer, personality, level, topic)
    try:
        # Try loading model on GPU with 4-bit quantization
        # NOTE(review): `load_in_4bit=` is deprecated in recent transformers
        # in favor of `quantization_config=BitsAndBytesConfig(...)` — confirm
        # the pinned transformers version still accepts it.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            load_in_4bit=True,
            device_map="auto",
        )
        device = next(model.parameters()).device
    except Exception as e:
        print(f"GPU loading failed: {e}. Falling back to CPU.")
        # Fallback to CPU with FP16
        # NOTE(review): float16 on CPU is slow and unsupported for some ops;
        # float32 is the usual CPU choice — verify this path actually works.
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="cpu",
        )
        device = torch.device("cpu")
    model.eval()
    inputs = tokenizer(prompt, return_tensors="pt")
    # Remember the prompt length so only the continuation is decoded below.
    input_len = inputs["input_ids"].shape[1]
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.inference_mode():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            min_new_tokens=min_new_tokens,
            temperature=0.5,
            top_p=0.95,
            do_sample=True,
            repetition_penalty=1.05,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )
    # Decode and clean up
    gen_ids = outputs[0][input_len:]
    text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
    if not text:
        # Continuation decoded to nothing: decode the full sequence and
        # strip the echoed prompt prefix instead.
        text = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
        if text.startswith(prompt):
            text = text[len(prompt):].lstrip()
    # Cleanup: drop the model reference and free cached CUDA memory so the
    # next ZeroGPU allocation starts clean.
    try:
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    except Exception:
        pass
    return text if text else "Generation failed. Try regenerating or adjusting parameters."
# ------------ Orchestrator with retry logic ------------
def orchestrator(personality, level, topic, max_retries=3):
    """Validate the user's selections and run generation with retries.

    Returns a guidance message when any selection is missing, and an
    apology message when every attempt fails (e.g. ZeroGPU unavailable).
    """
    if not (personality and level and topic):
        return "Select your personality, expertise, and topic to get a tailored explanation."

    final_attempt = max_retries - 1
    for attempt in range(max_retries):
        try:
            return generate_on_gpu(personality, level, topic)
        except Exception as exc:
            # Surface the failure in the Space logs before deciding to retry.
            print(f"[Attempt {attempt + 1}/{max_retries}] ZeroGPU error: {type(exc).__name__}: {exc}")
            if attempt == final_attempt:
                return (
                    "GPU was not available after multiple attempts. "
                    "Click **Regenerate** or try again later."
                )
# ------------ Gradio UI ------------
# Build the UI. Component creation order inside the Blocks context
# determines layout, so statement order here is significant.
with gr.Blocks(theme="default") as iface:
    # Header / instructions shown above the controls.
    gr.Markdown(
        "## Cardano Plutus AI Assistant\n"
        "Pick your **Learning Personality**, **Expertise Level**, and **Topic**, then click **Generate**."
    )
    # The three dropdowns feeding orchestrator(); all start unset so the
    # guard message is shown until the user makes every selection.
    with gr.Row():
        personality = gr.Dropdown(
            choices=["Dyslexic", "Autistic", "Expressive"],
            label="Learning Personality",
            value=None,
            allow_custom_value=False,
            scale=1,
        )
        level = gr.Dropdown(
            choices=["Beginner", "Intermediate", "Advanced"],
            label="Expertise Level",
            value=None,
            allow_custom_value=False,
            scale=1,
        )
        topic = gr.Dropdown(
            choices=[
                "Plutus Basics",
                "Smart Contracts",
                "Cardano Blockchain",
                "Validator Scripts",
                "Plutus Tx",
                "Datum and Redeemer",
                "Time Handling in Plutus",
                "Off-Chain Code",
                "On-Chain Constraints",
                "Plutus Core",
                "Transaction Validation",
                "Cardano Node Integration",
            ],
            label="Topic",
            value=None,
            allow_custom_value=False,
            scale=2,
        )
    with gr.Row():
        generate_btn = gr.Button("Generate")
        regen = gr.Button("🔁 Regenerate")
    output = gr.Textbox(
        label="Model Response",
        lines=12,
        interactive=False,
        show_copy_button=True,
        placeholder="Your tailored explanation will appear here…",
    )
    # Both buttons run the same pipeline; Regenerate simply re-submits the
    # current selections.
    generate_btn.click(orchestrator, [personality, level, topic], output, queue=True)
    regen.click(orchestrator, [personality, level, topic], output, queue=True)
# Enable queue — serializes requests, required for ZeroGPU Spaces.
iface.queue()
if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard bind address/port for HF Spaces.
    iface.launch(server_name="0.0.0.0", server_port=7860)