Update app.py
app.py CHANGED
@@ -5,7 +5,7 @@ import spaces
 
 MODEL_NAME = "ubiodee/Plutus_Tutor_new"
 
-# ------------
+# ------------ Tokenizer cache ------------
 _TOKENIZER = None
 def get_tokenizer():
     global _TOKENIZER
@@ -16,47 +16,27 @@ def get_tokenizer():
     _TOKENIZER = tok
     return _TOKENIZER
 
-# ------------
+# ------------ Prompt builder ------------
 def build_prompt(personality, level, topic):
     return (
         f"You are a friendly Plutus AI tutor for a {personality} learner at {level} level.\n"
         f"Topic: {topic}\n\n"
-
-
-
+        "Explain in a conversational, easy tone with concrete examples.\n"
+        "Keep it complete, focused, and around 120–160 words.\n"
+        "End with a one-line takeaway starting with 'Takeaway:'.\n"
     )
 
-# -------------
-def generate_cpu(personality, level, topic, max_new_tokens=200):
-    tokenizer = get_tokenizer()
-    prompt = build_prompt(personality, level, topic)
-    inputs = tokenizer(prompt, return_tensors="pt")
-
-    with torch.inference_mode():
-        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)  # CPU load
-        model.eval()
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=max_new_tokens,
-            temperature=0.2,
-            top_p=0.9,
-            do_sample=True,
-            eos_token_id=tokenizer.eos_token_id,
-            pad_token_id=tokenizer.pad_token_id,
-        )
-
-    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    if text.startswith(prompt):
-        text = text[len(prompt):].lstrip()
-    return text
-
-# ---------------- GPU path (ZeroGPU) ----------------
+# ------------ GPU-only generation ------------
 @spaces.GPU
-def generate_gpu(personality, level, topic, max_new_tokens=240):
+def generate_on_gpu(personality, level, topic, max_new_tokens=160):
+    """
+    Runs ONLY when ZeroGPU grants a GPU.
+    Loads model per-call, generates, decodes ONLY new tokens, frees VRAM.
+    """
     tokenizer = get_tokenizer()
     prompt = build_prompt(personality, level, topic)
 
-    #
+    # Try 4-bit for VRAM; fall back to fp16 if not available
     try:
         model = AutoModelForCausalLM.from_pretrained(
             MODEL_NAME,
@@ -71,26 +51,29 @@ def generate_gpu(personality, level, topic, max_new_tokens=240):
         )
     model.eval()
 
+    # Move inputs to model device
     device = next(model.parameters()).device
     inputs = tokenizer(prompt, return_tensors="pt")
+    input_len = inputs["input_ids"].shape[1]
     inputs = {k: v.to(device) for k, v in inputs.items()}
 
     with torch.inference_mode():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=max_new_tokens,
-            temperature=0.
+            max_new_tokens=max_new_tokens,  # keep small for ZeroGPU time/VRAM
+            temperature=0.2,
             top_p=0.9,
             do_sample=True,
+            repetition_penalty=1.05,
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.pad_token_id,
         )
 
-
-
-
+    # Decode ONLY the newly generated tokens (avoids prompt-echo trimming issues)
+    gen_ids = outputs[0][input_len:]
+    text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
 
-    #
+    # Cleanup VRAM
    try:
         del model
         if torch.cuda.is_available():
@@ -98,25 +81,31 @@ def generate_gpu(personality, level, topic, max_new_tokens=240):
     except Exception:
         pass
 
+    # Fallback guard: ensure we return something readable
+    if not text:
+        text = "Takeaway: Generation finished but returned empty text. Try again or choose a different topic."
     return text
 
-# ------------
+# ------------ Orchestrator (no CPU fallback) ------------
 def orchestrator(personality, level, topic):
     if not personality or not level or not topic:
         return "Select your personality, expertise, and topic to get a tailored explanation."
-    # Try GPU first, hide errors from user, log to console
     try:
-        return
+        return generate_on_gpu(personality, level, topic)
     except Exception as e:
-
-
+        # Don’t crash silently; show a friendly message
+        print(f"[ZeroGPU error] {type(e).__name__}: {e}")
+        return (
+            "GPU was not available or the job was interrupted. "
+            "Please click **Regenerate** or change a selection to try again."
+        )
 
-# ------------
+# ------------ Gradio UI ------------
 with gr.Blocks(theme="default") as iface:
     gr.Markdown(
         "## Cardano Plutus AI Assistant\n"
-        "
-        "
+        "Pick your **Learning Personality**, **Expertise Level**, and **Topic**. "
+        "The answer will generate automatically."
     )
 
     with gr.Row():
@@ -162,7 +151,8 @@ with gr.Blocks(theme="default") as iface:
         label="Model Response",
         lines=12,
         interactive=False,
-        show_copy_button=True
+        show_copy_button=True,
+        placeholder="Your tailored explanation will appear here…",
     )
 
     def _maybe_generate(p, l, t):
@@ -175,7 +165,7 @@ with gr.Blocks(theme="default") as iface:
     topic.change(_maybe_generate, [personality, level, topic], output, queue=True)
     regen.click(orchestrator, [personality, level, topic], output, queue=True)
 
-#
+# Enable queue with broad compatibility
 iface.queue()
 
 if __name__ == "__main__":
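The hunk boundaries elide the model-loading arguments behind the new "Try 4-bit for VRAM; fall back to fp16 if not available" comment (new lines 43-50 are not shown). A minimal sketch of what such a load typically looks like, assuming the 4-bit path uses `bitsandbytes` and the fallback is a plain fp16 load; the exact arguments in `app.py` may differ:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

MODEL_NAME = "ubiodee/Plutus_Tutor_new"

def load_model():
    # Preferred path: 4-bit NF4 quantization, which keeps the weights
    # small enough for a ZeroGPU slot.
    try:
        return AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            quantization_config=BitsAndBytesConfig(
                load_in_4bit=True,
                bnb_4bit_quant_type="nf4",
                bnb_4bit_compute_dtype=torch.float16,
            ),
            device_map="auto",
        )
    except Exception:
        # Fallback: half-precision weights when bitsandbytes is unavailable.
        return AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
```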
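The subtlest fix in the diff is the move from prompt-echo trimming (`if text.startswith(prompt): text = text[len(prompt):]`) to slicing at the recorded input length. `model.generate` returns the prompt token ids followed by the newly generated ids, so cutting at `input_len` always isolates the new text, even when decoding does not reproduce the prompt byte-for-byte and a `startswith` check would silently fail. A self-contained sketch of the pattern, using a tiny public model purely for illustration:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Tiny model so the sketch runs quickly; any causal LM behaves the same way.
tok = AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2")
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

inputs = tok("Topic: Plutus validators\n\n", return_tensors="pt")
input_len = inputs["input_ids"].shape[1]  # number of prompt tokens

with torch.inference_mode():
    outputs = model.generate(**inputs, max_new_tokens=20)

# outputs[0] holds the prompt ids followed by the generated ids;
# slicing at input_len keeps only the newly generated ones.
new_text = tok.decode(outputs[0][input_len:], skip_special_tokens=True)
print(new_text)
```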
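The cleanup block loses its middle line to a hunk boundary (new line 80, directly after `if torch.cuda.is_available():`, is not shown). For a per-call model load it is presumably the standard cache flush; a sketch of the whole idiom under that assumption, with a placeholder model so it runs standalone:

```python
import gc
import torch
from transformers import AutoModelForCausalLM

# Placeholder model purely for illustration.
model = AutoModelForCausalLM.from_pretrained("sshleifer/tiny-gpt2")

# ... generation would happen here ...

try:
    del model  # drop the only reference to the weights
    gc.collect()  # let Python reclaim them now rather than later
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # assumed elided line: release cached VRAM
except Exception:
    pass  # cleanup problems must never mask the generated text
```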
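The GPU-only shape of the new version follows the usual ZeroGPU contract: `@spaces.GPU` requests a GPU slot only for the decorated call, all CUDA work has to happen inside it, and a denied or interrupted grant surfaces as an exception in the caller. That is why `orchestrator` wraps the call and turns failures into a readable message instead of keeping the deleted CPU path. A skeletal sketch of that contract (bodies abbreviated; the `duration` argument is optional):

```python
import spaces

@spaces.GPU(duration=120)  # ask ZeroGPU for up to 120 s of GPU time per call
def generate_on_gpu(personality, level, topic):
    # Load the model, generate, and free VRAM here, on the granted GPU.
    ...

def orchestrator(personality, level, topic):
    try:
        return generate_on_gpu(personality, level, topic)
    except Exception as e:
        # A denied or interrupted grant lands here; degrade gracefully.
        print(f"[ZeroGPU error] {type(e).__name__}: {e}")
        return "GPU was not available. Please try again."
```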