Remostart committed on
Commit 14b43fc · verified · 1 parent: 3061d35

Update app.py

Files changed (1)
  1. app.py +38 -48
app.py CHANGED
@@ -5,7 +5,7 @@ import spaces
 
 MODEL_NAME = "ubiodee/Plutus_Tutor_new"
 
-# ---------------- Tokenizer (cached) ----------------
+# ------------ Tokenizer cache ------------
 _TOKENIZER = None
 def get_tokenizer():
     global _TOKENIZER
@@ -16,47 +16,27 @@ def get_tokenizer():
     _TOKENIZER = tok
     return _TOKENIZER
 
-# ---------------- Prompt ----------------
+# ------------ Prompt builder ------------
 def build_prompt(personality, level, topic):
     return (
         f"You are a friendly Plutus AI tutor for a {personality} learner at {level} level.\n"
         f"Topic: {topic}\n\n"
-        f"Explain in a conversational tone, with simple language and concrete examples.\n"
-        f"Keep it focused and complete in about 120–180 words.\n"
-        f"End with a single-sentence takeaway starting with 'Takeaway:'.\n"
+        "Explain in a conversational, easy tone with concrete examples.\n"
+        "Keep it complete, focused, and around 120–160 words.\n"
+        "End with a one-line takeaway starting with 'Takeaway:'.\n"
     )
 
-# ---------------- CPU path (fallback) ----------------
-def generate_cpu(personality, level, topic, max_new_tokens=200):
-    tokenizer = get_tokenizer()
-    prompt = build_prompt(personality, level, topic)
-    inputs = tokenizer(prompt, return_tensors="pt")
-
-    with torch.inference_mode():
-        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)  # CPU load
-        model.eval()
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=max_new_tokens,
-            temperature=0.2,
-            top_p=0.9,
-            do_sample=True,
-            eos_token_id=tokenizer.eos_token_id,
-            pad_token_id=tokenizer.pad_token_id,
-        )
-
-    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    if text.startswith(prompt):
-        text = text[len(prompt):].lstrip()
-    return text
-
-# ---------------- GPU path (ZeroGPU) ----------------
+# ------------ GPU-only generation ------------
 @spaces.GPU
-def generate_gpu(personality, level, topic, max_new_tokens=240):
+def generate_on_gpu(personality, level, topic, max_new_tokens=160):
+    """
+    Runs ONLY when ZeroGPU grants a GPU.
+    Loads model per-call, generates, decodes ONLY new tokens, frees VRAM.
+    """
     tokenizer = get_tokenizer()
     prompt = build_prompt(personality, level, topic)
 
-    # Prefer 4-bit to reduce VRAM; fall back to fp16 if unavailable
+    # Try 4-bit for VRAM; fall back to fp16 if not available
    try:
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
@@ -71,26 +51,29 @@ def generate_gpu(personality, level, topic, max_new_tokens=240):
         )
     model.eval()
 
+    # Move inputs to model device
     device = next(model.parameters()).device
     inputs = tokenizer(prompt, return_tensors="pt")
+    input_len = inputs["input_ids"].shape[1]
     inputs = {k: v.to(device) for k, v in inputs.items()}
 
     with torch.inference_mode():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=max_new_tokens,
-            temperature=0.15,
+            max_new_tokens=max_new_tokens,  # keep small for ZeroGPU time/VRAM
+            temperature=0.2,
             top_p=0.9,
             do_sample=True,
+            repetition_penalty=1.05,
             eos_token_id=tokenizer.eos_token_id,
             pad_token_id=tokenizer.pad_token_id,
         )
 
-    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    if text.startswith(prompt):
-        text = text[len(prompt):].lstrip()
+    # Decode ONLY the newly generated tokens (avoids prompt-echo trimming issues)
+    gen_ids = outputs[0][input_len:]
+    text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
 
-    # Free VRAM ASAP
+    # Cleanup VRAM
     try:
         del model
         if torch.cuda.is_available():
@@ -98,25 +81,31 @@ def generate_gpu(personality, level, topic, max_new_tokens=240):
     except Exception:
         pass
 
+    # Fallback guard: ensure we return something readable
+    if not text:
+        text = "Takeaway: Generation finished but returned empty text. Try again or choose a different topic."
     return text
 
-# ---------------- Orchestrator ----------------
+# ------------ Orchestrator (no CPU fallback) ------------
 def orchestrator(personality, level, topic):
     if not personality or not level or not topic:
         return "Select your personality, expertise, and topic to get a tailored explanation."
-    # Try GPU first, hide errors from user, log to console
     try:
-        return generate_gpu(personality, level, topic)
+        return generate_on_gpu(personality, level, topic)
     except Exception as e:
-        print(f"[GPU fallback] {type(e).__name__}: {e}")
-        return generate_cpu(personality, level, topic)
+        # Don't crash silently; show a friendly message
+        print(f"[ZeroGPU error] {type(e).__name__}: {e}")
+        return (
+            "GPU was not available or the job was interrupted. "
+            "Please click **Regenerate** or change a selection to try again."
+        )
 
-# ---------------- Gradio UI ----------------
+# ------------ Gradio UI ------------
 with gr.Blocks(theme="default") as iface:
     gr.Markdown(
         "## Cardano Plutus AI Assistant\n"
-        "Choose your **Learning Personality**, **Expertise Level**, and **Topic**. "
-        "An answer will be generated automatically."
+        "Pick your **Learning Personality**, **Expertise Level**, and **Topic**. "
+        "The answer will generate automatically."
    )
 
    with gr.Row():
@@ -162,7 +151,8 @@ with gr.Blocks(theme="default") as iface:
         label="Model Response",
         lines=12,
         interactive=False,
-        show_copy_button=True
+        show_copy_button=True,
+        placeholder="Your tailored explanation will appear here…",
     )
 
     def _maybe_generate(p, l, t):
@@ -175,7 +165,7 @@ with gr.Blocks(theme="default") as iface:
     topic.change(_maybe_generate, [personality, level, topic], output, queue=True)
     regen.click(orchestrator, [personality, level, topic], output, queue=True)
 
-# Keep simple for broad Gradio compatibility
+# Enable queue with broad compatibility
 iface.queue()
 
 if __name__ == "__main__":
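
Note: the hunk boundary at @@ -71,26 +51,29 @@ cuts off the body of the try/except around the model load, so the exact kwargs behind "Try 4-bit for VRAM; fall back to fp16 if not available" are not visible in this diff. A minimal sketch of the usual transformers pattern, with illustrative values that are assumptions rather than the commit's actual code:

    # Sketch only: the real load kwargs are elided by the hunk boundary.
    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    def load_model(model_name):  # hypothetical helper; app.py inlines this logic
        try:
            quant = BitsAndBytesConfig(
                load_in_4bit=True,                     # 4-bit weights to cut VRAM
                bnb_4bit_compute_dtype=torch.float16,  # fp16 compute for speed
            )
            return AutoModelForCausalLM.from_pretrained(
                model_name, quantization_config=quant, device_map="auto"
            )
        except Exception:
            # bitsandbytes missing or unsupported: plain fp16 load instead
            return AutoModelForCausalLM.from_pretrained(
                model_name, torch_dtype=torch.float16, device_map="auto"
            )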
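
Note: the VRAM cleanup block is likewise truncated right after "if torch.cuda.is_available():". The conventional completion (an assumption, since the next line is not shown in the diff) returns cached allocator blocks to the driver:

    # Assumed completion of the truncated best-effort cleanup
    try:
        del model
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # release cached VRAM between calls
    except Exception:
        pass  # cleanup must never fail the request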
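
Note on @spaces.GPU: on a ZeroGPU Space the decorated function only runs once a GPU is granted, which is why the model is loaded per call and why orchestrator() maps any exception to a "GPU was not available" message instead of falling back to CPU. If generations run long, the decorator also accepts a duration hint; a hedged example, with the value chosen arbitrarily:

    # Hypothetical variant: request a longer ZeroGPU slice for slow generations
    @spaces.GPU(duration=120)  # seconds
    def generate_on_gpu(personality, level, topic, max_new_tokens=160):
        ...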