Remostart committed
Commit af096d6 · verified · 1 Parent(s): 0f367cb

Update app.py

Files changed (1):
  1. app.py +130 -57
app.py CHANGED
@@ -1,66 +1,139 @@
  import gradio as gr
  import torch
- from transformers import AutoModelForCausalLM, AutoTokenizer
- import spaces

- # Load model & tokenizer
  MODEL_NAME = "ubiodee/Plutus_Tutor_new"

- # Initialize tokenizer and model
- tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
- model.eval()
-
- # Response generation function
- @spaces.GPU
- def generate_response(personality, level, topic):
-     # Construct a structured prompt incorporating user selections
-     full_prompt = (
          f"You are a Plutus AI Assistant tailored for a {personality} learner "
          f"at {level} level, focusing on {topic}. Provide a clear, concise, "
          f"and tailored explanation of {topic}, suitable for the specified personality and expertise level."
      )
-
-     inputs = tokenizer(full_prompt, return_tensors="pt").to("cuda")

-     with torch.no_grad():
          outputs = model.generate(
              **inputs,
-             max_new_tokens=250,
              temperature=0.1,
              top_p=0.1,
              do_sample=True,
              eos_token_id=tokenizer.eos_token_id,
              pad_token_id=tokenizer.pad_token_id,
          )
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)

-     # Remove the full prompt from the output to return only the answer
-     if response.startswith(full_prompt):
-         response = response[len(full_prompt):].strip()

-     return response

- # Gradio interface
  with gr.Blocks(theme="default") as iface:
      gr.Markdown(
-         """
-         # Cardano Plutus AI Assistant
-         Select your learning personality, expertise level, and topic to get a tailored explanation about Plutus or Cardano.
-         The content will be generated automatically upon selection.
-         """
      )
-
      with gr.Row():
          personality = gr.Dropdown(
              choices=["Dyslexic", "Autistic", "Expressive"],
-             label="Select Your Learning Personality",
-             value="Expressive"
          )
          level = gr.Dropdown(
              choices=["Beginner", "Intermediate", "Advanced"],
-             label="Select Your Expertise Level",
-             value="Beginner"
          )
          topic = gr.Dropdown(
              choices=[
@@ -75,31 +148,31 @@ with gr.Blocks(theme="default") as iface:
                  "On-Chain Constraints",
                  "Plutus Core",
                  "Transaction Validation",
-                 "Cardano Node Integration"
              ],
-             label="Select Topic",
-             value="Plutus Basics"
          )
-
-     output = gr.Textbox(label="Model Response")
-
-     # Trigger generation on any dropdown change
-     personality.change(
-         fn=generate_response,
-         inputs=[personality, level, topic],
-         outputs=output
-     )
-     level.change(
-         fn=generate_response,
-         inputs=[personality, level, topic],
-         outputs=output
-     )
-     topic.change(
-         fn=generate_response,
-         inputs=[personality, level, topic],
-         outputs=output
-     )

- # Launch the app (Hugging Face Spaces handles this automatically)
  if __name__ == "__main__":
-     iface.launch(server_name="0.0.0.0", server_port=7860)
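Note: the removed version loads the model on CPU at import time but moves its inputs to "cuda" inside the @spaces.GPU function, so model.generate would run with weights and inputs on different devices once ZeroGPU grants a GPU. The rewrite below keeps model loading and placement inside the GPU-granted function instead; a minimal sketch of the idea against the old code:

    # Hypothetical fix inside the old generate_response, shown for contrast:
    model_gpu = model.to("cuda")  # move weights only after the GPU is granted
    inputs = tokenizer(full_prompt, return_tensors="pt").to("cuda")
    outputs = model_gpu.generate(**inputs, max_new_tokens=250)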
 
  import gradio as gr
  import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
+ import spaces

  MODEL_NAME = "ubiodee/Plutus_Tutor_new"

+ # --------- Lightweight utilities ----------
+ def build_prompt(personality, level, topic):
+     return (
          f"You are a Plutus AI Assistant tailored for a {personality} learner "
          f"at {level} level, focusing on {topic}. Provide a clear, concise, "
          f"and tailored explanation of {topic}, suitable for the specified personality and expertise level."
      )

+ def _ensure_tokenizer():
+     tok = AutoTokenizer.from_pretrained(MODEL_NAME)
+     if tok.pad_token_id is None:
+         tok.pad_token = tok.eos_token
+     return tok
+
+ # CPU fallback (slow, but prevents total failure)
+ def generate_cpu(personality, level, topic, max_new_tokens=250):
+     tokenizer = _ensure_tokenizer()
+     prompt = build_prompt(personality, level, topic)
+     inputs = tokenizer(prompt, return_tensors="pt")
+     # Small settings for CPU to avoid long stalls
+     with torch.inference_mode():
+         model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)  # CPU load
+         model.eval()
          outputs = model.generate(
              **inputs,
+             max_new_tokens=min(max_new_tokens, 128),
+             temperature=0.2,
+             top_p=0.9,
+             do_sample=True,
+             eos_token_id=tokenizer.eos_token_id,
+             pad_token_id=tokenizer.pad_token_id,
+         )
+     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     if text.startswith(prompt):
+         text = text[len(prompt):].strip()
+     return text
+
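Note: generate_cpu reloads the model from the Hub on every call, so each CPU fallback pays the full load time again. A minimal caching sketch (the _cpu_model helper is hypothetical, not part of this commit):

    from functools import lru_cache

    @lru_cache(maxsize=1)
    def _cpu_model():
        # Load once per process and reuse across fallback calls
        m = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
        m.eval()
        return m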
+ @spaces.GPU
+ def generate_gpu(personality, level, topic, max_new_tokens=250):
+     """
+     Runs ONLY under a granted GPU.
+     Loads the model in 4-bit to fit ZeroGPU VRAM, generates, then frees VRAM.
+     """
+     tokenizer = _ensure_tokenizer()
+     prompt = build_prompt(personality, level, topic)
+
+     # Prefer 4-bit to minimize VRAM on ZeroGPU
+     try:
+         model = AutoModelForCausalLM.from_pretrained(
+             MODEL_NAME,
+             load_in_4bit=True,
+             device_map="auto",
+         )
+     except Exception:
+         # If 4-bit isn't available for this arch, fall back to fp16 on GPU
+         model = AutoModelForCausalLM.from_pretrained(
+             MODEL_NAME,
+             torch_dtype=torch.float16,
+             device_map="auto",
+         )
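Note: recent transformers releases deprecate passing load_in_4bit directly to from_pretrained in favor of an explicit quantization config, and 4-bit loading needs the bitsandbytes package either way. A sketch of the equivalent call, assuming transformers>=4.30 with bitsandbytes installed:

    from transformers import BitsAndBytesConfig

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        quantization_config=BitsAndBytesConfig(load_in_4bit=True),  # replaces load_in_4bit=True
        device_map="auto",
    )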
+
+     model.eval()
+
+     device = next(model.parameters()).device
+     inputs = tokenizer(prompt, return_tensors="pt")
+     inputs = {k: v.to(device) for k, v in inputs.items()}
+
+     with torch.inference_mode():
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=max_new_tokens,
              temperature=0.1,
              top_p=0.1,
              do_sample=True,
              eos_token_id=tokenizer.eos_token_id,
              pad_token_id=tokenizer.pad_token_id,
          )

+     text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     if text.startswith(prompt):
+         text = text[len(prompt):].strip()
+
+     # Free VRAM ASAP
+     try:
+         del model
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+     except Exception:
+         pass
+
+     return text
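Note: del model only drops a local reference; forcing a garbage-collection pass before empty_cache() releases the weights more reliably. A sketch of that variant (the gc.collect() call is an addition, not in the commit):

    import gc

    del model
    gc.collect()                   # drop the freed weights' Python objects first
    if torch.cuda.is_available():
        torch.cuda.empty_cache()   # then return cached CUDA blocks to the driver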
 
+ def orchestrator(personality, level, topic):
+     # Don't run until all selections are made
+     if not personality or not level or not topic:
+         return "Select your personality, expertise, and topic to get a tailored explanation."
+
+     # Try GPU path first; if ZeroGPU refuses/throws, fall back to CPU
+     try:
+         return generate_gpu(personality, level, topic)
+     except RuntimeError:
+         # Typical ZeroGPU worker errors show here; fall back gracefully
+         return f"(GPU unavailable, using CPU fallback)\n\n{generate_cpu(personality, level, topic)}"
+     except Exception as e:
+         # Any other unexpected issue; try CPU anyway
+         return f"(GPU error: {type(e).__name__})\n\n{generate_cpu(personality, level, topic)}"
+
+ # --------- Gradio UI ----------
  with gr.Blocks(theme="default") as iface:
      gr.Markdown(
+         "## Cardano Plutus AI Assistant\n"
+         "Choose your **Learning Personality**, **Expertise Level**, and **Topic**. "
+         "An answer will be generated automatically."
      )
+
      with gr.Row():
          personality = gr.Dropdown(
              choices=["Dyslexic", "Autistic", "Expressive"],
+             label="Learning Personality",
+             value=None,
+             allow_custom_value=False,
+             scale=1
          )
          level = gr.Dropdown(
              choices=["Beginner", "Intermediate", "Advanced"],
+             label="Expertise Level",
+             value=None,
+             allow_custom_value=False,
+             scale=1
          )
          topic = gr.Dropdown(
              choices=[
@@ -75,31 +148,31 @@
                  "On-Chain Constraints",
                  "Plutus Core",
                  "Transaction Validation",
+                 "Cardano Node Integration",
              ],
+             label="Topic",
+             value=None,
+             allow_custom_value=False,
+             scale=2
          )

+     with gr.Row():
+         regen = gr.Button("🔁 Regenerate")
+     output = gr.Textbox(label="Model Response", lines=12, interactive=False, show_copy_button=True)
+
+     # Auto-generate when any dropdown changes (only once all three have values)
+     def _maybe_generate(p, l, t):
+         if p and l and t:
+             return orchestrator(p, l, t)
+         return "Select your personality, expertise, and topic to get a tailored explanation."
+
+     personality.change(_maybe_generate, [personality, level, topic], output, queue=True)
+     level.change(_maybe_generate, [personality, level, topic], output, queue=True)
+     topic.change(_maybe_generate, [personality, level, topic], output, queue=True)
+     regen.click(orchestrator, [personality, level, topic], output, queue=True)
+
+ # Enable request queueing (helps with ZeroGPU scheduling)
+ iface.queue(concurrency_count=1, max_size=8)
+
  if __name__ == "__main__":
+     iface.launch(server_name="0.0.0.0", server_port=7860)
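Note: queue(concurrency_count=..., max_size=...) is the Gradio 3.x signature; Gradio 4 removed concurrency_count in favor of default_concurrency_limit. If the Space runs Gradio 4+ (the version is not pinned in this commit), the equivalent would be:

    iface.queue(default_concurrency_limit=1, max_size=8)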