Remostart committed on
Commit
3e776fc
·
verified ·
1 Parent(s): 41206c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -109
app.py CHANGED
@@ -1,54 +1,33 @@
1
  import gradio as gr
2
  import torch
3
- import logging, traceback
4
- from transformers import (
5
- AutoModelForCausalLM,
6
- AutoTokenizer,
7
- TextIteratorStreamer,
8
- StoppingCriteria,
9
- StoppingCriteriaList,
10
- )
11
  from threading import Thread
12
 
13
# ---------------- Logging ----------------
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ---------------- Model & Tokenizer ----------------
# Hub repository id used for both the tokenizer and the causal LM weights.
MODEL_NAME = "ubiodee/Plutus_Tutor_new"

try:
    logger.info("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

    logger.info("Loading model...")
    # Half precision only when a CUDA device is present; float32 is the
    # safer choice on CPU.
    has_cuda = torch.cuda.is_available()
    if has_cuda:
        dtype = torch.float16
    else:
        dtype = torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME, device_map="auto", torch_dtype=dtype, low_cpu_mem_usage=True
    )
    model.eval()

    # Make sure a pad token exists. Reusing EOS needs no new embedding row;
    # adding a brand-new token does, so the embeddings are resized then.
    needs_pad = tokenizer.pad_token_id is None
    added = needs_pad and tokenizer.eos_token is None
    if added:
        tokenizer.add_special_tokens({"pad_token": "</s>"})
        model.resize_token_embeddings(len(tokenizer))
    elif needs_pad:
        tokenizer.pad_token = tokenizer.eos_token

    logger.info("Model and tokenizer loaded successfully.")
except Exception as e:
    # Log the failure, then let it propagate so the app does not start
    # without a model.
    logger.error(f"Error loading model or tokenizer: {str(e)}")
    raise
50
 
51
- # ---------------- UI Options ----------------
52
  PERSONALITY_TYPES = ["Autistic", "Dyslexic", "Expressive", "Nerd", "Visual", "Other"]
53
  PROGRAMMING_LEVELS = ["Beginner", "Intermediate", "Professional"]
54
  TOPICS = [
@@ -57,117 +36,89 @@ TOPICS = [
57
  "Smart Contracts",
58
  "Versioning in Plutus",
59
  "Monad",
60
- "Other",
61
  ]
62
 
63
# ---------------- Prompting ----------------
# Marker the model is asked to emit once its answer is complete.
END_SENTINEL = "[END]"


def create_prompt(personality, level, topic):
    """Build the instruction prompt for one (personality, level, topic) selection.

    The three arguments are interpolated as-is into a fixed set of formatting
    rules. The prompt ends by asking the model to write ``END_SENTINEL`` after
    its closing sentence.
    """
    instructions = [
        f"Explain {topic} in Plutus for a {level} programmer with {personality} traits.",
        f"Use only basic words and clear examples. Use a physical object analogy (e.g., a lock or checklist) tied to {topic}.",
        "Avoid jargon like 'blockchain,' 'ledger,' 'Haskell,' 'decentralized,' 'cyber,' 'e-commerce,' 'formal verification,' or 'immutability.'",
        "Use short sentences (6-8 words). Use exactly 3 numbered points for key ideas. Each point must have 5-10 words.",
        "Bold the first word of each point. Structure the response: 2-sentence introduction, 3 numbered points, 1-sentence conclusion.",
        "For Autistic traits, use literal language, numbered lists, and **bold key terms**. Repeat key ideas for clarity.",
        "Avoid abstract terms unless concrete. Do not repeat the topic or prompt. Do not simulate a conversation, ask questions, or discuss unrelated topics.",
        "Use a direct, instructional tone without 'I' or 'we'.",
        f"End with a summary sentence on {topic}'s importance, then write {END_SENTINEL} and nothing else.",
    ]
    # Pieces are separated by exactly one space.
    return " ".join(instructions)
78
-
79
# ---------------- Stop on substring ----------------
class StopOnSubstrings(StoppingCriteria):
    """Stopping criterion that fires when the output ends with a stop string's tokens.

    Each stop string is tokenized once, without special tokens, at
    construction time. On every call, the tail of the first sequence in
    ``input_ids`` is compared against each stored token-id sequence.
    """

    def __init__(self, tokenizer, stop_strings):
        self.stop_ids = []
        for text in stop_strings:
            self.stop_ids.append(tokenizer.encode(text, add_special_tokens=False))

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        total = input_ids.shape[1]
        for seq in self.stop_ids:
            width = len(seq)
            # Skip empty stop sequences and ones longer than the output so far.
            if width == 0 or total < width:
                continue
            tail = input_ids[0, -width:]
            target = torch.tensor(seq, device=input_ids.device)
            if torch.equal(tail, target):
                return True
        return False
91
 
92
# ---------------- Generation (STREAMING) ----------------
def generate_response(personality, level, topic):
    """Stream the model's answer for the selected options.

    Generator used as the Gradio click handler. Each yielded value is the
    full text produced so far, so the output box is overwritten rather than
    appended to. Output is cut at ``END_SENTINEL`` as soon as it appears.

    Generation runs in a background thread that feeds a
    ``TextIteratorStreamer``. If that thread raises, the streamer is closed
    so this generator does not block forever, and the error is re-raised
    here so it goes through the normal error path.

    Yields:
        str: the accumulated response, or ``"Error:\\n<traceback>"`` on failure.
    """
    try:
        logger.info("Processing selections...")
        prompt = create_prompt(personality, level, topic)
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        # No timeout argument: the streamer iterator blocks until the worker
        # pushes text or closes the stream.
        streamer = TextIteratorStreamer(
            tokenizer,
            skip_prompt=True,
            skip_special_tokens=True,
        )

        stopping = StoppingCriteriaList([StopOnSubstrings(tokenizer, [END_SENTINEL])])

        generation_kwargs = {
            **inputs,
            "streamer": streamer,
            "max_new_tokens": 200,
            "do_sample": False,         # deterministic output
            "no_repeat_ngram_size": 3,  # loop guard
            "repetition_penalty": 1.1,
            "pad_token_id": tokenizer.pad_token_id,
            "stopping_criteria": stopping,
            "use_cache": True,
        }

        # Only pass eos_token_id if it exists (avoid None issues).
        if tokenizer.eos_token_id is not None:
            generation_kwargs["eos_token_id"] = tokenizer.eos_token_id

        worker_errors = []

        def _run_generation():
            # An exception raised inside the worker would otherwise be lost,
            # leaving the consumer loop below waiting on the streamer forever.
            try:
                model.generate(**generation_kwargs)
            except Exception as exc:
                worker_errors.append(exc)
                streamer.end()  # unblock the consumer

        thread = Thread(target=_run_generation, daemon=True)
        thread.start()

        generated_text = ""
        for new_text in streamer:
            generated_text += new_text

            # Hard stop the moment the sentinel shows up.
            if END_SENTINEL in generated_text:
                yield generated_text.split(END_SENTINEL)[0].rstrip()
                break

            yield generated_text.strip()

        if worker_errors:
            raise worker_errors[0]

        logger.info("Response generated successfully.")
    except Exception:
        err = traceback.format_exc()
        logger.error(err)
        # Show the full traceback in the UI for quick debugging.
        yield "Error:\n" + err
145
 
146
# ---------------- Gradio UI ----------------
with gr.Blocks(title="Cardano Plutus AI Assistant") as demo:
    gr.Markdown("### Your Personalised Plutus Tutor")
    gr.Markdown("Select your personality type, programming level, and topic, then click Generate.")

    # The three selectors, in the order they are passed to generate_response.
    personality = gr.Dropdown(
        choices=PERSONALITY_TYPES,
        label="Personality Type",
        value="Autistic",
    )
    level = gr.Dropdown(
        choices=PROGRAMMING_LEVELS,
        label="Programming Level",
        value="Beginner",
    )
    topic = gr.Dropdown(
        choices=TOPICS,
        label="Topic",
        value="Introduction to Validation",
    )

    generate_btn = gr.Button("Generate")

    output = gr.Textbox(label="Model Response", show_label=True, lines=10, placeholder="Generated content will appear here...")

    # generate_response is a generator; each value it yields replaces the
    # contents of the output box.
    generate_btn.click(fn=generate_response, inputs=[personality, level, topic], outputs=output)

logger.info("Launching Gradio interface...")
# Enable queueing with default settings, then start the server.
demo.queue()
demo.launch()
 
 
 
 
1
  import gradio as gr
2
  import torch
3
+ import logging
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
 
 
 
 
 
5
  from threading import Thread
6
 
7
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load model & tokenizer
# Hub repository id used for both the tokenizer and the causal LM weights.
MODEL_NAME = "ubiodee/Plutus_Tutor_new"

try:
    logger.info("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    # generate() is later given tokenizer.pad_token_id, so make sure it is
    # not None. Reusing EOS needs no embedding resize.
    if tokenizer.pad_token_id is None and tokenizer.eos_token is not None:
        tokenizer.pad_token = tokenizer.eos_token

    logger.info("Loading model...")
    # Half precision only when a CUDA device is available. Forcing float16
    # on a CPU-only host is slow and can hit operators that have no
    # half-precision CPU implementation.
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map="auto",
        torch_dtype=dtype,
        low_cpu_mem_usage=True,
    )
    model.eval()
    logger.info("Model and tokenizer loaded successfully.")
except Exception as e:
    # logger.exception keeps the traceback. Re-raise so the app does not
    # start without a model.
    logger.exception("Error loading model or tokenizer: %s", e)
    raise
29
 
30
+ # Define options for dropdowns
31
  PERSONALITY_TYPES = ["Autistic", "Dyslexic", "Expressive", "Nerd", "Visual", "Other"]
32
  PROGRAMMING_LEVELS = ["Beginner", "Intermediate", "Professional"]
33
  TOPICS = [
 
36
  "Smart Contracts",
37
  "Versioning in Plutus",
38
  "Monad",
39
+ "Other"
40
  ]
41
 
42
# Improved prompt template for autism-friendly, focused response
def create_prompt(personality, level, topic):
    """Build the instruction prompt for one (personality, level, topic) selection.

    The three arguments are interpolated as-is into the opening sentence.
    Every following sentence is a fixed formatting rule.
    """
    opening = f"Explain {topic} in Plutus for a {level} programmer with {personality} traits."
    fixed_rules = (
        "Use only basic words and clear examples, like comparing validation to a lock.",
        "Avoid jargon like 'blockchain,' 'ledger,' 'Haskell,' 'decentralized,' 'formal verification,' or 'immutability.'",
        "Use short sentences (8 words or less).",
        "Use exactly 3 bullet points for key ideas.",
        "Each point must be under 15 words.",
        "Include one simple analogy.",
        "Structure the response: 2-sentence introduction, 3 bullet points, 1-sentence conclusion.",
        "For Autistic traits, use literal language, avoid abstract terms, and ensure a predictable format.",
        "Do not repeat the topic or prompt.",
        "Do not simulate a conversation, ask questions, or list unrelated terms.",
        "Use a direct, instructional tone without 'I,' 'we,' or conversational phrases.",
        "End with a summary sentence on the topic’s importance.",
        "Add extra line breaks between sections for readability.",
    )
    # Sentences are separated by exactly one space.
    return " ".join((opening, *fixed_rules))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
# Response function with improved parameters
def generate_response(personality, level, topic):
    """Stream the model's answer for the selected options.

    Generator used as the Gradio click handler. Each yielded value is the
    full text produced so far, so the output box is overwritten rather than
    appended to.

    Generation runs in a background thread that feeds a
    ``TextIteratorStreamer``. If that thread raises, the streamer is closed
    so this generator does not block forever, and the error is re-raised
    here so it is logged and shown to the user.

    Yields:
        str: the accumulated response, or ``"Error: <message>"`` on failure.
    """
    try:
        logger.info("Processing selections...")
        prompt = create_prompt(personality, level, topic)
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

        # Use streamer for token-by-token generation
        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

        # Fall back to EOS when the tokenizer defines no pad token, so
        # generate() is not handed pad_token_id=None.
        pad_token_id = tokenizer.pad_token_id
        if pad_token_id is None:
            pad_token_id = tokenizer.eos_token_id

        generation_kwargs = {
            **inputs,
            "streamer": streamer,
            "max_new_tokens": 700,      # Increased to avoid cut-offs
            "do_sample": True,
            "temperature": 0.2,         # Lowered for more focused output
            "top_p": 0.2,               # Lowered for more focused output
            "repetition_penalty": 1.5,  # Increased to prevent repetition
            "eos_token_id": tokenizer.eos_token_id,
            "pad_token_id": pad_token_id,
        }

        worker_errors = []

        def _run_generation():
            # An exception raised inside the worker would otherwise be lost,
            # leaving the consumer loop below waiting on the streamer forever.
            try:
                model.generate(**generation_kwargs)
            except Exception as exc:
                worker_errors.append(exc)
                streamer.end()  # unblock the consumer

        # Run generation in a separate thread. daemon=True so a stuck
        # generation cannot keep the process alive at shutdown.
        thread = Thread(target=_run_generation, daemon=True)
        thread.start()

        generated_text = ""
        for new_text in streamer:
            generated_text += new_text
            yield generated_text.strip()

        if worker_errors:
            raise worker_errors[0]

        logger.info("Response generated successfully.")
    except Exception as e:
        # logger.exception records the traceback; the UI gets only the message.
        logger.exception("Error during generation: %s", e)
        yield f"Error: {e}"
 
 
81
 
82
# Gradio UI with dropdowns and button
with gr.Blocks(title="Cardano Plutus AI Assistant") as demo:
    gr.Markdown("### Your Personalised Plutus Tutor")
    gr.Markdown("Select your personality type, programming level, and topic, then click Generate.")

    # The three selectors, in the order they are passed to generate_response.
    personality = gr.Dropdown(choices=PERSONALITY_TYPES, label="Personality Type", value="Autistic")
    level = gr.Dropdown(choices=PROGRAMMING_LEVELS, label="Programming Level", value="Beginner")
    topic = gr.Dropdown(choices=TOPICS, label="Topic", value="Introduction to Validation")

    generate_btn = gr.Button("Generate")

    output = gr.Textbox(label="Model Response", show_label=True, lines=10, placeholder="Generated content will appear here...")

    # generate_response is a generator; each value it yields replaces the
    # contents of the output box.
    generate_btn.click(fn=generate_response, inputs=[personality, level, topic], outputs=output)
117
 
118
# Launch the app
try:
    logger.info("Launching Gradio interface...")
    # generate_response is a generator (streaming) callback. Gradio 3.x
    # rejects generator callbacks unless the queue is enabled; on newer
    # releases the queue is already on and this call is harmless.
    demo.queue()
    demo.launch()
except Exception as e:
    # logger.exception keeps the traceback. Re-raise so the failure is
    # visible to the host.
    logger.exception("Error launching Gradio: %s", e)
    raise