acecalisto3 committed on
Commit
69352c0
Β·
verified Β·
1 Parent(s): 87e1772

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +261 -57
app.py CHANGED
@@ -1,67 +1,271 @@
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
3
  import torch
 
 
 
 
 
 
 
4
 
5
  class VibeThinker:
6
- def __init__(self, model_path="WeiboAI/VibeThinker-1.5B"):
7
  self.model_path = model_path
8
- self.model = AutoModelForCausalLM.from_pretrained(
9
- self.model_path,
10
- low_cpu_mem_usage=True,
11
- torch_dtype=torch.bfloat16,
12
- device_map="auto"
13
- )
14
- self.tokenizer = AutoTokenizer.from_pretrained(self.model_path, trust_remote_code=True)
15
-
16
- def infer_text(self, messages):
17
- text = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
18
- model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)
19
-
20
- generation_config = dict(
21
- max_new_tokens=4096,
22
- do_sample=True,
23
- temperature=0.6,
24
- top_p=0.95,
25
- top_k=None
26
- )
27
- generated_ids = self.model.generate(
28
- **model_inputs,
29
- generation_config=GenerationConfig(**generation_config)
30
- )
31
- generated_ids = [
32
- output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
33
- ]
34
-
35
- response = self.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
36
- return response
37
-
38
- model = VibeThinker()
39
-
40
- def chatbot_response(message, history):
41
- messages = [{"role": "system", "content": "You are a helpful assistant that optimizes HTML for Joomla Yootheme Builder. Generate optimized scripts based on user-provided HTML."}]
42
- for user_msg, assistant_msg in history:
43
- messages.append({"role": "user", "content": user_msg})
44
- if assistant_msg:
45
- messages.append({"role": "assistant", "content": assistant_msg})
46
- messages.append({"role": "user", "content": message})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
- response = model.infer_text(messages)
49
- return response
50
-
51
- with gr.Blocks() as demo:
52
- gr.Markdown("# Joomla Yootheme Builder Optimizer\n[Built with anycoder](https://huggingface.co/spaces/akhaliq/anycoder)")
53
- gr.ChatInterface(
54
- chatbot_response,
55
- chatbot=gr.Chatbot(height=500),
56
- textbox=gr.Textbox(placeholder="Provide HTML to optimize for Joomla Yootheme Builder", container=False, scale=7),
57
- title="Joomla Optimizer Chatbot",
58
- description="Chat with the VibeThinker model to optimize your HTML for Joomla Yootheme Builder.",
59
- theme="soft",
60
- examples=["Optimize this HTML for Joomla: <html>...</html>"],
61
- retry_btn="Retry",
62
- undo_btn="Undo",
63
- clear_btn="Clear"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  )
65
 
66
  if __name__ == "__main__":
67
- demo.launch()
 
 
 
 
 
 
 
1
  import gradio as gr
2
  from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig
3
  import torch
4
+ import logging
5
+ from typing import List, Dict, Any
6
+ from functools import partial
7
+
8
+ # Configure logging
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
 
12
  class VibeThinker:
13
+ def __init__(self, model_path: str = "WeiboAI/VibeThinker-1.5B"):
14
  self.model_path = model_path
15
+ logger.info(f"Loading model {model_path}...")
16
+
17
+ try:
18
+ # Use trust_remote_code only if absolutely required (VibeThinker needs it)
19
+ self.tokenizer = AutoTokenizer.from_pretrained(
20
+ model_path,
21
+ trust_remote_code=True,
22
+ padding_side="left" # Important for generation
23
+ )
24
+
25
+ # Add pad token if missing (common with some custom models)
26
+ if self.tokenizer.pad_token is None:
27
+ self.tokenizer.pad_token = self.tokenizer.eos_token
28
+ self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
29
+
30
+ self.model = AutoModelForCausalLM.from_pretrained(
31
+ model_path,
32
+ torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
33
+ device_map="auto",
34
+ low_cpu_mem_usage=True,
35
+ trust_remote_code=True,
36
+ )
37
+
38
+ self.model.eval()
39
+ logger.info("Model loaded successfully.")
40
+
41
+ except Exception as e:
42
+ logger.error(f"Failed to load model: {e}")
43
+ raise
44
+
45
+ def infer_text(self, messages: List[Dict[str, str]], **gen_kwargs) -> str:
46
+ try:
47
+ # Apply chat template safely
48
+ text = self.tokenizer.apply_chat_template(
49
+ messages,
50
+ tokenize=False,
51
+ add_generation_prompt=True
52
+ )
53
+
54
+ inputs = self.tokenizer(
55
+ text,
56
+ return_tensors="pt",
57
+ truncation=True,
58
+ max_length=8192 # Prevent OOM on very long histories
59
+ ).to(self.model.device)
60
+
61
+ # Default generation config (tuned for quality + coherence)
62
+ default_gen = {
63
+ "max_new_tokens": 2048,
64
+ "do_sample": True,
65
+ "temperature": 0.7,
66
+ "top_p": 0.90,
67
+ "top_k": 50,
68
+ "repetition_penalty": 1.1,
69
+ "eos_token_id": self.tokenizer.eos_token_id,
70
+ "pad_token_id": self.tokenizer.pad_token_id,
71
+ }
72
+ default_gen.update(gen_kwargs)
73
+
74
+ with torch.no_grad():
75
+ generated_ids = self.model.generate(
76
+ **inputs,
77
+ generation_config=GenerationConfig(**default_gen)
78
+ )
79
+
80
+ # Decode only the newly generated part
81
+ response_ids = generated_ids[0][inputs.input_ids.shape[-1]:]
82
+ response = self.tokenizer.decode(response_ids, skip_special_tokens=True).strip()
83
+
84
+ return response
85
+
86
+ except torch.cuda.OutOfMemoryError:
87
+ torch.cuda.empty_cache()
88
+ return "❌ GPU ran out of memory. Please shorten your conversation history or try again."
89
+ except Exception as e:
90
+ logger.error(f"Generation error: {e}")
91
+ return f"❌ An error occurred during generation: {str(e)}"
92
+
93
+
94
+ # === Initialize model once (global) ===
95
+ try:
96
+ model = VibeThinker()
97
+ except Exception:
98
+ model = None
99
+ error_msg = "Failed to load VibeThinker model. The app will run in fallback mode."
100
+ logger.error(error_msg)
101
+
102
+ # === System prompt (clear, focused, and optimized for Joomla/Yootheme) ===
103
+ SYSTEM_PROMPT = """
104
+ You are an expert Joomla developer specializing in YOOtheme Pro Builder (dynamic content, custom elements, layout library).
105
+ Your task is to convert or optimize any provided HTML/CSS/JS into clean, high-performance code that works perfectly inside YOOtheme Pro elements (HTML, Custom Element, Code element, etc.).
106
+
107
+ Rules:
108
+ - Always use inline styles or scoped CSS when needed (no external files unless requested).
109
+ - Prefer YOOtheme dynamic tags {{ }} when relevant.
110
+ - Ensure responsive design (use uk-grid, uk-width-*, flex, etc.).
111
+ - Optimize for performance: minify when possible, avoid heavy frameworks.
112
+ - Wrap JavaScript in <script> tags with defer if needed.
113
+ - Output ONLY the final optimized code unless the user asks for explanation.
114
+ - If the input is already good, enhance it (accessibility, speed, modern syntax).
115
+ """
116
+
117
+ def build_messages(history: List[List[Any]], user_message: str) -> List[Dict[str, str]]:
118
+ messages = [{"role": "system", "content": SYSTEM_PROMPT}]
119
+
120
+ for human, assistant in history:
121
+ if human:
122
+ messages.append({"role": "user", "content": human})
123
+ if assistant:
124
+ messages.append({"role": "assistant", "content": assistant})
125
 
126
+ messages.append({"role": "user", "content": user_message})
127
+ return messages
128
+
129
+ def chatbot_response(message: str, history: List[List[str]]) -> str:
130
+ if model is None:
131
+ return "🚨 Model failed to load. Please check server logs."
132
+
133
+ messages = build_messages(history, message)
134
+
135
+ # Stream the response using Gradio's streaming
136
+ for chunk in stream_response(messages):
137
+ yield chunk
138
+
139
+ def stream_response(messages: List[Dict[str, str]]):
140
+ if model is None:
141
+ yield "Model not available."
142
+ return
143
+
144
+ try:
145
+ text = model.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
146
+ inputs = model.tokenizer(text, return_tensors="pt").to(model.model.device)
147
+
148
+ streamer = partial(model.model.generate,
149
+ **inputs,
150
+ streamer=None, # We'll do manual streaming for better control
151
+ max_new_tokens=2048,
152
+ do_sample=True,
153
+ temperature=0.7,
154
+ top_p=0.90,
155
+ repetition_penalty=1.1,
156
+ pad_token_id=model.tokenizer.pad_token_id)
157
+
158
+ generated_text = ""
159
+ for new_token in streamer:
160
+ # This is a simplified streaming approach; for real token-by-token streaming use TextIteratorStreamer
161
+ pass # Replace with real streaming if needed (see below for full streaming version)
162
+
163
+ # Simpler: just return full response (still fast with bfloat16)
164
+ response = model.infer_text(messages)
165
+ yield response
166
+
167
+ except Exception as e:
168
+ yield f"Error: {str(e)}"
169
+
170
+
171
+ # === Proper streaming version (recommended) ===
172
+ from transformers import TextIteratorStreamer
173
+ import threading
174
+
175
+ def chatbot_response_stream(message: str, history: List[List[str]]):
176
+ if model is None:
177
+ yield "🚨 Model failed to load."
178
+ return
179
+
180
+ messages = build_messages(history, message)
181
+ text = model.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
182
+ inputs = model.tokenizer(text, return_tensors="pt").to(model.model.device)
183
+
184
+ streamer = TextIteratorStreamer(model.tokenizer, skip_prompt=True, skip_special_tokens=True)
185
+
186
+ generation_kwargs = {
187
+ "inputs": inputs.input_ids,
188
+ "streamer": streamer,
189
+ "max_new_tokens": 2048,
190
+ "do_sample": True,
191
+ "temperature": 0.7,
192
+ "top_p": 0.90,
193
+ "top_k": 50,
194
+ "repetition_penalty": 1.1,
195
+ "pad_token_id": model.tokenizer.pad_token_id,
196
+ }
197
+
198
+ thread = threading.Thread(target=model.model.generate, kwargs=generation_kwargs)
199
+ thread.start()
200
+
201
+ generated_text = ""
202
+ for new_text in streamer:
203
+ generated_text += new_text
204
+ yield generated_text
205
+
206
+ # === Gradio Interface ===
207
+ with gr.Blocks(
208
+ theme=gr.themes.Soft(),
209
+ title="Joomla YOOtheme Pro Optimizer",
210
+ css="""
211
+ .gradio-container {max-width: 1000px !important; margin: auto;}
212
+ footer {display: none !important;}
213
+ """
214
+ ) as demo:
215
+ gr.Markdown(
216
+ """
217
+ # πŸš€ Joomla YOOtheme Pro Optimizer
218
+ Powered by **WeiboAI/VibeThinker-1.5B** βˆ™ Real-time streaming βˆ™ Optimized for YOOtheme Builder
219
+ [Built with ❀️ using Anycoder](https://huggingface.co/spaces/akhaliq/anycoder) |
220
+ [Model](https://huggingface.co/WeiboAI/VibeThinker-1.5B) βˆ™
221
+ [Report issues](https://github.com/your-repo)
222
+ """
223
+ )
224
+
225
+ chat = gr.ChatInterface(
226
+ fn=chatbot_response_stream,
227
+ chatbot=gr.Chatbot(
228
+ height=600,
229
+ show_copy_button=True,
230
+ avatar_images=(
231
+ "https://em-content.zobj.net/source/twitter/53/robot_1f916.png",
232
+ "https://yootheme.com/site/templates/yootheme/images/yootheme/logo.svg"
233
+ ),
234
+ render_markdown=True
235
+ ),
236
+ textbox=gr.Textbox(
237
+ placeholder="Paste your HTML/CSS/JS here and ask to optimize for YOOtheme Pro Builder...",
238
+ container=False,
239
+ scale=7,
240
+ autofocus=True
241
+ ),
242
+ examples=[
243
+ ["Make this Bootstrap card work perfectly in YOOtheme Pro as a custom element"],
244
+ ["Convert this Tailwind section to pure UIKit + YOOtheme dynamic content"],
245
+ ["Optimize this heavy JS animation for YOOtheme Code element (no jQuery)"],
246
+ ],
247
+ cache_examples=False,
248
+ retry_btn="πŸ”„ Retry",
249
+ undo_btn="β†Ά Undo",
250
+ clear_btn="πŸ—‘οΈ Clear Chat",
251
+ submit_btn="Optimize β†’"
252
+ )
253
+
254
+ gr.Markdown(
255
+ """
256
+ ### Tips:
257
+ - Paste raw HTML, full pages, or just snippets
258
+ - Ask for dynamic content (`{{ article.title }}`, etc.)
259
+ - Request minification, accessibility improvements, or UIKit conversion
260
+ - Streaming responses appear in real-time
261
+ """
262
  )
263
 
264
  if __name__ == "__main__":
265
+ demo.queue(max_size=20).launch(
266
+ server_name="0.0.0.0",
267
+ server_port=7860,
268
+ share=False, # Set to True if you want public link
269
+ favicon_path="https://yootheme.com/site/templates/yootheme/images/favicon.ico",
270
+ allowed_paths=[] # Add static files if needed
271
+ )