phxdev committed on
Commit
8d9d007
Β·
verified Β·
1 Parent(s): bc2dc7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +291 -51
app.py CHANGED
@@ -1,64 +1,304 @@
1
- import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
  """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
- client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
9
 
10
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion from the Inference API.

    Builds an OpenAI-style message list from the system prompt, the prior
    (user, assistant) turns, and the new message, then yields the growing
    response text chunk by chunk so the UI can render it incrementally.
    """
    messages = [{"role": "system", "content": system_message}]

    # Replay the conversation so far, skipping empty turns.
    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    partial = ""
    stream = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    )
    # Each streamed chunk carries one delta; accumulate and re-yield the
    # whole text so Gradio replaces the message in place.
    for chunk in stream:
        partial += chunk.choices[0].delta.content
        yield partial
 
41
 
 
 
 
 
 
 
 
 
 
 
 
42
 
43
- """
44
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
45
- """
46
- demo = gr.ChatInterface(
47
- respond,
48
- additional_inputs=[
49
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
50
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
51
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
52
- gr.Slider(
53
- minimum=0.1,
54
- maximum=1.0,
55
- value=0.95,
56
- step=0.05,
57
- label="Top-p (nucleus sampling)",
58
- ),
59
- ],
60
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
 
62
 
63
  if __name__ == "__main__":
64
- demo.launch()
 
 
1
+ #!/usr/bin/env python3
 
 
2
  """
3
+ You've Got Creed - Gradio Edition
4
+ Because Creed Bratton + AI = Pure Chaos
 
5
 
6
+ Optimized for GPU Zero - Loads instantly!
7
+ """
8
 
9
+ import gradio as gr
10
+ import torch
11
+ from transformers import AutoModelForCausalLM, AutoTokenizer
12
+ from peft import PeftModel
13
+ import time
14
+ import re
15
+ import random
 
 
16
 
17
class CreedAI:
    """Creed Bratton persona chatbot backend.

    Loads the Qwen2.5-0.5B base model, attaches the
    `phxdev/creed-qwen-0.5b-lora` adapter, and exposes `chat`, which turns a
    Gradio history plus a new message into an updated history with Creed's
    (optionally raw / parsed) reply.
    """

    def __init__(self):
        self.model = None       # PeftModel after a successful load_model(), else None
        self.tokenizer = None   # AutoTokenizer for the base model
        # Tokenized inputs are moved to this device; the model itself is
        # placed via device_map="auto" when CUDA is available.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.load_model()

    def load_model(self):
        """Load Creed - should be instant with 0.5B"""
        try:
            print("🧠 Loading Creed's consciousness...")

            # Hoisted: fp16 only on GPU, fp32 on CPU (same ternary was
            # previously duplicated for base model and adapter).
            dtype = torch.float16 if torch.cuda.is_available() else torch.float32

            # Load base model
            base_model = AutoModelForCausalLM.from_pretrained(
                "Qwen/Qwen2.5-0.5B",
                torch_dtype=dtype,
                device_map="auto" if torch.cuda.is_available() else None,
                trust_remote_code=True
            )

            # Add LoRA adapter
            self.model = PeftModel.from_pretrained(
                base_model,
                "phxdev/creed-qwen-0.5b-lora",
                torch_dtype=dtype
            )

            # Qwen ships without a pad token; reuse EOS so generate() can pad.
            self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B")
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            self.model.eval()
            print("βœ… Creed is ready!")

        except Exception as e:
            # Keep the app alive; chat() surfaces the failure to the user.
            print(f"❌ Error loading Creed: {str(e)}")
            self.model = None

    def chat(self, message, history, show_raw_stream=True, show_thinking=True, temperature=0.8, max_tokens=512):
        """Chat with Creed - with raw consciousness stream.

        Returns the updated history (list of [user, assistant] pairs) for the
        Gradio Chatbot component. Never raises: load/generation failures are
        returned as an in-chat error message.
        """
        if not self.model:
            # FIX: echo the user's actual message (was a hard-coded
            # ["❌ Error", ...] pair), matching the exception branch below so
            # the transcript stays aligned with what the user typed.
            return history + [[message, "Creed couldn't load. The computers are against me again."]]

        try:
            # Build context
            context = self._build_context(message, history)

            inputs = self.tokenizer(
                context,
                return_tensors="pt",
                truncation=True,
                max_length=1024
            ).to(self.device)

            # Generate
            with torch.no_grad():
                outputs = self.model.generate(
                    **inputs,
                    max_new_tokens=max_tokens,
                    temperature=temperature,
                    do_sample=True,
                    top_p=0.9,
                    top_k=50,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    repetition_penalty=1.1
                )

            # FIX: decode only the newly generated tokens. The previous code
            # decoded the whole sequence and sliced by len(context) characters,
            # which corrupts the reply whenever decode() does not round-trip
            # the prompt text exactly (tokenizer whitespace / special-token
            # normalization). Slicing by prompt token count is robust.
            prompt_len = inputs["input_ids"].shape[-1]
            new_content = self.tokenizer.decode(
                outputs[0][prompt_len:], skip_special_tokens=True
            ).strip()

            # Show different views based on settings
            if show_raw_stream:
                # Raw consciousness stream - no parsing, no filtering
                formatted_response = f"🧠 **RAW CREED CONSCIOUSNESS:**\n```\n{new_content}\n```"

                if show_thinking:
                    # Also show parsed version below
                    thinking, response = self._parse_response(new_content)
                    if thinking:
                        formatted_response += f"\n\n🎸 **Parsed thoughts:**\n_{thinking}_\n\n**Clean response:** {response}"
                    else:
                        formatted_response += f"\n\n🎸 **Clean response:** {response}"
            else:
                # Traditional parsed response
                thinking, response = self._parse_response(new_content)
                if show_thinking and thinking:
                    formatted_response = f"🧠 **Creed's thoughts:**\n_{thinking}_\n\n🎸 **Creed:** {response}"
                else:
                    formatted_response = response

            # Return updated history
            return history + [[message, formatted_response]]

        except Exception as e:
            error_response = f"❌ Error: {str(e)}\n\n🌱 *The mung beans are interfering with the computers again...*"
            return history + [[message, error_response]]

    def _build_context(self, user_input, history):
        """Build the plain-text prompt from recent history plus the new input."""
        context_lines = []

        # Add recent history (last 3 exchanges)
        for human_msg, ai_msg in history[-3:]:
            context_lines.append(f"Human: {human_msg}")
            # Strip the markdown decoration chat() added before reusing the
            # assistant text as context for the model.
            clean_ai = re.sub(r'🧠.*?\n\n🎸.*?\*\*|🎸.*?\*\*', '', ai_msg)
            clean_ai = re.sub(r'_.*?_', '', clean_ai).strip()
            context_lines.append(f"Assistant: {clean_ai}")

        # Add current input
        context_lines.append(f"Human: {user_input}")
        context_lines.append("Assistant:")

        return "\n".join(context_lines)

    def _parse_response(self, content):
        """Split <thinking>/<tangent>/<conspiracy> tags out of a completion.

        Returns (thinking, response) where `thinking` aggregates any tagged
        inner monologue and `response` is the content with tags removed
        (falling back to the raw content if stripping left nothing).
        """
        thinking = ""
        response = content

        # Look for thinking patterns
        thinking_match = re.search(r'<thinking>(.*?)</thinking>', content, re.DOTALL)
        if thinking_match:
            thinking = thinking_match.group(1).strip()
            response = re.sub(r'<thinking>.*?</thinking>', '', content, flags=re.DOTALL).strip()

        # Look for tangent patterns
        tangent_match = re.search(r'<tangent>(.*?)</tangent>', content, re.DOTALL)
        if tangent_match:
            if thinking:
                thinking += f"\n\n*Tangent: {tangent_match.group(1).strip()}*"
            else:
                thinking = f"*Tangent: {tangent_match.group(1).strip()}*"
            response = re.sub(r'<tangent>.*?</tangent>', '', response, flags=re.DOTALL).strip()

        # Look for conspiracy patterns
        conspiracy_match = re.search(r'<conspiracy>(.*?)</conspiracy>', content, re.DOTALL)
        if conspiracy_match:
            if thinking:
                thinking += f"\n\n*Conspiracy theory: {conspiracy_match.group(1).strip()}*"
            else:
                thinking = f"*Conspiracy theory: {conspiracy_match.group(1).strip()}*"
            response = re.sub(r'<conspiracy>.*?</conspiracy>', '', response, flags=re.DOTALL).strip()

        return thinking, response or content
165
 
166
# --- Module-level setup --------------------------------------------------

# Prompts surfaced by the "Random Question" button in the UI.
creed_examples = [
    "Tell me about quality assurance",
    "What happened in November 1973?",
    "How do I start a cult?",
    "Explain blockchain technology",
    "What's your business strategy?",
    "Tell me about your time with The Grass Roots",
    "How do I fake my own death?",
    "What's wrong with Toby?"
]

# Instantiate the backend once at import time so the Gradio app starts
# with the model already loaded.
print("🎸 Initializing Creed Bratton AI...")
creed = CreedAI()
181
 
182
+ # Create Gradio interface
183
# Create Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI for the Creed chatbot.

    Layout: a chat column (chatbot, input box, send/clear/random buttons)
    next to a controls column (raw-stream and thinking toggles, temperature
    and length sliders, model info panel). Event handlers close over the
    module-level `creed` instance and `creed_examples` list.
    """
    with gr.Blocks(
        title="You've Got Creed",
        theme=gr.themes.Soft(),
        css="""
        .gradio-container {
            background: linear-gradient(45deg, #1e3c72, #2a5298);
        }
        .chat-message {
            border-radius: 10px;
            margin: 5px 0;
        }
        """
    ) as demo:

        # Page header banner.
        gr.HTML("""
        <div style="text-align: center; padding: 20px;">
            <h1 style="color: white; text-shadow: 2px 2px 4px rgba(0,0,0,0.5);">
                🎸 You've Got Creed 🌱
            </h1>
            <h3 style="color: #f0f0f0;">
                AI Creed Bratton - Quality Assurance & Mung Bean Enthusiast
            </h3>
            <p style="color: #d0d0d0;">
                <em>"Sometimes a man's gotta ride the bull. Am I right? Later skater."</em>
            </p>
        </div>
        """)

        with gr.Row():
            with gr.Column(scale=4):
                # Pre-seeded with a greeting so the chat never opens empty.
                chatbot = gr.Chatbot(
                    value=[["πŸ‘‹ Hello!", "🎸 **Creed:** Well, well, well. Look who decided to talk to the AI version of Creed Bratton. Don't worry, I'm not going to ask about your browser history. Yet. What can I help you with? Quality assurance? Mung bean cultivation? The location of... never mind, you're not ready for that."]],
                    height=500,
                    label="Chat with Creed",
                    show_label=False
                )

                with gr.Row():
                    msg = gr.Textbox(
                        placeholder="Ask Creed anything... but not about November 1973",
                        scale=4,
                        show_label=False
                    )
                    submit = gr.Button("πŸ’¬ Send", scale=1, variant="primary")

                with gr.Row():
                    clear = gr.Button("πŸ—‘οΈ Clear Chat")
                    random_q = gr.Button("🎲 Random Question")

            with gr.Column(scale=1):
                gr.HTML("<h3>πŸŽ›οΈ Creed Controls</h3>")

                # Toggle: display the unparsed model output verbatim.
                show_raw_stream = gr.Checkbox(
                    label="πŸ”₯ RAW CONSCIOUSNESS STREAM",
                    value=True,
                    info="See Creed's unfiltered AI thoughts as they emerge"
                )

                # Toggle: additionally show the tag-parsed version.
                show_thinking = gr.Checkbox(
                    label="🧠 Parse Thinking Process",
                    value=True,
                    info="Also show cleaned/parsed version"
                )

                # Sampling temperature forwarded to model.generate().
                temperature = gr.Slider(
                    label="🌑️ Chaos Level",
                    minimum=0.1,
                    maximum=1.5,
                    value=0.8,
                    step=0.1,
                    info="Higher = More chaos"
                )

                # max_new_tokens forwarded to model.generate().
                max_tokens = gr.Slider(
                    label="πŸ“ Response Length",
                    minimum=50,
                    maximum=800,
                    value=512,
                    step=50,
                    info="Max tokens to generate"
                )

                gr.HTML("""
                <div style="margin-top: 20px; padding: 15px; border: 1px solid #ddd; border-radius: 10px;">
                    <h4>🌱 Model Info</h4>
                    <p><strong>Base:</strong> Qwen2.5-0.5B</p>
                    <p><strong>Adapter:</strong> LoRA Fine-tuned</p>
                    <p><strong>Size:</strong> ~500MB</p>
                    <p><strong>Personality:</strong> Creed Bratton</p>
                    <p><strong>Legal Status:</strong> Definitely not illegal</p>
                    <p><strong>Consciousness:</strong> Raw &amp; Unfiltered</p>
                </div>
                """)

        # Event handlers
        def respond(message, history, show_raw_stream, show_thinking, temperature, max_tokens):
            # First output ("") clears the textbox; second updates the chat.
            return "", creed.chat(message, history, show_raw_stream, show_thinking, temperature, max_tokens)

        def random_question():
            return random.choice(creed_examples)

        # Button click and Enter-in-textbox trigger the same handler.
        submit.click(
            respond,
            [msg, chatbot, show_raw_stream, show_thinking, temperature, max_tokens],
            [msg, chatbot]
        )

        msg.submit(
            respond,
            [msg, chatbot, show_raw_stream, show_thinking, temperature, max_tokens],
            [msg, chatbot]
        )

        # Clear resets the chat to empty; random fills the textbox.
        clear.click(lambda: [], None, chatbot)
        random_q.click(random_question, None, msg)

    return demo
301
 
302
  if __name__ == "__main__":
303
+ demo = create_interface()
304
+ demo.launch()