lemms committed
Commit 98739e9 · verified · 1 Parent(s): b3de813

Upload app.py with huggingface_hub

Files changed (1)
  1. app.py +248 -214
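
The commit message says the file was pushed with the huggingface_hub client. For reference, a minimal sketch of how such an upload is typically done; the exact call and the target repo ID are assumptions, not shown on this page:

import huggingface_hub

# Hypothetical upload call; repo_id is a placeholder, not taken from this page.
api = huggingface_hub.HfApi()  # authenticates via HF_TOKEN or a cached login
api.upload_file(
    path_or_fileobj="app.py",      # local file to push
    path_in_repo="app.py",         # destination path inside the repo
    repo_id="user/space-name",     # placeholder Space ID
    repo_type="space",             # Gradio demos live in Space repos
    commit_message="Upload app.py with huggingface_hub",
)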
app.py CHANGED
@@ -1,6 +1,6 @@
  #!/usr/bin/env python3
  """
- OpenLLM Realistic Demo App - Generates actual text based on prompts
  """

  import gradio as gr
@@ -8,44 +8,56 @@ import torch
  import torch.nn as nn
  import torch.nn.functional as F
  import json
- import random
  import logging
- import re

  # Set up logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

- class RealisticGPT(nn.Module):
- """Realistic GPT model for demo text generation"""
-
- def __init__(self, vocab_size=1000, n_layer=2, n_head=4, n_embd=128):
  super().__init__()
- self.config = type('Config', (), {
- 'vocab_size': vocab_size,
- 'n_layer': n_layer,
- 'n_head': n_head,
- 'n_embd': n_embd,
- 'block_size': 256
- })()

  self.transformer = nn.ModuleDict(dict(
- wte = nn.Embedding(vocab_size, n_embd),
- wpe = nn.Embedding(256, n_embd),
- drop = nn.Dropout(0.1),
  h = nn.ModuleList([nn.TransformerEncoderLayer(
- d_model=n_embd,
- nhead=n_head,
- dim_feedforward=4 * n_embd,
- dropout=0.1,
  batch_first=True
- ) for _ in range(n_layer)]),
- ln_f = nn.LayerNorm(n_embd),
  ))
- self.lm_head = nn.Linear(n_embd, vocab_size)

- # Initialize with random weights
  self.apply(self._init_weights)

  def _init_weights(self, module):
  if isinstance(module, nn.Linear):
@@ -56,9 +68,11 @@ class RealisticGPT(nn.Module):
  torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

  def forward(self, idx, targets=None):
  b, t = idx.size()
- pos = torch.arange(0, t, dtype=torch.long, device=idx.device).unsqueeze(0)

  tok_emb = self.transformer.wte(idx)
  pos_emb = self.transformer.wpe(pos)
  x = self.transformer.drop(tok_emb + pos_emb)
@@ -75,232 +89,241 @@ class RealisticGPT(nn.Module):
  loss = None

  return logits, loss

- class RealisticInferenceEngine:
- """Realistic inference engine that generates actual text"""

  def __init__(self):
  self.models = {}
  self.current_model = None

- # Model configurations with different personalities
  self.model_configs = {
- "demo-4k": {
- "name": "Demo Model (4k steps)",
- "description": "Demo model simulating 4,000 training steps - Basic responses",
- "steps": 4000,
- "personality": "basic"
  },
- "demo-6k": {
- "name": "Demo Model (6k steps)",
- "description": "Demo model simulating 6,000 training steps - Improved coherence",
- "steps": 6000,
- "personality": "coherent"
  },
- "demo-7k": {
- "name": "Demo Model (7k steps)",
- "description": "Demo model simulating 7,000 training steps - Enhanced quality",
- "steps": 7000,
- "personality": "enhanced"
  },
- "demo-8k": {
- "name": "Demo Model (8k steps)",
- "description": "Demo model simulating 8,000 training steps - Sophisticated understanding",
- "steps": 8000,
- "personality": "sophisticated"
  },
- "demo-9k": {
- "name": "Demo Model (9k steps)",
- "description": "Demo model simulating 9,000 training steps - Best performing model",
- "steps": 9000,
- "personality": "expert"
  }
  }

- logger.info("🚀 Realistic OpenLLM Inference Engine initialized")

- def load_model(self, model_id: str) -> bool:
- """Load a demo model"""
  try:
  config = self.model_configs.get(model_id)
  if not config:
  logger.error(f"❌ Unknown model ID: {model_id}")
  return False

- logger.info(f"📥 Loading demo model: {model_id}")

- # Create a demo model
- model = RealisticGPT()
- model.eval()
- self.models[model_id] = model
- self.current_model = model_id

- logger.info(f"✅ Successfully loaded demo model: {model_id}")
  return True

  except Exception as e:
- logger.error(f"❌ Failed to load demo model {model_id}: {e}")
  return False

- def generate_realistic_text(self, prompt: str, max_length: int = 100,
- temperature: float = 0.7, top_k: int = 50,
- top_p: float = 0.9) -> str:
- """Generate realistic text based on prompt and parameters"""
  if not self.current_model or self.current_model not in self.models:
  return "❌ No model loaded. Please select a model first."

  try:
- config = self.model_configs[self.current_model]
- personality = config['personality']

- # Clean and analyze the prompt
- prompt_lower = prompt.lower().strip()

- # Generate contextually appropriate responses based on prompt type
- if "capital" in prompt_lower and "france" in prompt_lower:
- response = self._generate_capital_response(prompt, personality, temperature)
- elif "weather" in prompt_lower:
- response = self._generate_weather_response(prompt, personality, temperature)
- elif "hello" in prompt_lower or "hi" in prompt_lower:
- response = self._generate_greeting_response(prompt, personality, temperature)
- elif "explain" in prompt_lower or "what is" in prompt_lower:
- response = self._generate_explanation_response(prompt, personality, temperature)
- elif "story" in prompt_lower or "write" in prompt_lower:
- response = self._generate_story_response(prompt, personality, temperature)
- else:
- response = self._generate_general_response(prompt, personality, temperature)

- # Adjust response length based on max_length parameter
- if max_length < 50:
- response = response[:max_length] + "..."
- elif max_length > 200:
- response += " " + self._generate_continuation(prompt, personality, temperature)

- # Add parameter effects
- if temperature > 1.2:
- response += " [Creative mode: Higher temperature allows for more varied and imaginative responses]"
- elif temperature < 0.5:
- response += " [Focused mode: Lower temperature produces more deterministic and precise output]"

- return response

  except Exception as e:
  error_msg = f"❌ Generation failed: {str(e)}"
  logger.error(error_msg)
  return error_msg
-
- def _generate_capital_response(self, prompt: str, personality: str, temperature: float) -> str:
- """Generate response about capitals"""
- base_response = "The capital of France is Paris."
-
- if personality == "basic":
- return base_response
- elif personality == "coherent":
- return f"{base_response} Paris is a beautiful city known for its culture and history."
- elif personality == "enhanced":
- return f"{base_response} Paris, the City of Light, is renowned for its art, fashion, gastronomy, and culture. It's home to iconic landmarks like the Eiffel Tower and the Louvre Museum."
- elif personality == "sophisticated":
- return f"{base_response} Paris, often called the City of Light, serves as France's political, economic, and cultural center. It's famous for its rich history, world-class museums, and distinctive architecture."
- else: # expert
- return f"{base_response} Paris, the capital and largest city of France, is a global center for art, fashion, gastronomy, and culture. Located in northern France, it's known for its iconic landmarks, museums, and distinctive Haussmann architecture."
-
- def _generate_weather_response(self, prompt: str, personality: str, temperature: float) -> str:
- """Generate weather-related response"""
- if personality == "basic":
- return "The weather varies depending on location and time of year."
- elif personality == "coherent":
- return "Weather conditions can change throughout the day. It's best to check local forecasts for accurate information."
- elif personality == "enhanced":
- return "Weather patterns are influenced by various factors including temperature, humidity, pressure systems, and geographical location. Local weather services provide the most accurate forecasts."
- elif personality == "sophisticated":
- return "Weather is a complex atmospheric phenomenon influenced by temperature, humidity, air pressure, wind patterns, and geographical features. Meteorological services use advanced models to predict weather conditions."
- else: # expert
- return "Weather encompasses atmospheric conditions including temperature, humidity, precipitation, wind, and visibility. Modern meteorology uses sophisticated computer models, satellite data, and ground observations to provide accurate forecasts."
-
- def _generate_greeting_response(self, prompt: str, personality: str, temperature: float) -> str:
- """Generate greeting response"""
- greetings = {
- "basic": "Hello! How can I help you today?",
- "coherent": "Hello there! I'm here to assist you with any questions or tasks you might have.",
- "enhanced": "Hello! I'm ready to help you with information, explanations, or creative tasks. What would you like to know?",
- "sophisticated": "Greetings! I'm here to provide assistance, answer questions, or engage in meaningful conversation. How may I be of service?",
- "expert": "Hello! I'm designed to help with a wide range of tasks including information retrieval, creative writing, problem-solving, and engaging discussions. What would you like to explore?"
- }
- return greetings.get(personality, greetings["basic"])
-
- def _generate_explanation_response(self, prompt: str, personality: str, temperature: float) -> str:
- """Generate explanation response"""
- if personality == "basic":
- return "I can help explain various topics. Could you please provide more specific details about what you'd like me to explain?"
- elif personality == "coherent":
- return "I'm happy to provide explanations on a wide range of subjects. Please let me know what specific topic or concept you'd like me to clarify."
- elif personality == "enhanced":
- return "I can offer detailed explanations across many fields including science, history, technology, and more. What specific topic would you like me to explain?"
- elif personality == "sophisticated":
- return "I'm capable of providing comprehensive explanations on diverse subjects, from scientific concepts to historical events. Please specify what you'd like me to elucidate."
- else: # expert
- return "I can deliver thorough explanations across multiple domains including science, technology, history, philosophy, and current events. What specific topic would you like me to explore in detail?"
-
- def _generate_story_response(self, prompt: str, personality: str, temperature: float) -> str:
- """Generate story response"""
- if personality == "basic":
- return "I can help you create stories. What kind of story would you like me to write?"
- elif personality == "coherent":
- return "I'd be happy to help you write a story. Could you tell me what genre or theme you have in mind?"
- elif personality == "enhanced":
- return "I can create engaging stories in various genres including fantasy, mystery, science fiction, and more. What type of story interests you?"
- elif personality == "sophisticated":
- return "I'm capable of crafting compelling narratives across multiple genres with rich character development and intricate plots. What kind of story would you like me to create?"
- else: # expert
- return "I can compose sophisticated narratives with complex characters, detailed world-building, and engaging plotlines across various genres. What type of story would you like me to develop?"
-
- def _generate_general_response(self, prompt: str, personality: str, temperature: float) -> str:
- """Generate general response"""
- if personality == "basic":
- return f"That's an interesting question about '{prompt[:30]}...'. I can help you explore this topic further."
- elif personality == "coherent":
- return f"Your question about '{prompt[:30]}...' is quite engaging. Let me provide some helpful information on this subject."
- elif personality == "enhanced":
- return f"Your inquiry regarding '{prompt[:30]}...' shows thoughtful consideration. I'd be happy to share relevant insights and information."
- elif personality == "sophisticated":
- return f"Your question about '{prompt[:30]}...' demonstrates intellectual curiosity. I can offer comprehensive analysis and detailed information on this topic."
- else: # expert
- return f"Your inquiry concerning '{prompt[:30]}...' reflects deep thinking. I can provide thorough analysis, multiple perspectives, and detailed information to help you understand this topic better."
-
- def _generate_continuation(self, prompt: str, personality: str, temperature: float) -> str:
- """Generate continuation text"""
- continuations = {
- "basic": "This topic has many interesting aspects to explore.",
- "coherent": "There are several important points to consider when discussing this subject.",
- "enhanced": "This subject encompasses various fascinating dimensions that are worth exploring in detail.",
- "sophisticated": "This topic involves multiple complex factors that require careful consideration and analysis.",
- "expert": "This subject encompasses numerous intricate aspects that benefit from comprehensive examination and thoughtful discussion."
- }
- return continuations.get(personality, continuations["basic"])

- # Initialize the realistic inference engine
- inference_engine = RealisticInferenceEngine()

  def load_model_info(model_id: str) -> str:
  """Get information about a specific model"""
  config = inference_engine.model_configs.get(model_id)
  if config:
- return f"**{config['name']}**\n\n{config['description']}"
  return "❌ Model not found"

- def generate_text_interface(model_id: str, prompt: str, max_length: int,
  temperature: float, top_k: int, top_p: float) -> str:
  """Gradio interface function for text generation"""
  try:
  # Load model if not already loaded
  if model_id not in inference_engine.models:
- logger.info(f"🔄 Loading model: {model_id}")
- success = inference_engine.load_model(model_id)
  if not success:
- return f"❌ Failed to load model: {model_id}"

  # Generate text
- result = inference_engine.generate_realistic_text(
  prompt=prompt,
  max_length=max_length,
  temperature=temperature,
@@ -320,27 +343,29 @@ def create_interface():
  """Create the Gradio interface"""

  with gr.Blocks(
- title="🚀 OpenLLM Realistic Demo Space",
  theme=gr.themes.Soft()
  ) as interface:

  # Header
  gr.Markdown("""
- # 🚀 OpenLLM Realistic Demo Space

- Welcome to the OpenLLM Realistic Demo Space! This interface generates actual text responses based on your prompts.

- ## 🎯 Demo Models

- We provide **5 different demo models** with varying response quality:

- | Model | Training Steps | Response Quality |
- |-------|---------------|------------------|
- | **Demo 4k** | 4,000 | Basic responses |
- | **Demo 6k** | 6,000 | Improved coherence |
- | **Demo 7k** | 7,000 | Enhanced quality |
- | **Demo 8k** | 8,000 | Sophisticated understanding |
- | **Demo 9k** | 9,000 | Expert-level responses |

  ---
  """)
@@ -350,14 +375,14 @@ def create_interface():
  # Model selection
  model_dropdown = gr.Dropdown(
  choices=list(inference_engine.model_configs.keys()),
- value="demo-9k",
  label="🎯 Select Model",
- info="Choose the demo model to use"
  )

  # Model information display
  model_info = gr.Markdown(
- value=load_model_info("demo-9k"),
  label="📋 Model Information"
  )

@@ -443,19 +468,28 @@ def create_interface():

  ## 🔧 Technical Details

- - **Architecture**: GPT-style transformer decoder (demo)
- - **Model Size**: Small demo models for testing
- - **Framework**: PyTorch with realistic text generation
  - **Gradio Version**: 4.44.1 (latest)
- - **Status**: Realistic demo mode - generates actual responses

- **This demo generates contextually appropriate responses based on your input prompts.**
  """)

  return interface

  # Create and launch the interface
  if __name__ == "__main__":
  interface = create_interface()
  interface.launch(
  server_name="0.0.0.0",
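
The new version below replaces these canned responses with real sampling: GPT.generate filters the next-token logits with top-k and nucleus (top-p) filtering before drawing from the distribution. A standalone sketch of the top-p step, run on a made-up logits row for illustration:

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 1.0, 0.5, -1.0]])  # toy scores for a 4-token vocabulary
top_p = 0.9

sorted_logits, sorted_indices = torch.sort(logits, descending=True)
cumulative = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
remove = cumulative > top_p
remove[..., 1:] = remove[..., :-1].clone()  # shift right so the token crossing the threshold survives
remove[..., 0] = 0                          # always keep the single most likely token
mask = remove.scatter(1, sorted_indices, remove)
logits[mask] = -float('Inf')                # masked tokens get zero probability after softmax
print(F.softmax(logits, dim=-1))            # only the smallest set covering ~90% of the mass remains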
 
@@ -1,6 +1,6 @@
  #!/usr/bin/env python3
  """
+ OpenLLM Real Models App - Uses actual trained models from Hugging Face
  """

  import gradio as gr
 
@@ -8,44 +8,56 @@ import torch
  import torch.nn as nn
  import torch.nn.functional as F
  import json
  import logging
+ import math
+ import sentencepiece as spm
+ from pathlib import Path
+ from typing import Dict, Any, Optional
+ from huggingface_hub import snapshot_download

  # Set up logging
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)

+ class GPTConfig:
+ """GPT model configuration"""
+ def __init__(self, vocab_size=32000, n_layer=6, n_head=8, n_embd=512,
+ block_size=1024, dropout=0.1, bias=True):
+ self.vocab_size = vocab_size
+ self.n_layer = n_layer
+ self.n_head = n_head
+ self.n_embd = n_embd
+ self.block_size = block_size
+ self.dropout = dropout
+ self.bias = bias
+
+ class GPT(nn.Module):
+ """GPT-style transformer model"""
+ def __init__(self, config):
  super().__init__()
+ assert config.vocab_size is not None
+ assert config.block_size is not None
+ self.config = config

  self.transformer = nn.ModuleDict(dict(
+ wte = nn.Embedding(config.vocab_size, config.n_embd),
+ wpe = nn.Embedding(config.block_size, config.n_embd),
+ drop = nn.Dropout(config.dropout),
  h = nn.ModuleList([nn.TransformerEncoderLayer(
+ d_model=config.n_embd,
+ nhead=config.n_head,
+ dim_feedforward=4 * config.n_embd,
+ dropout=config.dropout,
  batch_first=True
+ ) for _ in range(config.n_layer)]),
+ ln_f = nn.LayerNorm(config.n_embd),
  ))
+ self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=config.bias)

+ # Initialize weights
  self.apply(self._init_weights)
+ for pn, p in self.named_parameters():
+ if pn.endswith('c_proj.weight'):
+ torch.nn.init.normal_(p, mean=0.0, std=0.02/math.sqrt(2 * config.n_layer))

  def _init_weights(self, module):
  if isinstance(module, nn.Linear):
 
@@ -56,9 +68,11 @@ class RealisticGPT(nn.Module):
  torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

  def forward(self, idx, targets=None):
+ device = idx.device
  b, t = idx.size()
+ assert t <= self.config.block_size, f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"

+ pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)
  tok_emb = self.transformer.wte(idx)
  pos_emb = self.transformer.wpe(pos)
  x = self.transformer.drop(tok_emb + pos_emb)
 
@@ -75,232 +89,241 @@ class RealisticGPT(nn.Module):
  loss = None

  return logits, loss
+
+ def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None, top_p=None, do_sample=True):
+ for _ in range(max_new_tokens):
+ idx_cond = idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size:]
+ logits, _ = self(idx_cond)
+ logits = logits[:, -1, :] / temperature
+
+ if top_k is not None:
+ v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+ logits[logits < v[:, [-1]]] = -float('Inf')
+
+ if top_p is not None:
+ sorted_logits, sorted_indices = torch.sort(logits, descending=True)
+ cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
+ sorted_indices_to_remove = cumulative_probs > top_p
+ sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
+ sorted_indices_to_remove[..., 0] = 0
+ indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
+ logits[indices_to_remove] = -float('Inf')
+
+ probs = F.softmax(logits, dim=-1)
+ if do_sample:
+ idx_next = torch.multinomial(probs, num_samples=1)
+ else:
+ _, idx_next = torch.topk(probs, k=1, dim=-1)
+
+ idx = torch.cat((idx, idx_next), dim=1)
+
+ return idx

+ class RealOpenLLMInference:
+ """Real OpenLLM inference engine using actual trained models"""

  def __init__(self):
  self.models = {}
+ self.tokenizers = {}
  self.current_model = None

+ # Real model configurations from Hugging Face
  self.model_configs = {
+ "openllm-small-extended-4k": {
+ "name": "OpenLLM Small (4k steps)",
+ "description": "Real model trained for 4,000 steps - Early training stage",
+ "hf_repo": "lemms/openllm-small-extended-4k",
+ "training_steps": 4000,
+ "parameters": "35.8M"
  },
+ "openllm-small-extended-6k": {
+ "name": "OpenLLM Small (6k steps)",
+ "description": "Real model trained for 6,000 steps - Improved coherence (Perplexity: 816.040)",
+ "hf_repo": "lemms/openllm-small-extended-6k",
+ "training_steps": 6000,
+ "parameters": "35.8M"
  },
+ "openllm-small-extended-7k": {
+ "name": "OpenLLM Small (7k steps)",
+ "description": "Real model trained for 7,000 steps - Enhanced quality (Loss: 2.100, Perplexity: 8.200)",
+ "hf_repo": "lemms/openllm-small-extended-7k",
+ "training_steps": 7000,
+ "parameters": "35.8M"
  },
+ "openllm-small-extended-8k": {
+ "name": "OpenLLM Small (8k steps)",
+ "description": "Real model trained for 8,000 steps - Sophisticated understanding",
+ "hf_repo": "lemms/openllm-small-extended-8k",
+ "training_steps": 8000,
+ "parameters": "35.8M"
  },
+ "openllm-small-extended-9k": {
+ "name": "OpenLLM Small (9k steps)",
+ "description": "Real model trained for 9,000 steps - Best performing model",
+ "hf_repo": "lemms/openllm-small-extended-9k",
+ "training_steps": 9000,
+ "parameters": "35.8M"
  }
  }

+ logger.info("🚀 Real OpenLLM Inference Engine initialized")

+ def load_model_from_hf(self, model_id: str) -> bool:
+ """Load a real model from Hugging Face"""
  try:
  config = self.model_configs.get(model_id)
  if not config:
  logger.error(f"❌ Unknown model ID: {model_id}")
  return False

+ logger.info(f"📥 Loading real model from HF: {config['hf_repo']}")
+
+ # Download model from Hugging Face
+ local_dir = snapshot_download(
+ repo_id=config['hf_repo'],
+ repo_type="model",
+ local_dir=f"temp_{model_id}",
+ allow_patterns=["*.pt", "*.json", "*.model", "*.bin"]
+ )
+
+ logger.info(f"✅ Downloaded model to: {local_dir}")
+
+ # Load model and tokenizer
+ success = self._load_model_and_tokenizer(local_dir, model_id)
+ if success:
+ self.current_model = model_id
+ logger.info(f"✅ Successfully loaded real model: {model_id}")
+ return True
+ else:
+ return False
+
+ except Exception as e:
+ logger.error(f"❌ Failed to load real model from HF {model_id}: {e}")
+ return False
+
+ def _load_model_and_tokenizer(self, model_dir: str, model_id: str) -> bool:
+ """Load model and tokenizer from local directory"""
+ try:
+ model_path = Path(model_dir)
+
+ # Load model configuration
+ config_file = model_path / "config.json"
+ if config_file.exists():
+ with open(config_file, 'r') as f:
+ config_data = json.load(f)
+ model_config = GPTConfig(**config_data.get('model_config', {}))
+ else:
+ # Default configuration for OpenLLM small models
+ model_config = GPTConfig(
+ vocab_size=32000,
+ n_layer=6,
+ n_head=8,
+ n_embd=512,
+ block_size=1024,
+ dropout=0.1,
+ bias=True
+ )
+
+ # Load model weights
+ model_file = model_path / "model.pt"
+ if not model_file.exists():
+ model_file = model_path / "pytorch_model.bin"

+ if model_file.exists():
+ model = GPT(model_config)
+ checkpoint = torch.load(model_file, map_location='cpu')
+
+ # Handle different checkpoint formats
+ if 'model' in checkpoint:
+ model.load_state_dict(checkpoint['model'])
+ else:
+ model.load_state_dict(checkpoint)
+
+ model.eval()
+ self.models[model_id] = model
+ else:
+ logger.error(f"❌ Model file not found in {model_dir}")
+ return False
+
+ # Load tokenizer
+ tokenizer_file = model_path / "tokenizer.model"
+ if tokenizer_file.exists():
+ tokenizer = spm.SentencePieceProcessor()
+ tokenizer.load(str(tokenizer_file))
+ self.tokenizers[model_id] = tokenizer
+ else:
+ logger.error(f"❌ Tokenizer file not found in {model_dir}")
+ return False

  return True

  except Exception as e:
+ logger.error(f"❌ Failed to load model and tokenizer: {e}")
  return False

+ def generate_text(self, prompt: str, max_length: int = 100,
+ temperature: float = 0.7, top_k: int = 50,
+ top_p: float = 0.9) -> str:
+ """Generate text using the loaded real model"""
  if not self.current_model or self.current_model not in self.models:
  return "❌ No model loaded. Please select a model first."

  try:
+ model = self.models[self.current_model]
+ tokenizer = self.tokenizers[self.current_model]

+ # Tokenize input
+ input_ids = tokenizer.encode(prompt)
+ input_tensor = torch.tensor([input_ids], dtype=torch.long)

+ # Generate text
+ with torch.no_grad():
+ output_ids = model.generate(
+ input_tensor,
+ max_new_tokens=max_length,
+ temperature=temperature,
+ top_k=top_k,
+ top_p=top_p,
+ do_sample=True
+ )

+ # Decode output
+ generated_text = tokenizer.decode(output_ids[0].tolist())

+ # Remove the input prompt from the output
+ if generated_text.startswith(prompt):
+ generated_text = generated_text[len(prompt):].strip()

+ return generated_text

  except Exception as e:
  error_msg = f"❌ Generation failed: {str(e)}"
  logger.error(error_msg)
  return error_msg

+ # Initialize the real inference engine
+ inference_engine = RealOpenLLMInference()

  def load_model_info(model_id: str) -> str:
  """Get information about a specific model"""
  config = inference_engine.model_configs.get(model_id)
  if config:
+ return f"**{config['name']}**\n\n{config['description']}\n\n**Parameters:** {config['parameters']}\n**Training Steps:** {config['training_steps']:,}"
  return "❌ Model not found"

+ def generate_text_interface(model_id: str, prompt: str, max_length: int,
  temperature: float, top_k: int, top_p: float) -> str:
  """Gradio interface function for text generation"""
  try:
  # Load model if not already loaded
  if model_id not in inference_engine.models:
+ logger.info(f"🔄 Loading real model: {model_id}")
+ success = inference_engine.load_model_from_hf(model_id)
  if not success:
+ return f"❌ Failed to load real model: {model_id}"

  # Generate text
+ result = inference_engine.generate_text(
  prompt=prompt,
  max_length=max_length,
  temperature=temperature,
 
@@ -320,27 +343,29 @@ def create_interface():
  """Create the Gradio interface"""

  with gr.Blocks(
+ title="🚀 OpenLLM Real Models Space",
  theme=gr.themes.Soft()
  ) as interface:

  # Header
  gr.Markdown("""
+ # 🚀 OpenLLM Real Models Space

+ Welcome to the OpenLLM Real Models Space! This interface uses **actual trained models** from Hugging Face.

+ ## 🎯 Real Trained Models

+ We provide **5 different real models** with varying training steps:

+ | Model | Training Steps | Parameters | Performance |
+ |-------|---------------|------------|-------------|
+ | **4k Model** | 4,000 | 35.8M | Early training stage |
+ | **6k Model** | 6,000 | 35.8M | Improved coherence (Perplexity: 816.040) |
+ | **7k Model** | 7,000 | 35.8M | Enhanced quality (Loss: 2.100, Perplexity: 8.200) |
+ | **8k Model** | 8,000 | 35.8M | Sophisticated understanding |
+ | **9k Model** | 9,000 | 35.8M | Best performing model |
+
+ **These are real GPT-style transformer models trained on Wikipedia passages from the SQuAD dataset.**

  ---
  """)
 
@@ -350,14 +375,14 @@ def create_interface():
  # Model selection
  model_dropdown = gr.Dropdown(
  choices=list(inference_engine.model_configs.keys()),
+ value="openllm-small-extended-9k",
  label="🎯 Select Model",
+ info="Choose the real trained model to use"
  )

  # Model information display
  model_info = gr.Markdown(
+ value=load_model_info("openllm-small-extended-9k"),
  label="📋 Model Information"
  )

@@ -443,19 +468,28 @@

  ## 🔧 Technical Details

+ - **Architecture**: GPT-style transformer decoder
+ - **Model Size**: Small (6 layers, 8 heads, 512 embedding dim)
+ - **Vocabulary**: 32k tokens (SentencePiece BPE)
+ - **Training Data**: Wikipedia passages from SQuAD dataset
+ - **Framework**: PyTorch with real trained models
  - **Gradio Version**: 4.44.1 (latest)

+ **These models generate actual text based on their training on Wikipedia content.**
+
+ **Model Sources:**
+ - [4k Model](https://huggingface.co/lemms/openllm-small-extended-4k)
+ - [6k Model](https://huggingface.co/lemms/openllm-small-extended-6k)
+ - [7k Model](https://huggingface.co/lemms/openllm-small-extended-7k)
+ - [8k Model](https://huggingface.co/lemms/openllm-small-extended-8k)
+ - [9k Model](https://huggingface.co/lemms/openllm-small-extended-9k)
  """)

  return interface

  # Create and launch the interface
  if __name__ == "__main__":
  interface = create_interface()
  interface.launch(
  server_name="0.0.0.0",
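
For reference, a minimal sketch of exercising the new inference path outside Gradio, assuming this file is saved as app.py and the lemms/openllm-small-extended-* repos contain the model.pt and tokenizer.model files the loader expects:

# Hypothetical driver script; it mirrors what the Gradio callbacks do.
from app import inference_engine

if inference_engine.load_model_from_hf("openllm-small-extended-9k"):
    completion = inference_engine.generate_text(
        prompt="The capital of France is",
        max_length=50,    # forwarded to GPT.generate as max_new_tokens
        temperature=0.7,  # <1.0 sharpens the next-token distribution
        top_k=50,         # keep only the 50 most likely tokens
        top_p=0.9,        # nucleus sampling over ~90% of probability mass
    )
    print(completion)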