Upload app.py with huggingface_hub
app.py
CHANGED
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-OpenLLM
+OpenLLM Real Models App - Uses actual trained models from Hugging Face
 """
 
 import gradio as gr
@@ -8,44 +8,56 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import json
-import random
 import logging
-import
+import sentencepiece as spm
+from pathlib import Path
+from typing import Dict, Any, Optional
+from huggingface_hub import snapshot_download
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-class RealisticGPT(nn.Module):
-    """
-
-
+class GPTConfig:
+    """GPT model configuration"""
+    def __init__(self, vocab_size=32000, n_layer=6, n_head=8, n_embd=512,
+                 block_size=1024, dropout=0.1, bias=True):
+        self.vocab_size = vocab_size
+        self.n_layer = n_layer
+        self.n_head = n_head
+        self.n_embd = n_embd
+        self.block_size = block_size
+        self.dropout = dropout
+        self.bias = bias
+
+class GPT(nn.Module):
+    """GPT-style transformer model"""
+    def __init__(self, config):
         super().__init__()
+        assert config.vocab_size is not None
+        assert config.block_size is not None
+        self.config = config
-
-
-
-            'n_head': n_head,
-            'n_embd': n_embd,
-            'block_size': 256
-        })()
 
         self.transformer = nn.ModuleDict(dict(
-            wte = nn.Embedding(vocab_size, n_embd),
-            wpe = nn.Embedding(
-            drop = nn.Dropout(
+            wte = nn.Embedding(config.vocab_size, config.n_embd),
+            wpe = nn.Embedding(config.block_size, config.n_embd),
+            drop = nn.Dropout(config.dropout),
             h = nn.ModuleList([nn.TransformerEncoderLayer(
-                d_model=n_embd,
-                nhead=n_head,
-                dim_feedforward=4 * n_embd,
-                dropout=
+                d_model=config.n_embd,
+                nhead=config.n_head,
+                dim_feedforward=4 * config.n_embd,
+                dropout=config.dropout,
                 batch_first=True
-            ) for _ in range(n_layer)]),
-            ln_f = nn.LayerNorm(n_embd),
+            ) for _ in range(config.n_layer)]),
+            ln_f = nn.LayerNorm(config.n_embd),
         ))
-        self.lm_head = nn.Linear(n_embd, vocab_size)
+        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=config.bias)
 
-        # Initialize
+        # Initialize weights
         self.apply(self._init_weights)
+        for pn, p in self.named_parameters():
+            if pn.endswith('c_proj.weight'):
+                torch.nn.init.normal_(p, mean=0.0, std=0.02/math.sqrt(2 * config.n_layer))
 
     def _init_weights(self, module):
         if isinstance(module, nn.Linear):
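A minimal smoke-test sketch of the two classes added in this hunk. It assumes the elided layer loop in `forward` returns `(logits, loss)` as the surrounding context lines indicate, and that `math` is importable when a model is constructed, since `GPT.__init__` calls `math.sqrt`. Note also that `nn.TransformerEncoderLayer` is used for the blocks; whether a causal mask is applied inside the collapsed part of `forward` is not visible in this diff.

    import math, torch
    config = GPTConfig(vocab_size=32000, n_layer=6, n_head=8, n_embd=512)
    model = GPT(config)
    model.eval()  # disable dropout for a deterministic check
    idx = torch.randint(0, config.vocab_size, (2, 16))  # batch of 2, 16 tokens each
    logits, loss = model(idx)  # context lines show (logits, loss); loss is None without targets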
@@ -56,9 +68,11 @@ class RealisticGPT(nn.Module):
             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
 
     def forward(self, idx, targets=None):
+        device = idx.device
         b, t = idx.size()
-
+        assert t <= self.config.block_size, f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
 
+        pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)
         tok_emb = self.transformer.wte(idx)
         pos_emb = self.transformer.wpe(pos)
         x = self.transformer.drop(tok_emb + pos_emb)
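For reference, the shapes behind the added `pos` line: `wte(idx)` is `(b, t, n_embd)` while `wpe(pos)` is `(1, t, n_embd)`, so the sum broadcasts one set of position embeddings across the batch. A tiny check:

    import torch
    b, t, n_embd = 2, 5, 8
    tok = torch.zeros(b, t, n_embd)
    pos = torch.zeros(1, t, n_embd)
    print((tok + pos).shape)  # torch.Size([2, 5, 8])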
@@ -75,232 +89,241 @@ class RealisticGPT(nn.Module):
         loss = None
 
         return logits, loss
+
+    def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None, top_p=None, do_sample=True):
+        for _ in range(max_new_tokens):
+            idx_cond = idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size:]
+            logits, _ = self(idx_cond)
+            logits = logits[:, -1, :] / temperature
+
+            if top_k is not None:
+                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
+                logits[logits < v[:, [-1]]] = -float('Inf')
+
+            if top_p is not None:
+                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
+                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
+                sorted_indices_to_remove = cumulative_probs > top_p
+                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
+                sorted_indices_to_remove[..., 0] = 0
+                indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
+                logits[indices_to_remove] = -float('Inf')
+
+            probs = F.softmax(logits, dim=-1)
+            if do_sample:
+                idx_next = torch.multinomial(probs, num_samples=1)
+            else:
+                _, idx_next = torch.topk(probs, k=1, dim=-1)
+
+            idx = torch.cat((idx, idx_next), dim=1)
+
+        return idx
 
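The top-k and top-p branches above implement standard truncated sampling: top-k keeps only the k highest logits, and top-p (nucleus) keeps the smallest set of tokens whose cumulative probability exceeds the threshold, shifting the mask so the boundary token survives. A self-contained toy run of the same tensor ops (5-token vocabulary, top_k=3, top_p=0.8):

    import torch
    import torch.nn.functional as F
    logits = torch.tensor([[2.0, 1.0, 0.5, 0.1, -1.0]])
    v, _ = torch.topk(logits, 3)                    # top_k=3: keep 2.0, 1.0, 0.5
    logits[logits < v[:, [-1]]] = -float('Inf')
    sorted_logits, sorted_indices = torch.sort(logits, descending=True)
    cum = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
    remove = cum > 0.8                              # top_p=0.8
    remove[..., 1:] = remove[..., :-1].clone()      # shift right: keep the token that crosses 0.8
    remove[..., 0] = 0                              # never drop the single best token
    logits[remove.scatter(1, sorted_indices, remove)] = -float('Inf')
    print(F.softmax(logits, dim=-1))                # ~[0.73, 0.27, 0, 0, 0]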
 
-class
-    """
+class RealOpenLLMInference:
+    """Real OpenLLM inference engine using actual trained models"""
 
     def __init__(self):
         self.models = {}
+        self.tokenizers = {}
         self.current_model = None
 
-        #
+        # Real model configurations from Hugging Face
         self.model_configs = {
-            "
-                "name": "
-                "description": "
-                "
-                "
+            "openllm-small-extended-4k": {
+                "name": "OpenLLM Small (4k steps)",
+                "description": "Real model trained for 4,000 steps - Early training stage",
+                "hf_repo": "lemms/openllm-small-extended-4k",
+                "training_steps": 4000,
+                "parameters": "35.8M"
             },
-            "
-                "name": "
-                "description": "
-                "
-                "
+            "openllm-small-extended-6k": {
+                "name": "OpenLLM Small (6k steps)",
+                "description": "Real model trained for 6,000 steps - Improved coherence (Perplexity: 816.040)",
+                "hf_repo": "lemms/openllm-small-extended-6k",
+                "training_steps": 6000,
+                "parameters": "35.8M"
             },
-            "
-                "name": "
-                "description": "
-                "
-                "
+            "openllm-small-extended-7k": {
+                "name": "OpenLLM Small (7k steps)",
+                "description": "Real model trained for 7,000 steps - Enhanced quality (Loss: 2.100, Perplexity: 8.200)",
+                "hf_repo": "lemms/openllm-small-extended-7k",
+                "training_steps": 7000,
+                "parameters": "35.8M"
             },
-            "
-                "name": "
-                "description": "
-                "
-                "
+            "openllm-small-extended-8k": {
+                "name": "OpenLLM Small (8k steps)",
+                "description": "Real model trained for 8,000 steps - Sophisticated understanding",
+                "hf_repo": "lemms/openllm-small-extended-8k",
+                "training_steps": 8000,
+                "parameters": "35.8M"
            },
-            "
-                "name": "
-                "description": "
-                "
-                "
+            "openllm-small-extended-9k": {
+                "name": "OpenLLM Small (9k steps)",
+                "description": "Real model trained for 9,000 steps - Best performing model",
+                "hf_repo": "lemms/openllm-small-extended-9k",
+                "training_steps": 9000,
+                "parameters": "35.8M"
             }
         }
 
-        logger.info("🚀
+        logger.info("🚀 Real OpenLLM Inference Engine initialized")
 
-    def
-        """Load a
+    def load_model_from_hf(self, model_id: str) -> bool:
+        """Load a real model from Hugging Face"""
         try:
             config = self.model_configs.get(model_id)
             if not config:
                 logger.error(f"❌ Unknown model ID: {model_id}")
                 return False
 
-            logger.info(f"📥 Loading
 
-
-
-
-
-
 
-            logger.info(f"✅ Successfully loaded demo model: {model_id}")
+            logger.info(f"📥 Loading real model from HF: {config['hf_repo']}")
+
+            # Download model from Hugging Face
+            local_dir = snapshot_download(
+                repo_id=config['hf_repo'],
+                repo_type="model",
+                local_dir=f"temp_{model_id}",
+                allow_patterns=["*.pt", "*.json", "*.model", "*.bin"]
+            )
+
+            logger.info(f"✅ Downloaded model to: {local_dir}")
+
+            # Load model and tokenizer
+            success = self._load_model_and_tokenizer(local_dir, model_id)
+            if success:
+                self.current_model = model_id
+                logger.info(f"✅ Successfully loaded real model: {model_id}")
+                return True
+            else:
+                return False
+
+        except Exception as e:
+            logger.error(f"❌ Failed to load real model from HF {model_id}: {e}")
+            return False
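`load_model_from_hf` leans on `huggingface_hub.snapshot_download`, which mirrors a repo into a local directory and can filter what it fetches via glob patterns. A minimal standalone sketch (repo id taken from the configs above; network access assumed):

    from huggingface_hub import snapshot_download
    local_dir = snapshot_download(
        repo_id="lemms/openllm-small-extended-9k",
        repo_type="model",
        allow_patterns=["*.pt", "*.json", "*.model"],  # fetch only what the loader needs
    )
    print(local_dir)  # local directory containing the matched files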
+    def _load_model_and_tokenizer(self, model_dir: str, model_id: str) -> bool:
+        """Load model and tokenizer from local directory"""
+        try:
+            model_path = Path(model_dir)
+
+            # Load model configuration
+            config_file = model_path / "config.json"
+            if config_file.exists():
+                with open(config_file, 'r') as f:
+                    config_data = json.load(f)
+                model_config = GPTConfig(**config_data.get('model_config', {}))
+            else:
+                # Default configuration for OpenLLM small models
+                model_config = GPTConfig(
+                    vocab_size=32000,
+                    n_layer=6,
+                    n_head=8,
+                    n_embd=512,
+                    block_size=1024,
+                    dropout=0.1,
+                    bias=True
+                )
+
+            # Load model weights
+            model_file = model_path / "model.pt"
+            if not model_file.exists():
+                model_file = model_path / "pytorch_model.bin"
 
+            if model_file.exists():
+                model = GPT(model_config)
+                checkpoint = torch.load(model_file, map_location='cpu')
+
+                # Handle different checkpoint formats
+                if 'model' in checkpoint:
+                    model.load_state_dict(checkpoint['model'])
+                else:
+                    model.load_state_dict(checkpoint)
+
+                model.eval()
+                self.models[model_id] = model
+            else:
+                logger.error(f"❌ Model file not found in {model_dir}")
+                return False
+
+            # Load tokenizer
+            tokenizer_file = model_path / "tokenizer.model"
+            if tokenizer_file.exists():
+                tokenizer = spm.SentencePieceProcessor()
+                tokenizer.load(str(tokenizer_file))
+                self.tokenizers[model_id] = tokenizer
+            else:
+                logger.error(f"❌ Tokenizer file not found in {model_dir}")
+                return False
 
             return True
 
         except Exception as e:
+            logger.error(f"❌ Failed to load model and tokenizer: {e}")
             return False
 
-    def
-
-
-        """Generate
+    def generate_text(self, prompt: str, max_length: int = 100,
+                      temperature: float = 0.7, top_k: int = 50,
+                      top_p: float = 0.9) -> str:
+        """Generate text using the loaded real model"""
         if not self.current_model or self.current_model not in self.models:
             return "❌ No model loaded. Please select a model first."
 
         try:
-
-
+            model = self.models[self.current_model]
+            tokenizer = self.tokenizers[self.current_model]
 
-            #
-
+            # Tokenize input
+            input_ids = tokenizer.encode(prompt)
+            input_tensor = torch.tensor([input_ids], dtype=torch.long)
 
-            # Generate
-
-
-
-
-
-
-
-
-
-                response = self._generate_story_response(prompt, personality, temperature)
-            else:
-                response = self._generate_general_response(prompt, personality, temperature)
+            # Generate text
+            with torch.no_grad():
+                output_ids = model.generate(
+                    input_tensor,
+                    max_new_tokens=max_length,
+                    temperature=temperature,
+                    top_k=top_k,
+                    top_p=top_p,
+                    do_sample=True
+                )
 
-            #
-
-                response = response[:max_length] + "..."
-            elif max_length > 200:
-                response += " " + self._generate_continuation(prompt, personality, temperature)
+            # Decode output
+            generated_text = tokenizer.decode(output_ids[0].tolist())
 
-            #
-            if
-
-            elif temperature < 0.5:
-                response += " [Focused mode: Lower temperature produces more deterministic and precise output]"
+            # Remove the input prompt from the output
+            if generated_text.startswith(prompt):
+                generated_text = generated_text[len(prompt):].strip()
 
-            return
+            return generated_text
 
         except Exception as e:
             error_msg = f"❌ Generation failed: {str(e)}"
             logger.error(error_msg)
             return error_msg
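`generate_text` relies on two SentencePiece calls, `encode` and `decode`, which map between text and integer token ids. A round-trip sketch (the `tokenizer.model` path is hypothetical, standing in for the file the loader downloads):

    import sentencepiece as spm
    sp = spm.SentencePieceProcessor()
    sp.load("tokenizer.model")                 # hypothetical local path
    ids = sp.encode("The capital of France")   # list of token ids
    print(sp.decode(ids))                      # recovers the original text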
-
-    def _generate_capital_response(self, prompt: str, personality: str, temperature: float) -> str:
-        """Generate response about capitals"""
-        base_response = "The capital of France is Paris."
-
-        if personality == "basic":
-            return base_response
-        elif personality == "coherent":
-            return f"{base_response} Paris is a beautiful city known for its culture and history."
-        elif personality == "enhanced":
-            return f"{base_response} Paris, the City of Light, is renowned for its art, fashion, gastronomy, and culture. It's home to iconic landmarks like the Eiffel Tower and the Louvre Museum."
-        elif personality == "sophisticated":
-            return f"{base_response} Paris, often called the City of Light, serves as France's political, economic, and cultural center. It's famous for its rich history, world-class museums, and distinctive architecture."
-        else: # expert
-            return f"{base_response} Paris, the capital and largest city of France, is a global center for art, fashion, gastronomy, and culture. Located in northern France, it's known for its iconic landmarks, museums, and distinctive Haussmann architecture."
-
-    def _generate_weather_response(self, prompt: str, personality: str, temperature: float) -> str:
-        """Generate weather-related response"""
-        if personality == "basic":
-            return "The weather varies depending on location and time of year."
-        elif personality == "coherent":
-            return "Weather conditions can change throughout the day. It's best to check local forecasts for accurate information."
-        elif personality == "enhanced":
-            return "Weather patterns are influenced by various factors including temperature, humidity, pressure systems, and geographical location. Local weather services provide the most accurate forecasts."
-        elif personality == "sophisticated":
-            return "Weather is a complex atmospheric phenomenon influenced by temperature, humidity, air pressure, wind patterns, and geographical features. Meteorological services use advanced models to predict weather conditions."
-        else: # expert
-            return "Weather encompasses atmospheric conditions including temperature, humidity, precipitation, wind, and visibility. Modern meteorology uses sophisticated computer models, satellite data, and ground observations to provide accurate forecasts."
-
-    def _generate_greeting_response(self, prompt: str, personality: str, temperature: float) -> str:
-        """Generate greeting response"""
-        greetings = {
-            "basic": "Hello! How can I help you today?",
-            "coherent": "Hello there! I'm here to assist you with any questions or tasks you might have.",
-            "enhanced": "Hello! I'm ready to help you with information, explanations, or creative tasks. What would you like to know?",
-            "sophisticated": "Greetings! I'm here to provide assistance, answer questions, or engage in meaningful conversation. How may I be of service?",
-            "expert": "Hello! I'm designed to help with a wide range of tasks including information retrieval, creative writing, problem-solving, and engaging discussions. What would you like to explore?"
-        }
-        return greetings.get(personality, greetings["basic"])
-
-    def _generate_explanation_response(self, prompt: str, personality: str, temperature: float) -> str:
-        """Generate explanation response"""
-        if personality == "basic":
-            return "I can help explain various topics. Could you please provide more specific details about what you'd like me to explain?"
-        elif personality == "coherent":
-            return "I'm happy to provide explanations on a wide range of subjects. Please let me know what specific topic or concept you'd like me to clarify."
-        elif personality == "enhanced":
-            return "I can offer detailed explanations across many fields including science, history, technology, and more. What specific topic would you like me to explain?"
-        elif personality == "sophisticated":
-            return "I'm capable of providing comprehensive explanations on diverse subjects, from scientific concepts to historical events. Please specify what you'd like me to elucidate."
-        else: # expert
-            return "I can deliver thorough explanations across multiple domains including science, technology, history, philosophy, and current events. What specific topic would you like me to explore in detail?"
-
-    def _generate_story_response(self, prompt: str, personality: str, temperature: float) -> str:
-        """Generate story response"""
-        if personality == "basic":
-            return "I can help you create stories. What kind of story would you like me to write?"
-        elif personality == "coherent":
-            return "I'd be happy to help you write a story. Could you tell me what genre or theme you have in mind?"
-        elif personality == "enhanced":
-            return "I can create engaging stories in various genres including fantasy, mystery, science fiction, and more. What type of story interests you?"
-        elif personality == "sophisticated":
-            return "I'm capable of crafting compelling narratives across multiple genres with rich character development and intricate plots. What kind of story would you like me to create?"
-        else: # expert
-            return "I can compose sophisticated narratives with complex characters, detailed world-building, and engaging plotlines across various genres. What type of story would you like me to develop?"
-
-    def _generate_general_response(self, prompt: str, personality: str, temperature: float) -> str:
-        """Generate general response"""
-        if personality == "basic":
-            return f"That's an interesting question about '{prompt[:30]}...'. I can help you explore this topic further."
-        elif personality == "coherent":
-            return f"Your question about '{prompt[:30]}...' is quite engaging. Let me provide some helpful information on this subject."
-        elif personality == "enhanced":
-            return f"Your inquiry regarding '{prompt[:30]}...' shows thoughtful consideration. I'd be happy to share relevant insights and information."
-        elif personality == "sophisticated":
-            return f"Your question about '{prompt[:30]}...' demonstrates intellectual curiosity. I can offer comprehensive analysis and detailed information on this topic."
-        else: # expert
-            return f"Your inquiry concerning '{prompt[:30]}...' reflects deep thinking. I can provide thorough analysis, multiple perspectives, and detailed information to help you understand this topic better."
-
-    def _generate_continuation(self, prompt: str, personality: str, temperature: float) -> str:
-        """Generate continuation text"""
-        continuations = {
-            "basic": "This topic has many interesting aspects to explore.",
-            "coherent": "There are several important points to consider when discussing this subject.",
-            "enhanced": "This subject encompasses various fascinating dimensions that are worth exploring in detail.",
-            "sophisticated": "This topic involves multiple complex factors that require careful consideration and analysis.",
-            "expert": "This subject encompasses numerous intricate aspects that benefit from comprehensive examination and thoughtful discussion."
-        }
-        return continuations.get(personality, continuations["basic"])
 
-# Initialize the
-inference_engine =
+# Initialize the real inference engine
+inference_engine = RealOpenLLMInference()
 
 def load_model_info(model_id: str) -> str:
     """Get information about a specific model"""
     config = inference_engine.model_configs.get(model_id)
     if config:
-        return f"**{config['name']}**\n\n{config['description']}"
+        return f"**{config['name']}**\n\n{config['description']}\n\n**Parameters:** {config['parameters']}\n**Training Steps:** {config['training_steps']:,}"
     return "❌ Model not found"
 
-def generate_text_interface(model_id: str, prompt: str, max_length: int,
+def generate_text_interface(model_id: str, prompt: str, max_length: int,
                             temperature: float, top_k: int, top_p: float) -> str:
     """Gradio interface function for text generation"""
     try:
         # Load model if not already loaded
         if model_id not in inference_engine.models:
-            logger.info(f"🔄 Loading model: {model_id}")
-            success = inference_engine.
+            logger.info(f"🔄 Loading real model: {model_id}")
+            success = inference_engine.load_model_from_hf(model_id)
             if not success:
-                return f"❌ Failed to load model: {model_id}"
+                return f"❌ Failed to load real model: {model_id}"
 
         # Generate text
-        result = inference_engine.
+        result = inference_engine.generate_text(
             prompt=prompt,
             max_length=max_length,
             temperature=temperature,
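Putting the new pieces together, the engine can be exercised outside Gradio with two calls, mirroring what `generate_text_interface` does (a hedged sketch; the first call downloads checkpoint files, so network access is assumed):

    engine = RealOpenLLMInference()
    if engine.load_model_from_hf("openllm-small-extended-9k"):
        print(engine.generate_text("The capital of France is", max_length=40))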
@@ -320,27 +343,29 @@ def create_interface():
     """Create the Gradio interface"""
 
     with gr.Blocks(
-        title="🚀 OpenLLM
+        title="🚀 OpenLLM Real Models Space",
        theme=gr.themes.Soft()
     ) as interface:
 
         # Header
         gr.Markdown("""
-        # 🚀 OpenLLM
+        # 🚀 OpenLLM Real Models Space
 
-        Welcome to the OpenLLM
+        Welcome to the OpenLLM Real Models Space! This interface uses **actual trained models** from Hugging Face.
 
-        ## 🎯
+        ## 🎯 Real Trained Models
 
-        We provide **5 different
+        We provide **5 different real models** with varying training steps:
 
-        | Model | Training Steps |
-        |
-        | **
-        | **
-        | **
-        | **
-        | **
+        | Model | Training Steps | Parameters | Performance |
+        |-------|---------------|------------|-------------|
+        | **4k Model** | 4,000 | 35.8M | Early training stage |
+        | **6k Model** | 6,000 | 35.8M | Improved coherence (Perplexity: 816.040) |
+        | **7k Model** | 7,000 | 35.8M | Enhanced quality (Loss: 2.100, Perplexity: 8.200) |
+        | **8k Model** | 8,000 | 35.8M | Sophisticated understanding |
+        | **9k Model** | 9,000 | 35.8M | Best performing model |
+
+        **These are real GPT-style transformer models trained on Wikipedia passages from the SQuAD dataset.**
 
         ---
         """)
@@ -350,14 +375,14 @@ def create_interface():
             # Model selection
             model_dropdown = gr.Dropdown(
                 choices=list(inference_engine.model_configs.keys()),
-                value="
+                value="openllm-small-extended-9k",
                 label="🎯 Select Model",
-                info="Choose the
+                info="Choose the real trained model to use"
             )
 
             # Model information display
             model_info = gr.Markdown(
-                value=load_model_info("
+                value=load_model_info("openllm-small-extended-9k"),
                 label="📊 Model Information"
             )
 
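The dropdown defaults to the best-performing checkpoint, and `model_info` is seeded from the same id. The event wiring that refreshes the info panel on selection falls outside this hunk's context, but with these two components it would follow the usual Gradio pattern (an assumed sketch, not shown in the diff):

    model_dropdown.change(fn=load_model_info, inputs=model_dropdown, outputs=model_info)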
@@ -443,19 +468,28 @@ def create_interface():
 
         ## 🔧 Technical Details
 
-        - **Architecture**: GPT-style transformer decoder
-        - **Model Size**: Small
-        - **
+        - **Architecture**: GPT-style transformer decoder
+        - **Model Size**: Small (6 layers, 8 heads, 512 embedding dim)
+        - **Vocabulary**: 32k tokens (SentencePiece BPE)
+        - **Training Data**: Wikipedia passages from SQuAD dataset
+        - **Framework**: PyTorch with real trained models
         - **Gradio Version**: 4.44.1 (latest)
-        - **Status**: Realistic demo mode - generates actual responses
 
-        **
+        **These models generate actual text based on their training on Wikipedia content.**
+
+        **Model Sources:**
+        - [4k Model](https://huggingface.co/lemms/openllm-small-extended-4k)
+        - [6k Model](https://huggingface.co/lemms/openllm-small-extended-6k)
+        - [7k Model](https://huggingface.co/lemms/openllm-small-extended-7k)
+        - [8k Model](https://huggingface.co/lemms/openllm-small-extended-8k)
+        - [9k Model](https://huggingface.co/lemms/openllm-small-extended-9k)
         """)
 
     return interface
 
 # Create and launch the interface
 if __name__ == "__main__":
+    import math
     interface = create_interface()
     interface.launch(
         server_name="0.0.0.0",
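One caveat in this last hunk: `import math` is added inside the `__main__` guard, while `GPT.__init__` calls `math.sqrt` whenever a checkpoint is loaded. That works when app.py is executed directly, since the import binds the module-level name before any model is constructed, but it would raise a `NameError` if the module were imported elsewhere and a model then loaded. The safer pattern is a module-level import:

    import math  # at the top of app.py, alongside the other imports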