aixk committed
Commit 53663e6 · verified · 1 Parent(s): 304f89f

Update app.py

Files changed (1)
  1. app.py +105 -54
app.py CHANGED
@@ -1,69 +1,120 @@
 
 
 
 
  import gradio as gr
- from huggingface_hub import InferenceClient


- def respond(
-     message,
-     history: list[dict[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
-     hf_token: gr.OAuthToken,
- ):
-     """
-     For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-     """
-     client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")

-     messages = [{"role": "system", "content": system_message}]

-     messages.extend(history)

-     messages.append({"role": "user", "content": message})

-     response = ""

-     for message in client.chat_completion(
-         messages,
-         max_tokens=max_tokens,
-         stream=True,
-         temperature=temperature,
-         top_p=top_p,
-     ):
-         choices = message.choices
-         token = ""
-         if len(choices) and choices[0].delta.content:
-             token = choices[0].delta.content

-         response += token
-         yield response


- """
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
- """
- chatbot = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
-     ],
- )

- with gr.Blocks() as demo:
-     with gr.Sidebar():
-         gr.LoginButton()
-     chatbot.render()


  if __name__ == "__main__":
-     demo.launch()
 
+ import torch
+ import torch.nn as nn
+ import unicodedata
+ import os
  import gradio as gr
+ from transformers import PreTrainedTokenizerFast, PretrainedConfig, PreTrainedModel
+ from tokenizers import decoders

+ # 1. Re-define the Architecture Classes (identical to the training/test phase)
+ class IsaiConfig(PretrainedConfig):
+     model_type = "isai"
+     def __init__(self, vocab_size=32000, hidden_size=1024, intermediate_size=2816, num_hidden_layers=24, num_attention_heads=16, num_key_value_heads=16, hidden_act="silu", max_position_embeddings=2048, initializer_range=0.02, rms_norm_eps=1e-6, use_cache=True, pad_token_id=0, bos_token_id=1, eos_token_id=2, **kwargs):
+         super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
+         self.vocab_size = vocab_size
+         self.hidden_size = hidden_size
+         self.intermediate_size = intermediate_size
+         self.num_hidden_layers = num_hidden_layers
+         self.num_attention_heads = num_attention_heads
+         self.num_key_value_heads = num_key_value_heads
+         self.max_position_embeddings = max_position_embeddings
+         self.rms_norm_eps = rms_norm_eps
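
+ # RMSNorm: y = w * x / sqrt(mean(x^2) + eps); the variance is computed in float32 for numerical stability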
 
+ class IsaiRMSNorm(nn.Module):
+     def __init__(self, hidden_size, eps=1e-6):
+         super().__init__()
+         self.weight = nn.Parameter(torch.ones(hidden_size))
+         self.variance_epsilon = eps
+     def forward(self, hidden_states):
+         input_dtype = hidden_states.dtype
+         hidden_states = hidden_states.to(torch.float32)
+         variance = hidden_states.pow(2).mean(-1, keepdim=True)
+         hidden_states = hidden_states * torch.rsqrt(variance + self.variance_epsilon)
+         return self.weight * hidden_states.to(input_dtype)
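
+ # Note: "self_attn" here is a single Linear layer used as a residual mixing block,
+ # not multi-head attention; the forward pass below mirrors this simplified design.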
+ class IsaiForCausalLM(PreTrainedModel):
+     config_class = IsaiConfig
+     def __init__(self, config):
+         super().__init__(config)
+         self.model = nn.ModuleDict({
+             "embed_tokens": nn.Embedding(config.vocab_size, config.hidden_size),
+             "layers": nn.ModuleList([nn.ModuleDict({
+                 "input_layernorm": IsaiRMSNorm(config.hidden_size, eps=config.rms_norm_eps),
+                 "post_attention_layernorm": IsaiRMSNorm(config.hidden_size, eps=config.rms_norm_eps),
+                 "self_attn": nn.Linear(config.hidden_size, config.hidden_size, bias=False),
+                 "mlp": nn.ModuleDict({
+                     "gate_proj": nn.Linear(config.hidden_size, config.intermediate_size, bias=False),
+                     "up_proj": nn.Linear(config.hidden_size, config.intermediate_size, bias=False),
+                     "down_proj": nn.Linear(config.intermediate_size, config.hidden_size, bias=False),
+                 })
+             }) for _ in range(config.num_hidden_layers)]),
+             "norm": IsaiRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
+         })
+         self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False)
+         self.post_init()
+
+     def forward(self, input_ids=None, **kwargs):
+         hidden_states = self.model.embed_tokens(input_ids)
+         for layer in self.model.layers:
+             h = layer.input_layernorm(hidden_states)
+             hidden_states = hidden_states + layer.self_attn(h)
+             h = layer.post_attention_layernorm(hidden_states)
+             hidden_states = hidden_states + layer.mlp.down_proj(nn.functional.silu(layer.mlp.gate_proj(h)) * layer.mlp.up_proj(h))
+         logits = self.lm_head(self.model.norm(hidden_states))
+         return logits

+ # 2. Load Model and Tokenizer
+ model_dir = "models/isai-v4.2"
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ tokenizer = PreTrainedTokenizerFast.from_pretrained(model_dir)
+ tokenizer._tokenizer.decoder = decoders.ByteLevel()  # Critical for jaso restoration
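+ # (The ByteLevel decoder reverses the byte-level token mapping back to raw UTF-8,
+ # so the decomposed jaso survive decoding intact.)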
+
+ config = IsaiConfig.from_pretrained(model_dir)
+ model = IsaiForCausalLM(config).to(device)
+
+ # Prioritize safetensors
+ weights_path = os.path.join(model_dir, "model.safetensors")
+ if os.path.exists(weights_path):
+     from safetensors.torch import load_file
+     model.load_state_dict(load_file(weights_path))
+ else:
+     model.load_state_dict(torch.load(os.path.join(model_dir, "pytorch_model.bin"), map_location=device))
+ model.eval()
 
+ # 3. Define the Prediction Logic with Jaso Processing
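+ # NFD splits each Hangul syllable into its jaso (e.g. "ν•œ" -> U+1112 U+1161 U+11AB);
+ # NFC recombines the generated jaso back into complete syllables.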
86
+ def predict(message, history):
87
+ # A. NFD Decomposition (Input)
88
+ decomposed_input = unicodedata.normalize('NFD', message)
89
+ input_ids = tokenizer.encode(decomposed_input, return_tensors="pt").to(device)
90
+
91
+ current_ids = input_ids
92
+ max_new_tokens = 50
93
 
94
+ # B. Generate tokens
95
+ for _ in range(max_new_tokens):
96
+ with torch.no_grad():
97
+ logits = model(current_ids)
98
+ next_token = torch.argmax(logits[:, -1, :], dim=-1).unsqueeze(0)
99
+ current_ids = torch.cat([current_ids, next_token], dim=-1)
100
+ if next_token.item() == tokenizer.eos_token_id:
101
+ break
 
 
 
 
 
 
 
 
 
 
102
 
103
+ # C. Decode and NFC Recomposition (Output)
104
+ # Only decode the generated part
105
+ generated_tokens = current_ids[0][input_ids.shape[1]:]
106
+ raw_response = tokenizer.decode(generated_tokens, skip_special_tokens=True)
107
+ final_response = unicodedata.normalize('NFC', raw_response)
108
+
109
+ return final_response
110
 
+ # 4. Create and Launch Gradio Interface
+ demo = gr.ChatInterface(
+     fn=predict,
+     title="isai-v4.2 Jaso-Level Chat",
+     # Description (Korean): "An ultra-compact everyday chat model that communicates at the
+     # jaso (NFD) level. Input is decomposed automatically and output is recomposed into Hangul."
+     description="μžμ†Œ λ‹¨μœ„(NFD)둜 μ†Œν†΅ν•˜λŠ” μ΄ˆμ†Œν˜• 일상 λŒ€ν™” λͺ¨λΈμž…λ‹ˆλ‹€. μž…λ ₯은 μžλ™μœΌλ‘œ λΆ„ν•΄λ˜κ³  좜λ ₯은 λ‹€μ‹œ ν•œκΈ€λ‘œ μ‘°ν•©λ©λ‹ˆλ‹€.",
+     # Examples (Korean): "Hi, nice to meet you." / "How's the weather today?" / "What's your name?"
+     examples=["μ•ˆλ…•? λ°˜κ°€μ›Œ.", "였늘 날씨가 μ–΄λ•Œ?", "λ„ˆμ˜ 이름은 뭐야?"]
+ )

  if __name__ == "__main__":
+     demo.launch(share=True)
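
For reference, a minimal sketch of the jaso round trip that predict() relies on, using only the Python standard library (the sample string is illustrative):

import unicodedata

s = "μ•ˆλ…•"
jaso = unicodedata.normalize("NFD", s)          # decompose syllables into jaso
print([f"U+{ord(c):04X}" for c in jaso])        # ['U+110B', 'U+1161', 'U+11AB', 'U+1102', 'U+1167', 'U+11BC']
print(unicodedata.normalize("NFC", jaso) == s)  # True: recomposition restores the original syllables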