Wilsonwin committed
Commit 999f17d · 1 parent: 1b7e8af

Add Mini-GPT Gradio app

Files changed (3)
  1. README.md +20 -9
  2. app.py +261 -53
  3. requirements.txt +6 -0
README.md CHANGED
@@ -1,16 +1,27 @@
 ---
-title: Mini Gpt Demo
-emoji: 💬
-colorFrom: yellow
+title: Mini-GPT Text Generation
+emoji: 🤖
+colorFrom: blue
 colorTo: purple
 sdk: gradio
-sdk_version: 5.42.0
+sdk_version: 4.44.0
 app_file: app.py
 pinned: false
-hf_oauth: true
-hf_oauth_scopes:
-- inference-api
-license: mit
+license: apache-2.0
 ---

-An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
+# Mini-GPT Text Generation
+
+A small GPT model trained on Kaggle TPUs with JAX/Flax.
+
+## Features
+
+- Supports text generation in Chinese and English
+- Adjustable generation length and temperature
+
+## Model Info
+
+- **Architecture**: GPT-2-style transformer
+- **Parameters**: ~25M
+- **Training framework**: JAX/Flax
+- **Training hardware**: Kaggle TPU v3-8
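
The Model Info above can be cross-checked against the configuration committed in `app.py` below. A minimal sketch, assuming the `MiniGPT` class and `CONFIG` dict from `app.py` are importable (note that importing `app.py` as committed also runs its module-level tokenizer and checkpoint loading, so it needs network access):

```python
# Hypothetical shape and parameter-count check; not part of the commit.
import jax
import jax.numpy as jnp

from app import MiniGPT, CONFIG  # CONFIG["vocab_size"] is filled in at import time

model = MiniGPT(
    vocab_size=CONFIG["vocab_size"],
    max_len=CONFIG["max_len"],
    embed_dim=CONFIG["embed_dim"],
    num_heads=CONFIG["num_heads"],
    num_layers=CONFIG["num_layers"],
    ff_dim=CONFIG["ff_dim"],
)

dummy = jnp.ones((1, 16), dtype=jnp.int32)
params = model.init(jax.random.PRNGKey(0), dummy)["params"]
print(model.apply({"params": params}, dummy).shape)  # (1, 16, vocab_size)

n_params = sum(p.size for p in jax.tree_util.tree_leaves(params))
print(f"{n_params / 1e6:.1f}M parameters")  # compare with the ~25M figure above
```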
app.py CHANGED
@@ -1,70 +1,278 @@
+"""
+HuggingFace Spaces Gradio app for Mini-GPT.
+Deploy by uploading this file to HuggingFace Spaces.
+"""
+
 import gradio as gr
-from huggingface_hub import InferenceClient
-
-
-def respond(
-    message,
-    history: list[dict[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    hf_token: gr.OAuthToken,
-):
-    """
-    For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-    """
-    client = InferenceClient(token=hf_token.token, model="openai/gpt-oss-20b")
-
-    messages = [{"role": "system", "content": system_message}]
-
-    messages.extend(history)
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-
-        response += token
-        yield response
-
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-chatbot = gr.ChatInterface(
-    respond,
-    type="messages",
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
+import jax
+import jax.numpy as jnp
+import flax.linen as nn
+from huggingface_hub import hf_hub_download
+import orbax.checkpoint as ocp
+from typing import List, Optional, Union
+import os
+
+# ============================================================================
+# Model definition (must match training)
+# ============================================================================
+
+class TokenAndPositionEmbedding(nn.Module):
+    vocab_size: int
+    max_len: int
+    embed_dim: int
+
+    @nn.compact
+    def __call__(self, x):
+        seq_len = x.shape[1]
+        positions = jnp.arange(seq_len)
+        tok_emb = nn.Embed(self.vocab_size, self.embed_dim, name='token_emb')(x)
+        pos_emb = nn.Embed(self.max_len, self.embed_dim, name='pos_emb')(positions)
+        return tok_emb + pos_emb
+
+
+class TransformerBlock(nn.Module):
+    embed_dim: int
+    num_heads: int
+    ff_dim: int
+    dropout_rate: float = 0.1
+
+    @nn.compact
+    def __call__(self, x, training: bool = False):
+        attn_output = nn.SelfAttention(
+            num_heads=self.num_heads,
+            qkv_features=self.embed_dim,
+            dropout_rate=self.dropout_rate,
+            deterministic=True,  # no dropout at inference time
+            decode=False,
+        )(x, mask=nn.make_causal_mask(jnp.ones((x.shape[0], x.shape[1]))))
+
+        x = nn.LayerNorm()(x + attn_output)
+
+        ffn_output = nn.Dense(self.ff_dim)(x)
+        ffn_output = nn.gelu(ffn_output)
+        ffn_output = nn.Dense(self.embed_dim)(ffn_output)
+
+        x = nn.LayerNorm()(x + ffn_output)
+        return x
+
+
+class MiniGPT(nn.Module):
+    vocab_size: int
+    max_len: int
+    embed_dim: int
+    num_heads: int
+    num_layers: int
+    ff_dim: int
+    dropout_rate: float = 0.1
+
+    @nn.compact
+    def __call__(self, x, training: bool = False):
+        x = TokenAndPositionEmbedding(
+            vocab_size=self.vocab_size,
+            max_len=self.max_len,
+            embed_dim=self.embed_dim
+        )(x)
+
+        for i in range(self.num_layers):
+            x = TransformerBlock(
+                embed_dim=self.embed_dim,
+                num_heads=self.num_heads,
+                ff_dim=self.ff_dim,
+                dropout_rate=self.dropout_rate,
+                name=f'transformer_block_{i}'
+            )(x, training=training)
+
+        logits = nn.Dense(self.vocab_size, name='lm_head')(x)
+        return logits
+
+
+# ============================================================================
+# Tokenizer (Yi-1.5)
+# ============================================================================
+
+class MultilingualTokenizer:
+    def __init__(self, model_name: str = "01-ai/Yi-1.5-6B"):
+        from transformers import AutoTokenizer
+
+        self._tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            trust_remote_code=True,
+            use_fast=True
+        )
+
+        self._eot_token = self._tokenizer.eos_token_id
+        self._pad_token = self._tokenizer.pad_token_id if self._tokenizer.pad_token_id is not None else 0
+
+        raw_vocab = len(self._tokenizer)
+        self._padded_vocab = ((raw_vocab // 128) + 1) * 128 if raw_vocab % 128 != 0 else raw_vocab
+
+    @property
+    def padded_vocab_size(self) -> int:
+        return self._padded_vocab
+
+    @property
+    def eot_token(self) -> int:
+        return self._eot_token
+
+    def encode(self, text: str) -> List[int]:
+        return self._tokenizer.encode(text, add_special_tokens=False)
+
+    def decode(self, tokens) -> str:
+        if isinstance(tokens, int):
+            tokens = [tokens]
+        return self._tokenizer.decode(tokens, skip_special_tokens=True)
+
+
+# ============================================================================
+# Model configuration (must match training!)
+# ============================================================================
+
+CONFIG = {
+    "max_len": 256,
+    "embed_dim": 512,
+    "num_heads": 8,
+    "num_layers": 6,
+    "ff_dim": 2048,
+    "dropout_rate": 0.1,
+}
+
+REPO_ID = "Wilsonwin/handsongpt2"  # your HuggingFace repo
+
+
+# ============================================================================
+# Load the model
+# ============================================================================
+
+print("Loading tokenizer...")
+tokenizer = MultilingualTokenizer()
+CONFIG["vocab_size"] = tokenizer.padded_vocab_size
+
+print("Creating model...")
+model = MiniGPT(
+    vocab_size=CONFIG["vocab_size"],
+    max_len=CONFIG["max_len"],
+    embed_dim=CONFIG["embed_dim"],
+    num_heads=CONFIG["num_heads"],
+    num_layers=CONFIG["num_layers"],
+    ff_dim=CONFIG["ff_dim"],
+    dropout_rate=CONFIG["dropout_rate"]
+)
+
+print("Downloading checkpoint from HuggingFace...")
+checkpoint_path = hf_hub_download(
+    repo_id=REPO_ID,
+    filename="checkpoint",
+    repo_type="model",
+    local_dir="./checkpoint_dir"
 )

-with gr.Blocks() as demo:
-    with gr.Sidebar():
-        gr.LoginButton()
-    chatbot.render()
-
-
+print(f"Loading checkpoint from {checkpoint_path}...")
+checkpointer = ocp.PyTreeCheckpointer()
+state = checkpointer.restore(checkpoint_path)
+params = state['params']
+
+print("✓ Model loaded successfully!")
+
+
+# ============================================================================
+# Text generation
+# ============================================================================
+
+def generate_text(prompt: str, max_new_tokens: int = 50, temperature: float = 1.0) -> str:
+    """Generate a continuation of the prompt."""
+    input_ids = jnp.array([tokenizer.encode(prompt)], dtype=jnp.int32)
+
+    for _ in range(max_new_tokens):
+        if input_ids.shape[1] >= CONFIG["max_len"]:
+            input_ids = input_ids[:, -CONFIG["max_len"]:]
+
+        logits = model.apply({'params': params}, input_ids, training=False)
+        next_token_logits = logits[0, -1, :] / max(temperature, 0.1)
+
+        # greedy decoding
+        next_token = jnp.argmax(next_token_logits)
+
+        input_ids = jnp.concatenate([input_ids, next_token[None, None]], axis=1)
+
+        if next_token == tokenizer.eot_token:
+            break
+
+    return tokenizer.decode(input_ids[0].tolist())
+
+
+# ============================================================================
+# Gradio UI
+# ============================================================================
+
+def gradio_generate(prompt, max_tokens, temperature):
+    """Gradio callback."""
+    if not prompt.strip():
+        return "Please enter a prompt..."
+
+    result = generate_text(prompt, int(max_tokens), float(temperature))
+    return result
+
+
+# Build the UI
+with gr.Blocks(title="Mini-GPT Text Generation", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("""
+    # 🤖 Mini-GPT Text Generation
+
+    A small GPT model trained on Kaggle TPUs with JAX/Flax.
+
+    Supports Chinese and English input.
+    """)
+
+    with gr.Row():
+        with gr.Column(scale=2):
+            prompt_input = gr.Textbox(
+                label="Prompt",
+                placeholder="e.g. 从前有一个...",
+                lines=3
+            )
+
+            with gr.Row():
+                max_tokens = gr.Slider(
+                    minimum=10,
+                    maximum=100,
+                    value=50,
+                    step=10,
+                    label="Max generation length"
+                )
+                temperature = gr.Slider(
+                    minimum=0.1,
+                    maximum=2.0,
+                    value=1.0,
+                    step=0.1,
+                    label="Temperature (higher = more random)"
+                )
+
+            generate_btn = gr.Button("🚀 Generate", variant="primary")
+
+        with gr.Column(scale=2):
+            output = gr.Textbox(
+                label="Output",
+                lines=8,
+                interactive=False
+            )
+
+    # Examples
+    gr.Examples(
+        examples=[
+            ["这是", 50, 1.0],
+            ["Hello", 50, 1.0],
+            ["从前有一个", 80, 0.8],
+            ["The quick brown", 50, 1.0],
+        ],
+        inputs=[prompt_input, max_tokens, temperature],
+    )
+
+    generate_btn.click(
+        fn=gradio_generate,
+        inputs=[prompt_input, max_tokens, temperature],
+        outputs=output
+    )

+# Launch
 if __name__ == "__main__":
     demo.launch()
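
Two reviewer notes on `generate_text` as committed: the logits are divided by `temperature`, but `jnp.argmax` then selects the same token regardless (dividing by a positive scalar does not change the argmax), so the UI's temperature slider has no effect on the output; and because `input_ids` grows by one token per step, wrapping `model.apply` in `jax.jit` naively would retrace at every step, for which right-padding to a fixed `max_len` is one common fix. A minimal sketch of real temperature sampling, offered as a hypothetical drop-in for the greedy line rather than part of this commit:

```python
# Hypothetical replacement for the argmax step in generate_text.
import jax
import jax.numpy as jnp

def sample_next_token(key, next_token_logits, temperature):
    # Low temperature approaches greedy argmax; high temperature flattens the distribution.
    scaled = next_token_logits / jnp.maximum(temperature, 1e-3)
    return jax.random.categorical(key, scaled)

# Inside the generation loop, thread a PRNG key instead of calling jnp.argmax:
#   key, subkey = jax.random.split(key)
#   next_token = sample_next_token(subkey, next_token_logits, temperature)
```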
requirements.txt ADDED
@@ -0,0 +1,6 @@
+gradio>=4.0.0
+jax[cpu]
+flax
+orbax-checkpoint
+transformers
+huggingface_hub
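
For a quick local check before pushing to Spaces (a hypothetical smoke test, not part of the commit; importing `app` triggers the module-level download of the `Wilsonwin/handsongpt2` checkpoint and the Yi-1.5 tokenizer, so it needs network access):

```python
# smoke_test.py (hypothetical): exercises the committed generate_text directly.
from app import generate_text

print(generate_text("Hello", max_new_tokens=20, temperature=1.0))
print(generate_text("从前有一个", max_new_tokens=40, temperature=0.8))
```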