Spaces:

Really-Amazing
/

SimpleAI-259M

Sleeping

App Files Files Community

suraj-self committed on Mar 15

Commit

f62dc29

1 Parent(s): e9e19db

updated

Browse files

Files changed (3) hide show

Dockerfile +19 -12
app.py +82 -85
requirements.txt +3 -3

Dockerfile CHANGED Viewed

@@ -2,25 +2,32 @@ FROM python:3.10-slim
 WORKDIR /app
-# system deps
 RUN apt-get update && apt-get install -y --no-install-recommends \
-    git \
-    libstdc++6 \
     && rm -rf /var/lib/apt/lists/*
-# install torch cpu first
-RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
-# install python deps
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
-# install rustbpe directly
-RUN pip install rustbpe
-# copy repo
 COPY . .
-ENV GRADIO_SERVER_NAME=0.0.0.0
 CMD ["python", "app.py"]

 WORKDIR /app
+# Install build tools for Rust-based components
 RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential curl git rustc cargo \
     && rm -rf /var/lib/apt/lists/*
+# Install python dependencies
 COPY requirements.txt .
 RUN pip install --no-cache-dir -r requirements.txt
+# Copy all files from your repo root to /app
 COPY . .
+# --- THE CRITICAL FIX ---
+# Nanochat looks for these in a specific hidden path.
+# We create that path and copy your uploaded files there.
+RUN mkdir -p /root/.cache/nanochat/tokenizer/ && \
+    cp tokenizer.pkl /root/.cache/nanochat/tokenizer/tokenizer.pkl && \
+    cp token_bytes.pt /root/.cache/nanochat/tokenizer/token_bytes.pt
+# Ensure the Hugging Face 'user' (UID 1000) can also see them
+RUN mkdir -p /.cache/nanochat/tokenizer/ && \
+    cp tokenizer.pkl /.cache/nanochat/tokenizer/tokenizer.pkl && \
+    cp token_bytes.pt /.cache/nanochat/tokenizer/token_bytes.pt && \
+    chmod -R 777 /.cache
+EXPOSE 7860
+ENV GRADIO_SERVER_NAME="0.0.0.0"
 CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -1,93 +1,90 @@
-import gradio as gr
 import torch
 from nanochat.gpt import GPT, GPTConfig
-from nanochat.tokenizer import get_tokenizer
-import json
-import os
-# --------------------------
-# 1) Load tokenizer
-# --------------------------
-tokenizer = get_tokenizer()
-# --------------------------
-# 2) Load model config & weights
-# --------------------------
-meta_path = "meta_000971.json"
-model_path = "model_000971.pt"
-with open(meta_path, "r") as f:
-    meta = json.load(f)
-config = GPTConfig(**meta["model_config"])
 model = GPT(config)
-checkpoint = torch.load(model_path, map_location="cpu")
-model.load_state_dict(checkpoint)
-model.eval()
-# Optional: Torch compile for CPU optimization
-try:
-    model = torch.compile(model)
-except Exception as e:
-    print(f"Torch compile skipped: {e}")
-# --------------------------
-# 3) Helper functions
-# --------------------------
-def chat_with_model(conversation_history, user_input, max_tokens=128, temperature=0.8, top_k=40):
-    """
-    conversation_history: list of {"role": "user"/"assistant", "content": str}
-    user_input: str
-    Returns updated conversation and assistant's response
-    """
-    # Append user's message
-    conversation_history.append({"role": "user", "content": user_input})
-    # Render tokens for completion
-    conv_for_gen = {"messages": conversation_history + [{"role": "assistant", "content": ""}]}
-    input_ids = tokenizer.render_for_completion(conv_for_gen)
-    # Generate tokens
-    output_ids = []
-    for token_id in model.generate(input_ids, max_tokens=max_tokens, temperature=temperature, top_k=top_k):
-        output_ids.append(token_id)
-    # Decode assistant's response
-    assistant_response = tokenizer.decode(output_ids)
-    # Append assistant's message
-    conversation_history.append({"role": "assistant", "content": assistant_response})
-    return conversation_history, assistant_response
-# --------------------------
-# 4) Gradio UI
-# --------------------------
-with gr.Blocks() as demo:
-    gr.Markdown("## NanoChat ClimbMix D12 🐍\nCPU-friendly GPT chat")
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox(label="Your message")
-    clear = gr.Button("Clear")
-    def user_send(message, history):
-        history = history or []
-        conversation_history = [{"role": "user" if i % 2 == 0 else "assistant", "content": m} for i, m in enumerate(sum(history, ()))]
-        history, assistant_response = chat_with_model(conversation_history, message)
-        # Convert to Gradio-friendly format: list of (user, assistant) tuples
-        gr_history = []
-        for i in range(0, len(history), 2):
-            user_msg = history[i]["content"]
-            assistant_msg = history[i + 1]["content"] if i + 1 < len(history) else ""
-            gr_history.append((user_msg, assistant_msg))
-        return gr_history, ""
-    msg.submit(user_send, [msg, chatbot], [chatbot, msg])
-    clear.click(lambda: None, None, chatbot, queue=False)
-# --------------------------
-# 5) Launch
-# --------------------------
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

+import os
 import torch
+import gradio as gr
 from nanochat.gpt import GPT, GPTConfig
+from nanochat.tokenizer import RustBPETokenizer
+# Logic to find the tokenizer files
+# 1. Check local root, 2. Check the hidden cache
+local_path = "."
+cache_path = os.path.expanduser("~/.cache/nanochat/tokenizer/")
+TOKENIZER_DIR = local_path if os.path.exists(os.path.join(local_path, "token_bytes.pt")) else cache_path
+print(f"--- System Initialization ---")
+print(f"Loading tokenizer from: {os.path.abspath(TOKENIZER_DIR)}")
+# Load Tokenizer
+tokenizer = RustBPETokenizer.from_directory(TOKENIZER_DIR)
+# Map IDs (Ensure these strings match your training config)
+tokenizer.bos_token_id = tokenizer.enc.encode_single_token("<|bos|>")
+tokenizer.user_start_id = tokenizer.enc.encode_single_token("<|user_start|>")
+tokenizer.user_end_id = tokenizer.enc.encode_single_token("<|user_end|>")
+tokenizer.assistant_start_id = tokenizer.enc.encode_single_token("<|assistant_start|>")
+tokenizer.assistant_end_id = tokenizer.enc.encode_single_token("<|assistant_end|>")
+# Model Architecture (D12 ClimbMix)
+config = GPTConfig(
+    vocab_size=32768,
+    n_layer=12,
+    n_head=6,
+    n_embd=768,
+    sequence_len=2048
+)
 model = GPT(config)
+print("Loading model weights...")
+state_dict = torch.load("model_000971.pt", map_location="cpu")
+# Clean the '_orig_mod' prefix from compiled training
+state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
+model.load_state_dict(state_dict, strict=False)
+model.eval()
+print("Toddler is online!")
+def predict(message, history):
+    # Prepare the sequence with Chat ML tags
+    tokens = [tokenizer.bos_token_id]
+    for human, assistant in history:
+        tokens.extend([tokenizer.user_start_id] + tokenizer.encode(human) + [tokenizer.user_end_id])
+        if assistant:
+            tokens.extend([tokenizer.assistant_start_id] + tokenizer.encode(assistant) + [tokenizer.assistant_end_id])
+    tokens.extend([tokenizer.user_start_id] + tokenizer.encode(message) + [tokenizer.user_end_id])
+    tokens.append(tokenizer.assistant_start_id)
+    input_ids = torch.tensor([tokens], dtype=torch.long)
+    with torch.no_grad():
+        # Using the standard generate call
+        output = model.generate(input_ids, max_tokens=512, temperature=0.8)
+        # Determine if output is streaming or static tensor
+        if isinstance(output, torch.Tensor):
+            # Static: Slice new tokens and decode
+            new_tokens = output[0][input_ids.shape[1]:]
+            response = tokenizer.decode(new_tokens.tolist())
+            # Clean up trailing tags
+            for tag in ["<|assistant_end|>", "<|end|>", "<|user_start|>"]:
+                response = response.split(tag)[0]
+            yield response.strip()
+        else:
+            # Streaming: Iterate through generator
+            generated_text = ""
+            for token in output:
+                token_id = token if isinstance(token, int) else token.item()
+                char = tokenizer.decode([token_id])
+                if "<|assistant_end|>" in char:
+                    break
+                generated_text += char
+                yield generated_text.strip()
+# Launching with Gradio 6.0 compatible settings
+demo = gr.ChatInterface(
+    fn=predict,
+    title="🧸 NanoChat-D12",
+    description="Optimized for CPU inference."
+)
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
-gradio>=4.0
-tokenizers
-tiktoken
 numpy
 fsspec
 rustbpe

+torch --index-url https://download.pytorch.org/whl/cpu
+gradio
 numpy
+tiktoken
 fsspec
 rustbpe