Spaces:

Really-Amazing
/

SimpleAI-259M

Sleeping

App Files Community

suraj-self committed on Mar 15

Commit

e9e19db

1 Parent(s): c430d50

updated

Browse files

Files changed (3) hide show

Dockerfile +11 -41
app.py +83 -83
requirements.txt +3 -3

Dockerfile CHANGED Viewed

@@ -1,56 +1,26 @@
-# STAGE 1: The Builder (Heavy Lifting)
-FROM python:3.10-slim AS builder
-WORKDIR /build
-# Install build dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    build-essential curl git rustc cargo \
-    && rm -rf /var/lib/apt/lists/*
-# Build Karpathy's rustbpe
-RUN git clone https://github.com/karpathy/rustbpe.git \
-    && pip install --no-cache-dir maturin \
-    && cd rustbpe && maturin build --release --out dist
-# STAGE 2: The Final App (Slim & Fast)
 FROM python:3.10-slim
 WORKDIR /app
-# Install only runtime dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
     libstdc++6 \
     && rm -rf /var/lib/apt/lists/*
-# Copy the wheel from the builder and install it
-COPY --from=builder /build/rustbpe/dist/*.whl .
-RUN pip install *.whl && rm *.whl
-# Install requirements (Optimized for CPU/Space)
 COPY requirements.txt .
-RUN pip install --no-cache-dir -r requirements.txt \
-    && pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
-# Copy project files
 COPY . .
-# --- THE PRO FIX (Correcting the paths) ---
-# Note: Hugging Face Spaces run as user "user" (UID 1000), not root.
-# We create the cache for both potential users to be safe.
-RUN mkdir -p /root/.cache/nanochat/tokenizer/ && \
-    cp tokenizer.pkl /root/.cache/nanochat/tokenizer/tokenizer.pkl && \
-    cp token_bytes.pt /root/.cache/nanochat/tokenizer/token_bytes.pt
-# Set permissions for the HF "user"
-RUN mkdir -p /.cache/nanochat/tokenizer/ && \
-    chmod -R 777 /.cache && \
-    cp tokenizer.pkl /.cache/nanochat/tokenizer/tokenizer.pkl && \
-    cp token_bytes.pt /.cache/nanochat/tokenizer/token_bytes.pt
-# Clean up and finish
-RUN pip cache purge
-EXPOSE 7860
-ENV GRADIO_SERVER_NAME="0.0.0.0"
 CMD ["python", "app.py"]

 FROM python:3.10-slim
 WORKDIR /app
+# system deps
 RUN apt-get update && apt-get install -y --no-install-recommends \
+    git \
     libstdc++6 \
     && rm -rf /var/lib/apt/lists/*
+# install torch cpu first
+RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
+# install python deps
 COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# install rustbpe directly (no pip cache, consistent with the installs above)
+RUN pip install --no-cache-dir rustbpe
+# copy repo
 COPY . .
+# app.py loads the tokenizer through nanochat's get_tokenizer(), which is
+# expected to read <base dir>/tokenizer/ rather than /app, so stage the
+# tokenizer files there. NOTE(review): confirm the vendored nanochat honours
+# NANOCHAT_BASE_DIR; pinning it avoids depending on the runtime user's $HOME.
+ENV NANOCHAT_BASE_DIR=/app/.cache/nanochat
+RUN mkdir -p "$NANOCHAT_BASE_DIR/tokenizer" \
+    && cp tokenizer.pkl token_bytes.pt "$NANOCHAT_BASE_DIR/tokenizer/"
+ENV GRADIO_SERVER_NAME=0.0.0.0
 CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -1,93 +1,93 @@
-import json
-import pickle
-import torch
 import gradio as gr
 from nanochat.gpt import GPT, GPTConfig
-print("🚀 Loading NanoChat...")
-# -----------------------
-# Load tokenizer
-# -----------------------
-with open("tokenizer.pkl", "rb") as f:
-    tokenizer = pickle.load(f)
-print("Tokenizer loaded")
-# -----------------------
-# Load model config
-# -----------------------
-with open("meta_000971.json") as f:
     meta = json.load(f)
-config = GPTConfig(**meta)
-# -----------------------
-# Build model
-# -----------------------
 model = GPT(config)
-print("Loading weights...")
-state_dict = torch.load("model_000971.pt", map_location="cpu")
-state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
-model.load_state_dict(state_dict, strict=False)
 model.eval()
-print("✅ NanoChat ready")
-# -----------------------
-# Chat function
-# -----------------------
-def generate_reply(message, history):
-    tokens = [tokenizer.bos_token_id]
-    for user, assistant in history:
-        tokens += [tokenizer.user_start_id] + tokenizer.encode(user) + [tokenizer.user_end_id]
-        tokens += [tokenizer.assistant_start_id] + tokenizer.encode(assistant) + [tokenizer.assistant_end_id]
-    tokens += [tokenizer.user_start_id] + tokenizer.encode(message) + [tokenizer.user_end_id]
-    tokens.append(tokenizer.assistant_start_id)
-    input_ids = torch.tensor([tokens])
-    with torch.no_grad():
-        output = model.generate(
-            input_ids,
-            max_tokens=256,
-            temperature=0.8,
-            top_k=40
-        )
-    new_tokens = output[0][input_ids.shape[1]:]
-    text = tokenizer.decode(new_tokens.tolist())
-    for tag in ["<|assistant_end|>", "<|end|>"]:
-        text = text.split(tag)[0]
-    return text.strip()
-# -----------------------
-# UI
-# -----------------------
-demo = gr.ChatInterface(
-    fn=generate_reply,
-    title="🧸 NanoChat ClimbMix D12",
-    description="Small locally-trained NanoChat model running on HuggingFace Spaces",
-    examples=[
-        "Hi!",
-        "Explain UPI",
-        "Tell me a joke"
-    ],
-)
-demo.launch(server_name="0.0.0.0", server_port=7860)

 import gradio as gr
+import torch
 from nanochat.gpt import GPT, GPTConfig
+from nanochat.tokenizer import get_tokenizer
+import json
+import os
+# --------------------------
+# 1) Load tokenizer
+# --------------------------
+tokenizer = get_tokenizer()
+# --------------------------
+# 2) Load model config & weights
+# --------------------------
+meta_path = "meta_000971.json"
+model_path = "model_000971.pt"
+with open(meta_path, "r") as f:
     meta = json.load(f)
+# The model hyperparameters live under the "model_config" key of the meta file
+config = GPTConfig(**meta["model_config"])
 model = GPT(config)
+checkpoint = torch.load(model_path, map_location="cpu")
+# A checkpoint saved from a torch.compile()-wrapped model prefixes every key
+# with "_orig_mod."; strip it so the keys match the plain GPT module and the
+# strict load below does not fail on key mismatch.
+checkpoint = {k.replace("_orig_mod.", ""): v for k, v in checkpoint.items()}
+model.load_state_dict(checkpoint)
 model.eval()
+# Optional: Torch compile for CPU optimization (best effort; the app keeps
+# running with the eager model if compile raises here)
+try:
+    model = torch.compile(model)
+except Exception as e:
+    print(f"Torch compile skipped: {e}")
+# --------------------------
+# 3) Helper functions
+# --------------------------
+def chat_with_model(conversation_history, user_input, max_tokens=128, temperature=0.8, top_k=40):
+    """
+    Generate one assistant reply to `user_input` given the earlier turns.
+
+    conversation_history: list of {"role": "user"/"assistant", "content": str};
+        the new user and assistant messages are appended to it in place
+    user_input: str
+    max_tokens, temperature, top_k: forwarded to model.generate
+    Returns updated conversation and assistant's response
+    """
+    # Append user's message
+    conversation_history.append({"role": "user", "content": user_input})
+    # Render tokens for completion; the trailing empty assistant message marks
+    # the turn the model should fill in
+    conv_for_gen = {"messages": conversation_history + [{"role": "assistant", "content": ""}]}
+    input_ids = tokenizer.render_for_completion(conv_for_gen)
+    # Token ids that end the assistant turn. Without this check the model keeps
+    # sampling until max_tokens and the reply runs past its own end marker.
+    # NOTE(review): assumes model.generate yields plain int token ids.
+    stop_ids = {
+        tokenizer.encode_special("<|assistant_end|>"),
+        tokenizer.get_bos_token_id(),
+    }
+    # Generate tokens
+    output_ids = []
+    for token_id in model.generate(input_ids, max_tokens=max_tokens, temperature=temperature, top_k=top_k):
+        if token_id in stop_ids:
+            break
+        output_ids.append(token_id)
+    # Decode assistant's response
+    assistant_response = tokenizer.decode(output_ids)
+    # Append assistant's message
+    conversation_history.append({"role": "assistant", "content": assistant_response})
+    return conversation_history, assistant_response
+# --------------------------
+# 4) Gradio UI
+# --------------------------
+with gr.Blocks() as demo:
+    gr.Markdown("## NanoChat ClimbMix D12 🐍\nCPU-friendly GPT chat")
+    chatbot = gr.Chatbot()
+    msg = gr.Textbox(label="Your message")
+    clear = gr.Button("Clear")
+    def user_send(message, history):
+        """Handle a submitted message.
+
+        message: text from the textbox
+        history: chatbot value, a list of (user, assistant) pairs, or None
+        Returns (updated chatbot history, "" to clear the textbox)
+        """
+        history = history or []
+        # Ignore blank submissions instead of asking the model to answer nothing
+        if not message or not message.strip():
+            return history, ""
+        # Flatten the (user, assistant) pairs into alternating turns. The pairs
+        # can be lists as well as tuples, and sum(history, ()) raises TypeError
+        # on lists, so iterate over them instead.
+        turns = [m for pair in history for m in pair]
+        conversation_history = [{"role": "user" if i % 2 == 0 else "assistant", "content": m} for i, m in enumerate(turns)]
+        _, assistant_response = chat_with_model(conversation_history, message)
+        # Gradio-friendly format: list of (user, assistant) tuples
+        gr_history = [tuple(pair) for pair in history]
+        gr_history.append((message, assistant_response))
+        return gr_history, ""
+    msg.submit(user_send, [msg, chatbot], [chatbot, msg])
+    # Clearing returns None as the chatbot value, which empties the history
+    clear.click(lambda: None, None, chatbot, queue=False)
+# --------------------------
+# 5) Launch
+# --------------------------
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
-gradio
 tokenizers
 tiktoken
 numpy
-torch
-fsspec

+gradio>=4.0
 tokenizers
 tiktoken
 numpy
+fsspec
+rustbpe