Spaces:
Sleeping
Sleeping
Commit ·
e9e19db
1
Parent(s): c430d50
updated
Browse files- Dockerfile +11 -41
- app.py +83 -83
- requirements.txt +3 -3
Dockerfile
CHANGED
|
@@ -1,56 +1,26 @@
|
|
| 1 |
-
# STAGE 1: The Builder (Heavy Lifting)
|
| 2 |
-
FROM python:3.10-slim AS builder
|
| 3 |
-
|
| 4 |
-
WORKDIR /build
|
| 5 |
-
|
| 6 |
-
# Install build dependencies
|
| 7 |
-
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 8 |
-
build-essential curl git rustc cargo \
|
| 9 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 10 |
-
|
| 11 |
-
# Build Karpathy's rustbpe
|
| 12 |
-
RUN git clone https://github.com/karpathy/rustbpe.git \
|
| 13 |
-
&& pip install --no-cache-dir maturin \
|
| 14 |
-
&& cd rustbpe && maturin build --release --out dist
|
| 15 |
-
|
| 16 |
-
# STAGE 2: The Final App (Slim & Fast)
|
| 17 |
FROM python:3.10-slim
|
| 18 |
|
| 19 |
WORKDIR /app
|
| 20 |
|
| 21 |
-
#
|
| 22 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
|
|
| 23 |
libstdc++6 \
|
| 24 |
&& rm -rf /var/lib/apt/lists/*
|
| 25 |
|
| 26 |
-
#
|
| 27 |
-
|
| 28 |
-
RUN pip install *.whl && rm *.whl
|
| 29 |
|
| 30 |
-
#
|
| 31 |
COPY requirements.txt .
|
| 32 |
-
RUN pip install --no-cache-dir -r requirements.txt
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
#
|
| 36 |
COPY . .
|
| 37 |
|
| 38 |
-
|
| 39 |
-
# Note: Hugging Face Spaces run as user "user" (UID 1000), not root.
|
| 40 |
-
# We create the cache for both potential users to be safe.
|
| 41 |
-
RUN mkdir -p /root/.cache/nanochat/tokenizer/ && \
|
| 42 |
-
cp tokenizer.pkl /root/.cache/nanochat/tokenizer/tokenizer.pkl && \
|
| 43 |
-
cp token_bytes.pt /root/.cache/nanochat/tokenizer/token_bytes.pt
|
| 44 |
-
|
| 45 |
-
# Set permissions for the HF "user"
|
| 46 |
-
RUN mkdir -p /.cache/nanochat/tokenizer/ && \
|
| 47 |
-
chmod -R 777 /.cache && \
|
| 48 |
-
cp tokenizer.pkl /.cache/nanochat/tokenizer/tokenizer.pkl && \
|
| 49 |
-
cp token_bytes.pt /.cache/nanochat/tokenizer/token_bytes.pt
|
| 50 |
-
|
| 51 |
-
# Clean up and finish
|
| 52 |
-
RUN pip cache purge
|
| 53 |
-
EXPOSE 7860
|
| 54 |
-
ENV GRADIO_SERVER_NAME="0.0.0.0"
|
| 55 |
|
| 56 |
CMD ["python", "app.py"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Single-stage CPU image for the NanoChat Gradio app.
FROM python:3.10-slim

WORKDIR /app

# System deps: git and the C++ runtime library.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    libstdc++6 \
    && rm -rf /var/lib/apt/lists/*

# Install torch from the PyTorch CPU wheel index before the other requirements.
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu

# Install Python deps before copying the sources, so this layer is only
# rebuilt when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Install rustbpe directly. --no-cache-dir matches the other pip layers and
# keeps pip's download cache out of the image.
RUN pip install --no-cache-dir rustbpe

# Copy the repo contents into /app.
COPY . .

ENV GRADIO_SERVER_NAME=0.0.0.0

# app.py launches the Gradio server on port 7860.
EXPOSE 7860

CMD ["python", "app.py"]
|
app.py
CHANGED
|
@@ -1,93 +1,93 @@
|
|
| 1 |
-
import json
|
| 2 |
-
import pickle
|
| 3 |
-
import torch
|
| 4 |
import gradio as gr
|
| 5 |
-
|
| 6 |
from nanochat.gpt import GPT, GPTConfig
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
# -----------------------
|
| 11 |
-
|
| 12 |
-
# -----------------------
|
| 13 |
-
|
| 14 |
-
with open("tokenizer.pkl", "rb") as f:
|
| 15 |
-
tokenizer = pickle.load(f)
|
| 16 |
-
|
| 17 |
-
print("Tokenizer loaded")
|
| 18 |
|
| 19 |
-
# -----------------------
|
| 20 |
-
# Load model config
|
| 21 |
-
# -----------------------
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
with open("
|
| 24 |
meta = json.load(f)
|
| 25 |
|
| 26 |
-
config = GPTConfig(**meta)
|
| 27 |
-
|
| 28 |
-
# -----------------------
|
| 29 |
-
# Build model
|
| 30 |
-
# -----------------------
|
| 31 |
-
|
| 32 |
model = GPT(config)
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
state_dict = torch.load("model_000971.pt", map_location="cpu")
|
| 37 |
-
state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
|
| 38 |
-
|
| 39 |
-
model.load_state_dict(state_dict, strict=False)
|
| 40 |
model.eval()
|
| 41 |
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
)
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
return
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
#
|
| 79 |
-
#
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
"
|
| 90 |
-
|
| 91 |
-
)
|
| 92 |
-
|
| 93 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
+
import torch
|
| 3 |
from nanochat.gpt import GPT, GPTConfig
|
| 4 |
+
from nanochat.tokenizer import get_tokenizer
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
|
| 8 |
+
# --------------------------
# 1) Load tokenizer
# --------------------------
tokenizer = get_tokenizer()

# --------------------------
# 2) Load model config & weights
# --------------------------
meta_path = "meta_000971.json"
model_path = "model_000971.pt"

with open(meta_path, "r", encoding="utf-8") as f:
    meta = json.load(f)

# The GPT hyperparameters live under the "model_config" key of the meta file.
config = GPTConfig(**meta["model_config"])
model = GPT(config)

checkpoint = torch.load(model_path, map_location="cpu")
# A state dict saved from a torch.compile()-wrapped module has every key
# prefixed with "_orig_mod."; strip it so the keys match the plain GPT module.
# Keys without the prefix are left unchanged.
checkpoint = {k.removeprefix("_orig_mod."): v for k, v in checkpoint.items()}
model.load_state_dict(checkpoint)
model.eval()

# Optional: Torch compile for CPU optimization.
# NOTE(review): torch.compile defers the actual compilation to the first call,
# so this handler presumably only covers setups where compile is unavailable
# up front — confirm.
try:
    model = torch.compile(model)
except Exception as e:
    print(f"Torch compile skipped: {e}")
|
| 33 |
+
|
| 34 |
+
# --------------------------
|
| 35 |
+
# 3) Helper functions
|
| 36 |
+
# --------------------------
|
| 37 |
+
def chat_with_model(conversation_history, user_input, max_tokens=128, temperature=0.8, top_k=40):
    """Generate one assistant reply and record the exchange in the conversation.

    Args:
        conversation_history: list of {"role": "user"/"assistant", "content": str}.
            The list is extended in place with the new user message and the
            generated assistant message.
        user_input: text of the new user message.
        max_tokens: upper bound on the number of generated tokens.
        temperature: sampling temperature forwarded to ``model.generate``.
        top_k: top-k cutoff forwarded to ``model.generate``.

    Returns:
        Tuple ``(conversation_history, assistant_response)`` with the updated
        conversation and the decoded assistant reply.
    """
    # Append user's message
    conversation_history.append({"role": "user", "content": user_input})

    # Render tokens for completion; the prompt is handed over with a trailing
    # empty assistant turn.
    prompt = {"messages": conversation_history + [{"role": "assistant", "content": ""}]}
    input_ids = tokenizer.render_for_completion(prompt)

    # Generate tokens, stopping as soon as the model emits an end-of-turn
    # marker so the reply does not run on for the full max_tokens budget.
    stop_ids = _end_of_turn_ids()
    output_ids = []
    for token_id in model.generate(input_ids, max_tokens=max_tokens, temperature=temperature, top_k=top_k):
        if token_id in stop_ids:
            break
        output_ids.append(token_id)

    # Decode assistant's response
    assistant_response = tokenizer.decode(output_ids)

    # Append assistant's message
    conversation_history.append({"role": "assistant", "content": assistant_response})

    return conversation_history, assistant_response


def _end_of_turn_ids():
    """Collect the token ids that should terminate generation.

    NOTE(review): assumes the tokenizer offers ``encode_special`` and
    ``get_bos_token_id`` as the nanochat tokenizer does — confirm. When a
    method is missing it is skipped; with an empty result generation simply
    runs for ``max_tokens`` tokens.
    """
    ids = set()

    encode_special = getattr(tokenizer, "encode_special", None)
    if callable(encode_special):
        try:
            ids.add(encode_special("<|assistant_end|>"))
        except (KeyError, ValueError):
            # The vocabulary has no such special token; nothing to stop on.
            pass

    get_bos_token_id = getattr(tokenizer, "get_bos_token_id", None)
    if callable(get_bos_token_id):
        ids.add(get_bos_token_id())

    # A lookup that returned None instead of raising must not become a stop id.
    ids.discard(None)
    return ids
|
| 62 |
+
|
| 63 |
+
# --------------------------
|
| 64 |
+
# 4) Gradio UI
|
| 65 |
+
# --------------------------
|
| 66 |
+
with gr.Blocks() as demo:
    gr.Markdown("## NanoChat ClimbMix D12 🐍\nCPU-friendly GPT chat")

    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your message")
    clear = gr.Button("Clear")

    def user_send(message, history):
        """Handle a submitted message and build the new chat display.

        Args:
            message: text submitted from the textbox.
            history: current chatbot value as a sequence of (user, assistant)
                pairs; may be None or empty.
                NOTE(review): assumes the pair-style history format — confirm
                against the installed Gradio version.

        Returns:
            Tuple of the updated list of (user, assistant) tuples and an empty
            string that clears the textbox.
        """
        # Unpack pair by pair instead of concatenating with sum(history, ()):
        # tuple concatenation raises TypeError when the pairs arrive as lists.
        conversation_history = []
        for user_msg, assistant_msg in history or []:
            # A missing side of a pair is treated as an empty message.
            conversation_history.append({"role": "user", "content": user_msg or ""})
            conversation_history.append({"role": "assistant", "content": assistant_msg or ""})

        updated, _ = chat_with_model(conversation_history, message)

        # Convert to Gradio-friendly format: list of (user, assistant) tuples
        gr_history = []
        for i in range(0, len(updated), 2):
            user_msg = updated[i]["content"]
            assistant_msg = updated[i + 1]["content"] if i + 1 < len(updated) else ""
            gr_history.append((user_msg, assistant_msg))
        return gr_history, ""

    # Submitting the textbox updates the chat and clears the textbox.
    msg.submit(user_send, [msg, chatbot], [chatbot, msg])
    # The Clear button resets the chatbot value to None.
    clear.click(lambda: None, None, chatbot, queue=False)

# --------------------------
# 5) Launch
# --------------------------
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
|
requirements.txt
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
-
gradio
|
| 2 |
tokenizers
|
| 3 |
tiktoken
|
| 4 |
numpy
|
| 5 |
-
|
| 6 |
-
|
|
|
|
| 1 |
+
gradio>=4.0
|
| 2 |
tokenizers
|
| 3 |
tiktoken
|
| 4 |
numpy
|
| 5 |
+
fsspec
|
| 6 |
+
rustbpe
|