suraj-self committed on
Commit
e9e19db
·
1 Parent(s): c430d50
Files changed (3) hide show
  1. Dockerfile +11 -41
  2. app.py +83 -83
  3. requirements.txt +3 -3
Dockerfile CHANGED
@@ -1,56 +1,26 @@
1
- # STAGE 1: The Builder (Heavy Lifting)
2
- FROM python:3.10-slim AS builder
3
-
4
- WORKDIR /build
5
-
6
- # Install build dependencies
7
- RUN apt-get update && apt-get install -y --no-install-recommends \
8
- build-essential curl git rustc cargo \
9
- && rm -rf /var/lib/apt/lists/*
10
-
11
- # Build Karpathy's rustbpe
12
- RUN git clone https://github.com/karpathy/rustbpe.git \
13
- && pip install --no-cache-dir maturin \
14
- && cd rustbpe && maturin build --release --out dist
15
-
16
- # STAGE 2: The Final App (Slim & Fast)
17
  FROM python:3.10-slim
18
 
19
  WORKDIR /app
20
 
21
- # Install only runtime dependencies
22
  RUN apt-get update && apt-get install -y --no-install-recommends \
 
23
  libstdc++6 \
24
  && rm -rf /var/lib/apt/lists/*
25
 
26
- # Copy the wheel from the builder and install it
27
- COPY --from=builder /build/rustbpe/dist/*.whl .
28
- RUN pip install *.whl && rm *.whl
29
 
30
- # Install requirements (Optimized for CPU/Space)
31
  COPY requirements.txt .
32
- RUN pip install --no-cache-dir -r requirements.txt \
33
- && pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
 
 
34
 
35
- # Copy project files
36
  COPY . .
37
 
38
- # --- THE PRO FIX (Correcting the paths) ---
39
- # Note: Hugging Face Spaces run as user "user" (UID 1000), not root.
40
- # We create the cache for both potential users to be safe.
41
- RUN mkdir -p /root/.cache/nanochat/tokenizer/ && \
42
- cp tokenizer.pkl /root/.cache/nanochat/tokenizer/tokenizer.pkl && \
43
- cp token_bytes.pt /root/.cache/nanochat/tokenizer/token_bytes.pt
44
-
45
- # Set permissions for the HF "user"
46
- RUN mkdir -p /.cache/nanochat/tokenizer/ && \
47
- chmod -R 777 /.cache && \
48
- cp tokenizer.pkl /.cache/nanochat/tokenizer/tokenizer.pkl && \
49
- cp token_bytes.pt /.cache/nanochat/tokenizer/token_bytes.pt
50
-
51
- # Clean up and finish
52
- RUN pip cache purge
53
- EXPOSE 7860
54
- ENV GRADIO_SERVER_NAME="0.0.0.0"
55
 
56
  CMD ["python", "app.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
5
+ # system deps
6
  RUN apt-get update && apt-get install -y --no-install-recommends \
7
+ git \
8
  libstdc++6 \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
+ # install torch cpu first
12
+ RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu
 
13
 
14
+ # install python deps
15
  COPY requirements.txt .
16
+ RUN pip install --no-cache-dir -r requirements.txt
17
+
18
+ # install rustbpe directly
19
+ RUN pip install rustbpe
20
 
21
+ # copy repo
22
  COPY . .
23
 
24
+ ENV GRADIO_SERVER_NAME=0.0.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  CMD ["python", "app.py"]
app.py CHANGED
@@ -1,93 +1,93 @@
1
- import json
2
- import pickle
3
- import torch
4
  import gradio as gr
5
-
6
  from nanochat.gpt import GPT, GPTConfig
 
 
 
7
 
8
- print("🚀 Loading NanoChat...")
9
-
10
- # -----------------------
11
- # Load tokenizer
12
- # -----------------------
13
-
14
- with open("tokenizer.pkl", "rb") as f:
15
- tokenizer = pickle.load(f)
16
-
17
- print("Tokenizer loaded")
18
 
19
- # -----------------------
20
- # Load model config
21
- # -----------------------
 
 
22
 
23
- with open("meta_000971.json") as f:
24
  meta = json.load(f)
25
 
26
- config = GPTConfig(**meta)
27
-
28
- # -----------------------
29
- # Build model
30
- # -----------------------
31
-
32
  model = GPT(config)
33
-
34
- print("Loading weights...")
35
-
36
- state_dict = torch.load("model_000971.pt", map_location="cpu")
37
- state_dict = {k.replace("_orig_mod.", ""): v for k, v in state_dict.items()}
38
-
39
- model.load_state_dict(state_dict, strict=False)
40
  model.eval()
41
 
42
- print("✅ NanoChat ready")
43
-
44
- # -----------------------
45
- # Chat function
46
- # -----------------------
47
-
48
- def generate_reply(message, history):
49
-
50
- tokens = [tokenizer.bos_token_id]
51
-
52
- for user, assistant in history:
53
- tokens += [tokenizer.user_start_id] + tokenizer.encode(user) + [tokenizer.user_end_id]
54
- tokens += [tokenizer.assistant_start_id] + tokenizer.encode(assistant) + [tokenizer.assistant_end_id]
55
-
56
- tokens += [tokenizer.user_start_id] + tokenizer.encode(message) + [tokenizer.user_end_id]
57
- tokens.append(tokenizer.assistant_start_id)
58
-
59
- input_ids = torch.tensor([tokens])
60
-
61
- with torch.no_grad():
62
- output = model.generate(
63
- input_ids,
64
- max_tokens=256,
65
- temperature=0.8,
66
- top_k=40
67
- )
68
-
69
- new_tokens = output[0][input_ids.shape[1]:]
70
- text = tokenizer.decode(new_tokens.tolist())
71
-
72
- for tag in ["<|assistant_end|>", "<|end|>"]:
73
- text = text.split(tag)[0]
74
-
75
- return text.strip()
76
-
77
-
78
- # -----------------------
79
- # UI
80
- # -----------------------
81
-
82
- demo = gr.ChatInterface(
83
- fn=generate_reply,
84
- title="🧸 NanoChat ClimbMix D12",
85
- description="Small locally-trained NanoChat model running on HuggingFace Spaces",
86
- examples=[
87
- "Hi!",
88
- "Explain UPI",
89
- "Tell me a joke"
90
- ],
91
- )
92
-
93
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import torch
3
  from nanochat.gpt import GPT, GPTConfig
4
+ from nanochat.tokenizer import get_tokenizer
5
+ import json
6
+ import os
7
 
8
+ # --------------------------
9
+ # 1) Load tokenizer
10
+ # --------------------------
11
+ tokenizer = get_tokenizer()
 
 
 
 
 
 
12
 
13
+ # --------------------------
14
+ # 2) Load model config & weights
15
+ # --------------------------
16
+ meta_path = "meta_000971.json"
17
+ model_path = "model_000971.pt"
18
 
19
+ with open(meta_path, "r") as f:
20
  meta = json.load(f)
21
 
22
+ config = GPTConfig(**meta["model_config"])
 
 
 
 
 
23
  model = GPT(config)
24
+ checkpoint = torch.load(model_path, map_location="cpu")
25
+ model.load_state_dict(checkpoint)
 
 
 
 
 
26
  model.eval()
27
 
28
+ # Optional: Torch compile for CPU optimization
29
+ try:
30
+ model = torch.compile(model)
31
+ except Exception as e:
32
+ print(f"Torch compile skipped: {e}")
33
+
34
+ # --------------------------
35
+ # 3) Helper functions
36
+ # --------------------------
37
+ def chat_with_model(conversation_history, user_input, max_tokens=128, temperature=0.8, top_k=40):
38
+ """
39
+ conversation_history: list of {"role": "user"/"assistant", "content": str}
40
+ user_input: str
41
+ Returns updated conversation and assistant's response
42
+ """
43
+ # Append user's message
44
+ conversation_history.append({"role": "user", "content": user_input})
45
+
46
+ # Render tokens for completion
47
+ conv_for_gen = {"messages": conversation_history + [{"role": "assistant", "content": ""}]}
48
+ input_ids = tokenizer.render_for_completion(conv_for_gen)
49
+
50
+ # Generate tokens
51
+ output_ids = []
52
+ for token_id in model.generate(input_ids, max_tokens=max_tokens, temperature=temperature, top_k=top_k):
53
+ output_ids.append(token_id)
54
+
55
+ # Decode assistant's response
56
+ assistant_response = tokenizer.decode(output_ids)
57
+
58
+ # Append assistant's message
59
+ conversation_history.append({"role": "assistant", "content": assistant_response})
60
+
61
+ return conversation_history, assistant_response
62
+
63
+ # --------------------------
64
+ # 4) Gradio UI
65
+ # --------------------------
66
+ with gr.Blocks() as demo:
67
+ gr.Markdown("## NanoChat ClimbMix D12 🐍\nCPU-friendly GPT chat")
68
+
69
+ chatbot = gr.Chatbot()
70
+ msg = gr.Textbox(label="Your message")
71
+ clear = gr.Button("Clear")
72
+
73
+ def user_send(message, history):
74
+ history = history or []
75
+ conversation_history = [{"role": "user" if i % 2 == 0 else "assistant", "content": m} for i, m in enumerate(sum(history, ()))]
76
+
77
+ history, assistant_response = chat_with_model(conversation_history, message)
78
+ # Convert to Gradio-friendly format: list of (user, assistant) tuples
79
+ gr_history = []
80
+ for i in range(0, len(history), 2):
81
+ user_msg = history[i]["content"]
82
+ assistant_msg = history[i + 1]["content"] if i + 1 < len(history) else ""
83
+ gr_history.append((user_msg, assistant_msg))
84
+ return gr_history, ""
85
+
86
+ msg.submit(user_send, [msg, chatbot], [chatbot, msg])
87
+ clear.click(lambda: None, None, chatbot, queue=False)
88
+
89
+ # --------------------------
90
+ # 5) Launch
91
+ # --------------------------
92
+ if __name__ == "__main__":
93
+ demo.launch(server_name="0.0.0.0", server_port=7860)
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
- gradio
2
  tokenizers
3
  tiktoken
4
  numpy
5
- torch
6
- fsspec
 
1
+ gradio>=4.0
2
  tokenizers
3
  tiktoken
4
  numpy
5
+ fsspec
6
+ rustbpe