Raiff1982 committed on
Commit
2be316c
·
verified ·
1 Parent(s): d560682

Upload 5 files

Browse files
Files changed (5) hide show
  1. Dockerfile +16 -0
  2. README.md +27 -6
  3. app.py +146 -0
  4. index.html +370 -0
  5. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.11-slim

WORKDIR /app

# System packages needed by pip/model tooling; clean apt lists to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    git curl && rm -rf /var/lib/apt/lists/*

# Install Python deps before copying sources so Docker layer caching
# survives code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# FIX: Hugging Face Spaces run the container as an arbitrary non-root user,
# so the default ~/.cache is not writable and the model download at startup
# fails. Point the Hugging Face cache at a writable location inside /app.
ENV HF_HOME=/app/.cache
RUN mkdir -p /app/.cache && chmod 777 /app/.cache

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,12 +1,33 @@
1
  ---
2
- title: Codette Ai
3
- emoji: 😻
4
  colorFrom: gray
5
- colorTo: purple
6
  sdk: docker
 
7
  pinned: false
8
- license: mit
9
- short_description: ' Codette is not a chatbot. She is not a feature. She is AI'
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Codette AI Music Assistant
3
+ emoji: 🎵
4
  colorFrom: gray
5
+ colorTo: gray
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
 
 
9
  ---
10
 
11
+ # Codette AI Music Production Assistant
12
+
13
+ A LoRA fine-tuned **Llama 3.2-1B** model specialising in music theory,
14
+ mixing, arrangement, and production guidance.
15
+
16
+ ## API
17
+
18
+ The Space exposes an Ollama-compatible streaming endpoint:
19
+
20
+ ```
21
+ POST /api/chat
22
+ Content-Type: application/json
23
+
24
+ {
25
+ "messages": [
26
+ {"role": "system", "content": "..."},
27
+ {"role": "user", "content": "What key is this chord progression in?"}
28
+ ],
29
+ "stream": true
30
+ }
31
+ ```
32
+
33
+ Responses stream as newline-delimited JSON, identical to Ollama's `/api/chat`.
app.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Codette AI Space — FastAPI + streaming chat API
Compatible with the Ollama /api/chat streaming format so the HTML widget
needs only a URL change to work.

Adapter files should live in ./adapter/ inside this Space repo.
Base model: meta-llama/Llama-3.2-1B
"""

import json
import asyncio
import threading
from pathlib import Path
from typing import Iterator

import torch
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse, HTMLResponse
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Configuration ----------------------------------------------------------------
BASE_MODEL = "meta-llama/Llama-3.2-1B"
ADAPTER_PATH = Path(__file__).parent / "adapter"
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# FastAPI application ----------------------------------------------------------
app = FastAPI(title="Codette AI")

# The widget is embedded on an external (Squarespace) site, so CORS stays
# wide open; there is no credentialed state to protect here.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Model loading — happens once, at import time ---------------------------------
print(f"Loading tokenizer from {ADAPTER_PATH} …")
tokenizer = AutoTokenizer.from_pretrained(str(ADAPTER_PATH))
if tokenizer.pad_token is None:
    # Llama tokenizers ship without a pad token; reuse EOS so padding works.
    tokenizer.pad_token = tokenizer.eos_token

print(f"Loading base model {BASE_MODEL} …")
base = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
    device_map="auto",
    low_cpu_mem_usage=True,
)

print(f"Loading LoRA adapter from {ADAPTER_PATH} …")
model = PeftModel.from_pretrained(base, str(ADAPTER_PATH))

print("Merging LoRA weights into base model …")
# Fold the adapter deltas into the base weights so inference runs on a
# single plain model with no PEFT indirection.
model = model.merge_and_unload()
model.eval()
print(f"✅ Model ready on {DEVICE}")


# Helpers ----------------------------------------------------------------------
def build_prompt(messages: list[dict]) -> str:
    """Convert OpenAI-style messages to a simple Llama-3.2 instruct prompt.

    Fix over the original: ``<|begin_of_text|>`` is now emitted exactly once
    at the very start of the prompt.  The original attached it to the system
    message only, so conversations without a system message (or with the
    system message not first) were missing the BOS marker entirely.

    Args:
        messages: list of ``{"role": ..., "content": ...}`` dicts; roles
            other than system/user/assistant are silently skipped (same as
            the original behavior).

    Returns:
        The concatenated prompt string, ending with an open assistant header
        so generation continues as the assistant.
    """
    parts = ["<|begin_of_text|>"]
    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")
        if role in ("system", "user", "assistant"):
            parts.append(
                f"<|start_header_id|>{role}<|end_header_id|>\n{content}<|eot_id|>"
            )
    # Leave the prompt open on an assistant header: the model's continuation
    # is the assistant's reply.
    parts.append("<|start_header_id|>assistant<|end_header_id|>\n")
    return "".join(parts)
def stream_tokens(messages: list[dict], max_new_tokens: int = 512) -> Iterator[str]:
    """Yield generated text fragments for *messages* as they are produced.

    ``model.generate`` runs on a background thread while this generator
    drains the ``TextIteratorStreamer`` on the caller's thread, so tokens
    can be forwarded to an HTTP response during generation.
    """
    encoded = tokenizer(build_prompt(messages), return_tensors="pt").to(DEVICE)

    token_stream = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    generate_kwargs = {
        **encoded,
        "max_new_tokens": max_new_tokens,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
        "streamer": token_stream,
    }

    worker = threading.Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()

    yield from token_stream

    # The streamer is exhausted once generation finishes; reap the thread.
    worker.join()
# ── Routes ────────────────────────────────────────────────────────────────────
@app.get("/", response_class=HTMLResponse)
async def root():
    """Minimal landing page confirming the service is up."""
    return "<h2>Codette AI is running ✅</h2><p>POST /api/chat to chat.</p>"
@app.post("/api/chat")
async def chat(request: Request):
    """Ollama-compatible chat endpoint.

    Request body: ``{"messages": [...], "stream": bool}`` — ``stream``
    defaults to true.  Streaming responses are newline-delimited JSON
    chunks in Ollama's /api/chat shape; the final chunk carries
    ``"done": true``.  Non-streaming requests get a single JSON object.

    Fix over the original: the streaming path accumulated every token into
    a ``full`` string that was never read — a dead (and quadratic)
    accumulator.  It has been removed; behavior is otherwise unchanged.
    """
    body = await request.json()
    messages = body.get("messages", [])
    stream = body.get("stream", True)

    if not stream:
        # Non-streaming — collect everything first
        full = "".join(stream_tokens(messages))
        return {
            "message": {"role": "assistant", "content": full},
            "done": True,
        }

    # Streaming — mimic Ollama's NDJSON format exactly
    async def event_stream():
        # stream_tokens blocks while waiting on the generation thread;
        # sleep(0) hands control back to the event loop between chunks.
        for token in stream_tokens(messages):
            yield json.dumps({
                "message": {"role": "assistant", "content": token},
                "done": False,
            }) + "\n"
            await asyncio.sleep(0)

        # Final message with done=true
        yield json.dumps({
            "message": {"role": "assistant", "content": ""},
            "done": True,
        }) + "\n"

    return StreamingResponse(
        event_stream(),
        media_type="application/x-ndjson",
        # Disable proxy buffering so chunks reach the client immediately.
        headers={"X-Accel-Buffering": "no"},
    )
index.html ADDED
@@ -0,0 +1,370 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <title>Codette Chat</title>
6
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
7
+ <link href="https://fonts.googleapis.com/css2?family=Space+Mono:ital@0;1&family=DM+Sans:wght@400;500;600&display=swap" rel="stylesheet" />
8
+ <style>
9
+ /* ── Reset / Base ── */
10
+ *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
11
+
12
+ /* ── Variables ── */
13
+ :root {
14
+ --bg: #0b0c10;
15
+ --surface: #13141a;
16
+ --border: #1f2130;
17
+ --accent: #c8f078; /* neon lime — studio meter green */
18
+ --accent2: #78d4f0; /* cool cyan */
19
+ --text: #e8eaf0;
20
+ --muted: #5a5f78;
21
+ --user-bg: #1a1f35;
22
+ --bot-bg: #111318;
23
+ --radius: 14px;
24
+ --font-mono: 'Space Mono', monospace;
25
+ --font-sans: 'DM Sans', sans-serif;
26
+ }
27
+
28
+ body {
29
+ background: var(--bg);
30
+ display: flex;
31
+ align-items: center;
32
+ justify-content: center;
33
+ min-height: 100vh;
34
+ font-family: var(--font-sans);
35
+ }
36
+
37
+ /* ── Widget Shell ── */
38
+ #hc-chat {
39
+ width: 100%;
40
+ max-width: 680px;
41
+ background: var(--surface);
42
+ border: 1px solid var(--border);
43
+ border-radius: 20px;
44
+ overflow: hidden;
45
+ display: flex;
46
+ flex-direction: column;
47
+ box-shadow:
48
+ 0 0 0 1px #ffffff08 inset,
49
+ 0 32px 80px #00000060;
50
+ }
51
+
52
+ /* ── Header ── */
53
+ #hc-header {
54
+ display: flex;
55
+ align-items: center;
56
+ gap: 12px;
57
+ padding: 16px 20px;
58
+ border-bottom: 1px solid var(--border);
59
+ background: linear-gradient(135deg, #0f1118 0%, #141825 100%);
60
+ }
61
+
62
+ .hc-avatar {
63
+ width: 36px;
64
+ height: 36px;
65
+ border-radius: 10px;
66
+ background: linear-gradient(135deg, var(--accent) 0%, var(--accent2) 100%);
67
+ display: flex;
68
+ align-items: center;
69
+ justify-content: center;
70
+ font-size: 18px;
71
+ flex-shrink: 0;
72
+ }
73
+
74
+ .hc-title {
75
+ font-weight: 600;
76
+ color: var(--text);
77
+ font-size: 15px;
78
+ letter-spacing: 0.01em;
79
+ }
80
+
81
+ .hc-subtitle {
82
+ font-family: var(--font-mono);
83
+ font-size: 10px;
84
+ color: var(--accent);
85
+ letter-spacing: 0.08em;
86
+ text-transform: uppercase;
87
+ margin-top: 2px;
88
+ }
89
+
90
+ /* LED meter dots */
91
+ .hc-dots {
92
+ margin-left: auto;
93
+ display: flex;
94
+ gap: 5px;
95
+ align-items: center;
96
+ }
97
+ .hc-dot {
98
+ width: 7px; height: 7px; border-radius: 50%;
99
+ background: var(--border);
100
+ }
101
+ .hc-dot.on { background: var(--accent); box-shadow: 0 0 6px var(--accent); animation: pulse 1.8s ease-in-out infinite; }
102
+ @keyframes pulse { 0%,100%{opacity:1} 50%{opacity:0.4} }
103
+
104
+ /* ── Log ── */
105
+ #hc-log {
106
+ flex: 1;
107
+ min-height: 320px;
108
+ max-height: 400px;
109
+ overflow-y: auto;
110
+ padding: 20px;
111
+ display: flex;
112
+ flex-direction: column;
113
+ gap: 14px;
114
+ scrollbar-width: thin;
115
+ scrollbar-color: var(--border) transparent;
116
+ }
117
+
118
+ /* Bubbles */
119
+ .hc-row {
120
+ display: flex;
121
+ flex-direction: column;
122
+ }
123
+ .hc-row.user { align-items: flex-end; }
124
+ .hc-row.bot { align-items: flex-start; }
125
+
126
+ .hc-label {
127
+ font-family: var(--font-mono);
128
+ font-size: 9px;
129
+ letter-spacing: 0.1em;
130
+ text-transform: uppercase;
131
+ color: var(--muted);
132
+ margin-bottom: 5px;
133
+ padding: 0 4px;
134
+ }
135
+
136
+ .hc-bubble {
137
+ max-width: 82%;
138
+ padding: 11px 15px;
139
+ border-radius: var(--radius);
140
+ font-size: 14px;
141
+ line-height: 1.6;
142
+ white-space: pre-wrap;
143
+ border: 1px solid var(--border);
144
+ }
145
+
146
+ .hc-row.user .hc-bubble {
147
+ background: var(--user-bg);
148
+ color: var(--text);
149
+ border-color: #2a3050;
150
+ border-bottom-right-radius: 4px;
151
+ }
152
+
153
+ .hc-row.bot .hc-bubble {
154
+ background: var(--bot-bg);
155
+ color: var(--text);
156
+ border-color: var(--border);
157
+ border-bottom-left-radius: 4px;
158
+ }
159
+
160
+ /* cursor blink */
161
+ .hc-cursor::after {
162
+ content: '▋';
163
+ color: var(--accent);
164
+ animation: blink .7s step-end infinite;
165
+ }
166
+ @keyframes blink { 50%{opacity:0} }
167
+
168
+ /* ── Input bar ── */
169
+ #hc-bar {
170
+ display: flex;
171
+ gap: 10px;
172
+ padding: 14px 16px;
173
+ border-top: 1px solid var(--border);
174
+ background: var(--surface);
175
+ }
176
+
177
+ #hc-input {
178
+ flex: 1;
179
+ padding: 11px 14px;
180
+ background: var(--bg);
181
+ border: 1px solid var(--border);
182
+ border-radius: 10px;
183
+ color: var(--text);
184
+ font-family: var(--font-sans);
185
+ font-size: 14px;
186
+ outline: none;
187
+ transition: border-color .2s;
188
+ }
189
+ #hc-input::placeholder { color: var(--muted); }
190
+ #hc-input:focus { border-color: #2e3555; }
191
+
192
+ #hc-send {
193
+ padding: 11px 18px;
194
+ background: var(--accent);
195
+ color: #0b0c10;
196
+ border: none;
197
+ border-radius: 10px;
198
+ font-family: var(--font-sans);
199
+ font-weight: 600;
200
+ font-size: 13px;
201
+ cursor: pointer;
202
+ transition: opacity .15s, transform .1s;
203
+ letter-spacing: 0.02em;
204
+ }
205
+ #hc-send:hover { opacity: .88; }
206
+ #hc-send:active { transform: scale(.97); }
207
+ #hc-send:disabled { opacity: .35; cursor: not-allowed; }
208
+
209
+ /* ── Footer ── */
210
+ #hc-footer {
211
+ padding: 10px 20px 14px;
212
+ font-family: var(--font-mono);
213
+ font-size: 10px;
214
+ color: var(--muted);
215
+ letter-spacing: 0.05em;
216
+ }
217
+ </style>
218
+ </head>
219
+ <body>
220
+
221
+ <div id="hc-chat">
222
+ <!-- Header -->
223
+ <div id="hc-header">
224
+ <div class="hc-avatar">🎵</div>
225
+ <div>
226
+ <div class="hc-title">Codette</div>
227
+ <div class="hc-subtitle">Music Production AI</div>
228
+ </div>
229
+ <div class="hc-dots">
230
+ <div class="hc-dot on" style="animation-delay:.0s"></div>
231
+ <div class="hc-dot on" style="animation-delay:.3s"></div>
232
+ <div class="hc-dot on" style="animation-delay:.6s"></div>
233
+ <div class="hc-dot"></div>
234
+ </div>
235
+ </div>
236
+
237
+ <!-- Message log -->
238
+ <div id="hc-log"></div>
239
+
240
+ <!-- Input -->
241
+ <div id="hc-bar">
242
+ <input id="hc-input" type="text" placeholder="Ask about chords, mixing, arrangement…" autocomplete="off" />
243
+ <button id="hc-send">Send</button>
244
+ </div>
245
+
246
+ <!-- Footer -->
247
+ <div id="hc-footer">
248
+ CODETTE-LLAMA · LLAMA 3.2-1B · NO CHAT HISTORY SAVED
249
+ </div>
250
+ </div>
251
+
252
+ <script>
253
+ (() => {
254
+ // ── CHANGE THIS to your HF Space URL once deployed ──
255
+ // Format: https://raiff1982-codette-ai.hf.space
256
+ const API_BASE = "https://raiff1982-codette-ai.hf.space";
257
+
258
+ const log = document.getElementById("hc-log");
259
+ const input = document.getElementById("hc-input");
260
+ const sendBtn = document.getElementById("hc-send");
261
+
262
+ const messages = [
263
+ {
264
+ role: "system",
265
+ content:
266
+ "You are Codette, an AI music production assistant with expertise in " +
267
+ "music theory, mixing, composition, harmonic progressions, genre analysis, " +
268
+ "ear training, and arrangement. You assist musicians and producers with " +
269
+ "creative and technical guidance. Be concise, practical, and musically informed."
270
+ }
271
+ ];
272
+
273
+ function addBubble(role, text = "") {
274
+ const row = document.createElement("div");
275
+ row.className = `hc-row ${role === "user" ? "user" : "bot"}`;
276
+
277
+ const label = document.createElement("div");
278
+ label.className = "hc-label";
279
+ label.textContent = role === "user" ? "You" : "Codette";
280
+
281
+ const bubble = document.createElement("div");
282
+ bubble.className = "hc-bubble";
283
+ if (role === "assistant" && !text) bubble.classList.add("hc-cursor");
284
+ bubble.textContent = text;
285
+
286
+ row.appendChild(label);
287
+ row.appendChild(bubble);
288
+ log.appendChild(row);
289
+ log.scrollTop = log.scrollHeight;
290
+ return bubble;
291
+ }
292
+
293
+ async function streamReply() {
294
+ const text = input.value.trim();
295
+ if (!text) return;
296
+
297
+ input.value = "";
298
+ messages.push({ role: "user", content: text });
299
+ addBubble("user", text);
300
+
301
+ const bubble = addBubble("assistant", "");
302
+ sendBtn.disabled = true;
303
+ input.disabled = true;
304
+
305
+ try {
306
+ const resp = await fetch(`${API_BASE}/api/chat`, {
307
+ method: "POST",
308
+ headers: { "Content-Type": "application/json" },
309
+ body: JSON.stringify({ messages, stream: true })
310
+ });
311
+
312
+ if (!resp.ok) {
313
+ bubble.textContent = `Error ${resp.status}: ${await resp.text()}`;
314
+ bubble.classList.remove("hc-cursor");
315
+ return;
316
+ }
317
+
318
+ const reader = resp.body.getReader();
319
+ const decoder = new TextDecoder();
320
+ let full = "", buffer = "";
321
+
322
+ while (true) {
323
+ const { value, done } = await reader.read();
324
+ if (done) break;
325
+
326
+ buffer += decoder.decode(value, { stream: true });
327
+ const lines = buffer.split("\n");
328
+ buffer = lines.pop();
329
+
330
+ for (const line of lines) {
331
+ if (!line.trim()) continue;
332
+ try {
333
+ const chunk = JSON.parse(line);
334
+ const token = chunk?.message?.content ?? "";
335
+ full += token;
336
+ bubble.textContent = full || "";
337
+ bubble.classList.add("hc-cursor");
338
+ log.scrollTop = log.scrollHeight;
339
+ if (chunk.done) {
340
+ messages.push({ role: "assistant", content: full });
341
+ bubble.classList.remove("hc-cursor");
342
+ }
343
+ } catch { /* skip malformed */ }
344
+ }
345
+ }
346
+
347
+ bubble.classList.remove("hc-cursor");
348
+ if (!full) bubble.textContent = "(no response)";
349
+
350
+ } catch (err) {
351
+ bubble.classList.remove("hc-cursor");
352
+ bubble.textContent = err.message.includes("fetch")
353
+ ? "⚠️ Cannot reach Codette server. The Space may be starting up — try again in 30 seconds."
354
+ : `Error: ${err.message}`;
355
+ } finally {
356
+ sendBtn.disabled = false;
357
+ input.disabled = false;
358
+ input.focus();
359
+ }
360
+ }
361
+
362
+ sendBtn.addEventListener("click", streamReply);
363
+ input.addEventListener("keydown", e => {
364
+ if (e.key === "Enter" && !e.shiftKey) streamReply();
365
+ });
366
+ })();
367
+ </script>
368
+
369
+ </body>
370
+ </html>
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi>=0.110.0
2
+ uvicorn>=0.29.0
3
+ peft>=0.10.0
4
+ transformers>=4.40.0
5
+ accelerate>=0.29.0
6
+ torch>=2.2.0
7
+ safetensors>=0.4.3