Spaces:
Runtime error
Runtime error
Commit ·
3fbe92b
1
Parent(s): 7175181
feat: configure StarCoder2 3B model deployment configurations and ports
Browse files
- app/config.py +3 -3
- app/main.py +8 -8
- download_model.py +3 -3
- setup.py +3 -3
app/config.py
CHANGED
|
@@ -6,7 +6,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
|
|
| 6 |
|
| 7 |
# Model Configuration
|
| 8 |
MODEL_DIR = os.path.join(BASE_DIR, "models")
|
| 9 |
-
MODEL_NAME = "
|
| 10 |
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
|
| 11 |
|
| 12 |
# llama.cpp Configuration
|
|
@@ -17,8 +17,8 @@ else:
|
|
| 17 |
LLAMA_SERVER_PATH = os.environ.get("LLAMA_SERVER_PATH", os.path.join(BIN_DIR, "llama-server"))
|
| 18 |
|
| 19 |
# Server Ports
|
| 20 |
-
LLAMA_PORT = int(os.environ.get("LLAMA_PORT",
|
| 21 |
-
FASTAPI_PORT = int(os.environ.get("FASTAPI_PORT",
|
| 22 |
LLAMA_HOST = os.environ.get("LLAMA_HOST", "127.0.0.1")
|
| 23 |
|
| 24 |
# Memory and CPU Performance Management
|
|
|
|
| 6 |
|
| 7 |
# Model Configuration
|
| 8 |
MODEL_DIR = os.path.join(BASE_DIR, "models")
|
| 9 |
+
MODEL_NAME = "starcoder2-3b-instruct.Q4_K_M.gguf"
|
| 10 |
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
|
| 11 |
|
| 12 |
# llama.cpp Configuration
|
|
|
|
| 17 |
LLAMA_SERVER_PATH = os.environ.get("LLAMA_SERVER_PATH", os.path.join(BIN_DIR, "llama-server"))
|
| 18 |
|
| 19 |
# Server Ports
|
| 20 |
+
LLAMA_PORT = int(os.environ.get("LLAMA_PORT", 8084))
|
| 21 |
+
FASTAPI_PORT = int(os.environ.get("FASTAPI_PORT", 8004))
|
| 22 |
LLAMA_HOST = os.environ.get("LLAMA_HOST", "127.0.0.1")
|
| 23 |
|
| 24 |
# Memory and CPU Performance Management
|
app/main.py
CHANGED
|
@@ -7,7 +7,7 @@ from fastapi import FastAPI, HTTPException, status, Request
|
|
| 7 |
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
from fastapi.responses import StreamingResponse, Response, HTMLResponse
|
| 9 |
|
| 10 |
-
from app.config import MODEL_PATH, FASTAPI_PORT, CONTEXT_SIZE
|
| 11 |
from app.schemas.chat import ChatRequest, AutocompleteRequest, StatusResponse
|
| 12 |
from app.services.model_service import ModelService
|
| 13 |
from app.services.FIM_service import FIMService
|
|
@@ -70,7 +70,7 @@ async def lifespan(app: FastAPI):
|
|
| 70 |
# Create FastAPI App
|
| 71 |
app = FastAPI(
|
| 72 |
title="Local IDE Code Completion Engine",
|
| 73 |
-
description="Optimized backend providing high-speed coding completions and chat using local
|
| 74 |
version="1.0.0",
|
| 75 |
lifespan=lifespan
|
| 76 |
)
|
|
@@ -237,7 +237,7 @@ async def proxy_openai(path: str, request: Request):
|
|
| 237 |
Directly proxies OpenAI-compatible requests to the underlying llama-server backend.
|
| 238 |
Enables instant compatibility with standard VS Code / Cursor extensions.
|
| 239 |
"""
|
| 240 |
-
url = f"http://127.0.0.1:
|
| 241 |
body = await request.body()
|
| 242 |
headers = dict(request.headers)
|
| 243 |
headers.pop("host", None)
|
|
@@ -305,7 +305,7 @@ async def get_dashboard():
|
|
| 305 |
<head>
|
| 306 |
<meta charset="UTF-8">
|
| 307 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 308 |
-
<title>
|
| 309 |
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;700&family=Outfit:wght@300;400;600;700&display=swap" rel="stylesheet">
|
| 310 |
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 311 |
<style>
|
|
@@ -658,8 +658,8 @@ async def get_dashboard():
|
|
| 658 |
<body>
|
| 659 |
<header>
|
| 660 |
<div class="logo-section">
|
| 661 |
-
<div class="logo-badge">
|
| 662 |
-
<div class="logo-title">
|
| 663 |
</div>
|
| 664 |
<div class="status-pill">
|
| 665 |
<div class="status-dot"></div>
|
|
@@ -720,7 +720,7 @@ async def get_dashboard():
|
|
| 720 |
<div class="chat-messages" id="chat-box">
|
| 721 |
<!-- Welcome/default message of the LLM is inside the scrollable flow -->
|
| 722 |
<div class="message bot" style="max-width: 100%;">
|
| 723 |
-
<div class="message-content">Hello! I am
|
| 724 |
<div class="message-meta" style="display: flex; align-items: center; gap: 0.75rem; margin-top: 0.5rem; font-size: 0.75rem; color: var(--text-muted); opacity: 0.8;">
|
| 725 |
<span class="message-time">Welcome</span>
|
| 726 |
<button class="copy-all-btn" onclick="copyEntireMessage(this)" style="background: none; border: none; color: var(--text-muted); cursor: pointer; display: flex; align-items: center; gap: 0.25rem; font-size: 0.75rem; transition: color 0.2s; padding: 0;">
|
|
@@ -732,7 +732,7 @@ async def get_dashboard():
|
|
| 732 |
</div>
|
| 733 |
<!-- Input row sits cleanly below the message box, aligning dynamically -->
|
| 734 |
<div class="input-row">
|
| 735 |
-
<input type="text" class="chat-input" id="user-input" placeholder="Ask
|
| 736 |
<button class="send-btn" id="action-btn" onclick="handleAction()">Send</button>
|
| 737 |
</div>
|
| 738 |
</div>
|
|
|
|
| 7 |
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
from fastapi.responses import StreamingResponse, Response, HTMLResponse
|
| 9 |
|
| 10 |
+
from app.config import MODEL_PATH, FASTAPI_PORT, CONTEXT_SIZE, LLAMA_PORT
|
| 11 |
from app.schemas.chat import ChatRequest, AutocompleteRequest, StatusResponse
|
| 12 |
from app.services.model_service import ModelService
|
| 13 |
from app.services.FIM_service import FIMService
|
|
|
|
| 70 |
# Create FastAPI App
|
| 71 |
app = FastAPI(
|
| 72 |
title="Local IDE Code Completion Engine",
|
| 73 |
+
description="Optimized backend providing high-speed coding completions and chat using local StarCoder2 3B.",
|
| 74 |
version="1.0.0",
|
| 75 |
lifespan=lifespan
|
| 76 |
)
|
|
|
|
| 237 |
Directly proxies OpenAI-compatible requests to the underlying llama-server backend.
|
| 238 |
Enables instant compatibility with standard VS Code / Cursor extensions.
|
| 239 |
"""
|
| 240 |
+
url = f"http://127.0.0.1:{LLAMA_PORT}/v1/{path}"
|
| 241 |
body = await request.body()
|
| 242 |
headers = dict(request.headers)
|
| 243 |
headers.pop("host", None)
|
|
|
|
| 305 |
<head>
|
| 306 |
<meta charset="UTF-8">
|
| 307 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 308 |
+
<title>StarCoder2 3B | Interactive Engine Dashboard</title>
|
| 309 |
<link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;700&family=Outfit:wght@300;400;600;700&display=swap" rel="stylesheet">
|
| 310 |
<script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
|
| 311 |
<style>
|
|
|
|
| 658 |
<body>
|
| 659 |
<header>
|
| 660 |
<div class="logo-section">
|
| 661 |
+
<div class="logo-badge">S</div>
|
| 662 |
+
<div class="logo-title">StarCoder2 3B</div>
|
| 663 |
</div>
|
| 664 |
<div class="status-pill">
|
| 665 |
<div class="status-dot"></div>
|
|
|
|
| 720 |
<div class="chat-messages" id="chat-box">
|
| 721 |
<!-- Welcome/default message of the LLM is inside the scrollable flow -->
|
| 722 |
<div class="message bot" style="max-width: 100%;">
|
| 723 |
+
<div class="message-content">Hello! I am StarCoder2 3B. Ask me any coding questions, request complete script generation, or let me explain complex logic!</div>
|
| 724 |
<div class="message-meta" style="display: flex; align-items: center; gap: 0.75rem; margin-top: 0.5rem; font-size: 0.75rem; color: var(--text-muted); opacity: 0.8;">
|
| 725 |
<span class="message-time">Welcome</span>
|
| 726 |
<button class="copy-all-btn" onclick="copyEntireMessage(this)" style="background: none; border: none; color: var(--text-muted); cursor: pointer; display: flex; align-items: center; gap: 0.25rem; font-size: 0.75rem; transition: color 0.2s; padding: 0;">
|
|
|
|
| 732 |
</div>
|
| 733 |
<!-- Input row sits cleanly below the message box, aligning dynamically -->
|
| 734 |
<div class="input-row">
|
| 735 |
+
<input type="text" class="chat-input" id="user-input" placeholder="Ask StarCoder2 something (e.g. Write a quicksort in Go)..." onkeydown="if(event.key === 'Enter') handleAction()">
|
| 736 |
<button class="send-btn" id="action-btn" onclick="handleAction()">Send</button>
|
| 737 |
</div>
|
| 738 |
</div>
|
download_model.py
CHANGED
|
@@ -3,9 +3,9 @@ import sys
|
|
| 3 |
import requests
|
| 4 |
|
| 5 |
MODEL_DIR = "models"
|
| 6 |
-
MODEL_NAME = "
|
| 7 |
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
|
| 8 |
-
MODEL_URL = "https://huggingface.co/
|
| 9 |
|
| 10 |
def download_model():
|
| 11 |
os.makedirs(MODEL_DIR, exist_ok=True)
|
|
@@ -13,7 +13,7 @@ def download_model():
|
|
| 13 |
print(f"Model '{MODEL_NAME}' already exists. Skipping download.")
|
| 14 |
return
|
| 15 |
|
| 16 |
-
print(f"Initiating download for
|
| 17 |
response = requests.get(MODEL_URL, stream=True)
|
| 18 |
response.raise_for_status()
|
| 19 |
|
|
|
|
| 3 |
import requests
|
| 4 |
|
| 5 |
MODEL_DIR = "models"
|
| 6 |
+
MODEL_NAME = "starcoder2-3b-instruct.Q4_K_M.gguf"
|
| 7 |
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
|
| 8 |
+
MODEL_URL = "https://huggingface.co/QuantFactory/starcoder2-3b-instruct-GGUF/resolve/main/starcoder2-3b-instruct.Q4_K_M.gguf"
|
| 9 |
|
| 10 |
def download_model():
|
| 11 |
os.makedirs(MODEL_DIR, exist_ok=True)
|
|
|
|
| 13 |
print(f"Model '{MODEL_NAME}' already exists. Skipping download.")
|
| 14 |
return
|
| 15 |
|
| 16 |
+
print(f"Initiating download for StarCoder2 3B GGUF...")
|
| 17 |
response = requests.get(MODEL_URL, stream=True)
|
| 18 |
response.raise_for_status()
|
| 19 |
|
setup.py
CHANGED
|
@@ -8,9 +8,9 @@ import requests
|
|
| 8 |
MODEL_DIR = "models"
|
| 9 |
BIN_DIR = "bin"
|
| 10 |
|
| 11 |
-
# Hugging Face URL for
|
| 12 |
-
MODEL_URL = "https://huggingface.co/
|
| 13 |
-
MODEL_NAME = "
|
| 14 |
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
|
| 15 |
|
| 16 |
# llama.cpp stable release b4834 for Windows AVX2 (Supports almost all modern CPUs)
|
|
|
|
| 8 |
MODEL_DIR = "models"
|
| 9 |
BIN_DIR = "bin"
|
| 10 |
|
| 11 |
+
# Hugging Face URL for StarCoder2 3B Instruct Q4_K_M GGUF (approx 1.8 GB)
|
| 12 |
+
MODEL_URL = "https://huggingface.co/QuantFactory/starcoder2-3b-instruct-GGUF/resolve/main/starcoder2-3b-instruct.Q4_K_M.gguf"
|
| 13 |
+
MODEL_NAME = "starcoder2-3b-instruct.Q4_K_M.gguf"
|
| 14 |
MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
|
| 15 |
|
| 16 |
# llama.cpp stable release b4834 for Windows AVX2 (Supports almost all modern CPUs)
|