Spaces:

AjinkyaPagare
/

StarCoder2-3B

Runtime error

App Files Community

AjinkyaPagare committed 9 days ago

Commit

3fbe92b

1 Parent(s): 7175181

feat: configure StarCoder2 3B model deployment configurations and ports

Browse files

Files changed (4) hide show

app/config.py +3 -3
app/main.py +8 -8
download_model.py +3 -3
setup.py +3 -3

app/config.py CHANGED Viewed

@@ -6,7 +6,7 @@ BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 # Model Configuration
 MODEL_DIR = os.path.join(BASE_DIR, "models")
-MODEL_NAME = "qwen2.5-coder-7b-instruct-q4_k_m.gguf"
 MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
 # llama.cpp Configuration
@@ -17,8 +17,8 @@ else:
     LLAMA_SERVER_PATH = os.environ.get("LLAMA_SERVER_PATH", os.path.join(BIN_DIR, "llama-server"))
 # Server Ports
-LLAMA_PORT = int(os.environ.get("LLAMA_PORT", 8081))
-FASTAPI_PORT = int(os.environ.get("FASTAPI_PORT", 8001))
 LLAMA_HOST = os.environ.get("LLAMA_HOST", "127.0.0.1")
 # Memory and CPU Performance Management

 # Model Configuration
 MODEL_DIR = os.path.join(BASE_DIR, "models")
+MODEL_NAME = "starcoder2-3b-instruct.Q4_K_M.gguf"
 MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
 # llama.cpp Configuration
     LLAMA_SERVER_PATH = os.environ.get("LLAMA_SERVER_PATH", os.path.join(BIN_DIR, "llama-server"))
 # Server Ports
+LLAMA_PORT = int(os.environ.get("LLAMA_PORT", 8084))
+FASTAPI_PORT = int(os.environ.get("FASTAPI_PORT", 8004))
 LLAMA_HOST = os.environ.get("LLAMA_HOST", "127.0.0.1")
 # Memory and CPU Performance Management

app/main.py CHANGED Viewed

@@ -7,7 +7,7 @@ from fastapi import FastAPI, HTTPException, status, Request
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, Response, HTMLResponse
-from app.config import MODEL_PATH, FASTAPI_PORT, CONTEXT_SIZE
 from app.schemas.chat import ChatRequest, AutocompleteRequest, StatusResponse
 from app.services.model_service import ModelService
 from app.services.FIM_service import FIMService
@@ -70,7 +70,7 @@ async def lifespan(app: FastAPI):
 # Create FastAPI App
 app = FastAPI(
     title="Local IDE Code Completion Engine",
-    description="Optimized backend providing high-speed coding completions and chat using local Qwen 2.5 Coder.",
     version="1.0.0",
     lifespan=lifespan
 )
@@ -237,7 +237,7 @@ async def proxy_openai(path: str, request: Request):
     Directly proxies OpenAI-compatible requests to the underlying llama-server backend.
     Enables instant compatibility with standard VS Code / Cursor extensions.
     """
-    url = f"http://127.0.0.1:8080/v1/{path}"
     body = await request.body()
     headers = dict(request.headers)
     headers.pop("host", None)
@@ -305,7 +305,7 @@ async def get_dashboard():
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Qwen 2.5 Coder | Interactive Engine Dashboard</title>
     <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;700&family=Outfit:wght@300;400;600;700&display=swap" rel="stylesheet">
     <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
     <style>
@@ -658,8 +658,8 @@ async def get_dashboard():
 <body>
     <header>
         <div class="logo-section">
-            <div class="logo-badge">Q</div>
-            <div class="logo-title">Qwen 2.5 Coder</div>
         </div>
         <div class="status-pill">
             <div class="status-dot"></div>
@@ -720,7 +720,7 @@ async def get_dashboard():
                 <div class="chat-messages" id="chat-box">
                     <!-- Welcome/default message of the LLM is inside the scrollable flow -->
                     <div class="message bot" style="max-width: 100%;">
-                        <div class="message-content">Hello! I am Qwen 2.5 Coder. Ask me any coding questions, request complete script generation, or let me explain complex logic!</div>
                         <div class="message-meta" style="display: flex; align-items: center; gap: 0.75rem; margin-top: 0.5rem; font-size: 0.75rem; color: var(--text-muted); opacity: 0.8;">
                             <span class="message-time">Welcome</span>
                             <button class="copy-all-btn" onclick="copyEntireMessage(this)" style="background: none; border: none; color: var(--text-muted); cursor: pointer; display: flex; align-items: center; gap: 0.25rem; font-size: 0.75rem; transition: color 0.2s; padding: 0;">
@@ -732,7 +732,7 @@ async def get_dashboard():
                 </div>
                 <!-- Input row sits cleanly below the message box, aligning dynamically -->
                 <div class="input-row">
-                    <input type="text" class="chat-input" id="user-input" placeholder="Ask Qwen-Coder something (e.g. Write a quicksort in Go)..." onkeydown="if(event.key === 'Enter') handleAction()">
                     <button class="send-btn" id="action-btn" onclick="handleAction()">Send</button>
                 </div>
             </div>

 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import StreamingResponse, Response, HTMLResponse
+from app.config import MODEL_PATH, FASTAPI_PORT, CONTEXT_SIZE, LLAMA_PORT
 from app.schemas.chat import ChatRequest, AutocompleteRequest, StatusResponse
 from app.services.model_service import ModelService
 from app.services.FIM_service import FIMService
 # Create FastAPI App
 app = FastAPI(
     title="Local IDE Code Completion Engine",
+    description="Optimized backend providing high-speed coding completions and chat using local StarCoder2 3B.",
     version="1.0.0",
     lifespan=lifespan
 )
     Directly proxies OpenAI-compatible requests to the underlying llama-server backend.
     Enables instant compatibility with standard VS Code / Cursor extensions.
     """
+    url = f"http://127.0.0.1:{LLAMA_PORT}/v1/{path}"
     body = await request.body()
     headers = dict(request.headers)
     headers.pop("host", None)
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>StarCoder2 3B | Interactive Engine Dashboard</title>
     <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500;700&family=Outfit:wght@300;400;600;700&display=swap" rel="stylesheet">
     <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
     <style>
 <body>
     <header>
         <div class="logo-section">
+            <div class="logo-badge">S</div>
+            <div class="logo-title">StarCoder2 3B</div>
         </div>
         <div class="status-pill">
             <div class="status-dot"></div>
                 <div class="chat-messages" id="chat-box">
                     <!-- Welcome/default message of the LLM is inside the scrollable flow -->
                     <div class="message bot" style="max-width: 100%;">
+                        <div class="message-content">Hello! I am StarCoder2 3B. Ask me any coding questions, request complete script generation, or let me explain complex logic!</div>
                         <div class="message-meta" style="display: flex; align-items: center; gap: 0.75rem; margin-top: 0.5rem; font-size: 0.75rem; color: var(--text-muted); opacity: 0.8;">
                             <span class="message-time">Welcome</span>
                             <button class="copy-all-btn" onclick="copyEntireMessage(this)" style="background: none; border: none; color: var(--text-muted); cursor: pointer; display: flex; align-items: center; gap: 0.25rem; font-size: 0.75rem; transition: color 0.2s; padding: 0;">
                 </div>
                 <!-- Input row sits cleanly below the message box, aligning dynamically -->
                 <div class="input-row">
+                    <input type="text" class="chat-input" id="user-input" placeholder="Ask StarCoder2 something (e.g. Write a quicksort in Go)..." onkeydown="if(event.key === 'Enter') handleAction()">
                     <button class="send-btn" id="action-btn" onclick="handleAction()">Send</button>
                 </div>
             </div>

download_model.py CHANGED Viewed

@@ -3,9 +3,9 @@ import sys
 import requests
 MODEL_DIR = "models"
-MODEL_NAME = "qwen2.5-coder-7b-instruct-q4_k_m.gguf"
 MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
-MODEL_URL = "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf"
 def download_model():
     os.makedirs(MODEL_DIR, exist_ok=True)
@@ -13,7 +13,7 @@ def download_model():
         print(f"Model '{MODEL_NAME}' already exists. Skipping download.")
         return
-    print(f"Initiating download for Qwen 2.5 Coder 7B GGUF...")
     response = requests.get(MODEL_URL, stream=True)
     response.raise_for_status()

 import requests
 MODEL_DIR = "models"
+MODEL_NAME = "starcoder2-3b-instruct.Q4_K_M.gguf"
 MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
+MODEL_URL = "https://huggingface.co/QuantFactory/starcoder2-3b-instruct-GGUF/resolve/main/starcoder2-3b-instruct.Q4_K_M.gguf"
 def download_model():
     os.makedirs(MODEL_DIR, exist_ok=True)
         print(f"Model '{MODEL_NAME}' already exists. Skipping download.")
         return
+    print(f"Initiating download for StarCoder2 3B GGUF...")
     response = requests.get(MODEL_URL, stream=True)
     response.raise_for_status()

setup.py CHANGED Viewed

@@ -8,9 +8,9 @@ import requests
 MODEL_DIR = "models"
 BIN_DIR = "bin"
-# Hugging Face URL for Qwen 2.5 Coder 1.5B Instruct Q4_K_M GGUF (approx 1.2 GB)
-MODEL_URL = "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct-GGUF/resolve/main/qwen2.5-coder-7b-instruct-q4_k_m.gguf"
-MODEL_NAME = "qwen2.5-coder-7b-instruct-q4_k_m.gguf"
 MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
 # llama.cpp stable release b4834 for Windows AVX2 (Supports almost all modern CPUs)

 MODEL_DIR = "models"
 BIN_DIR = "bin"
+# Hugging Face URL for StarCoder2 3B Instruct Q4_K_M GGUF (approx 1.8 GB)
+MODEL_URL = "https://huggingface.co/QuantFactory/starcoder2-3b-instruct-GGUF/resolve/main/starcoder2-3b-instruct.Q4_K_M.gguf"
+MODEL_NAME = "starcoder2-3b-instruct.Q4_K_M.gguf"
 MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME)
 # llama.cpp stable release b4834 for Windows AVX2 (Supports almost all modern CPUs)