truegleai commited on
Commit
c7773a3
·
verified ·
1 Parent(s): 7799a1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -12
app.py CHANGED
@@ -4,27 +4,48 @@ from llama_cpp import Llama
4
  import time
5
  import os
6
 
7
- # Configuration
8
- MODEL_NAME = "DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf"
9
- MODEL_PATH = MODEL_NAME # Since we placed it in the same directory
10
 
11
- # Initialize model (will be loaded on first use)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  llm = None
13
 
14
  def load_model():
15
  """Lazy-load the model only when needed."""
16
  global llm
17
  if llm is None:
18
- print(f"⏳ Loading model {MODEL_NAME}... This may take 1-2 minutes on first run.")
19
  start_time = time.time()
20
 
21
- # CPU-optimized settings for free tier
22
  llm = Llama(
23
  model_path=MODEL_PATH,
24
  n_ctx=2048, # Context size (smaller = less memory)
25
- n_threads=2, # Use 2 CPU threads
26
  n_gpu_layers=0, # CPU only on free tier
27
- verbose=False
28
  )
29
 
30
  load_time = time.time() - start_time
@@ -82,8 +103,8 @@ demo = gr.Interface(
82
  label="Generated Code",
83
  language="python"
84
  ),
85
- title="💻 DeepSeek Coder V2 Lite (16B) - o87Dev",
86
- description="**CPU Deployment** - Largest viable model on Hugging Face Spaces free tier. ⚠️ **First request loads model (~1-2 min)**",
87
  examples=[
88
  ["Write a Python function to check if a number is prime"],
89
  ["Create a React component for a login form"],
@@ -96,5 +117,5 @@ if __name__ == "__main__":
96
  demo.launch(
97
  server_name="0.0.0.0",
98
  server_port=7860,
99
- share=False # Set to True if you want a public link
100
- )
 
4
  import time
5
  import os
6
 
7
# Configuration - Using a 6.7B model that fits free tier
MODEL_REPO_ID = "TheBloke/DeepSeek-Coder-6.7B-Instruct-GGUF"
MODEL_FILENAME = "deepseek-coder-6.7b-instruct.Q4_K_M.gguf"


# Download model if not already present
def get_model_path():
    """Download the GGUF model from the Hugging Face Hub, or reuse a local copy.

    Tries `hf_hub_download` first (which caches and resumes interrupted
    downloads automatically); if that fails for any reason, falls back to a
    copy of the file sitting next to app.py, and re-raises the original
    error only when no local copy exists either.

    Returns:
        str: Filesystem path to the .gguf model file.

    Raises:
        Exception: Re-raises the download error when the model cannot be
            fetched and no local fallback file is present.
    """
    try:
        # NOTE: `resume_download` and `local_dir_use_symlinks` are deprecated
        # in recent huggingface_hub releases (resume is automatic, and
        # local_dir now stores real files by default), so they are omitted.
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
            local_dir="./models",  # Save to models folder inside the Space
        )
        print(f"✅ Model downloaded to: {model_path}")
        return model_path
    except Exception as e:
        print(f"❌ Error downloading model: {e}")
        # Fallback to local path if already uploaded
        if os.path.exists(MODEL_FILENAME):
            return MODEL_FILENAME
        raise
30
# Initialize model
# NOTE(review): the model file is resolved (and possibly downloaded) at
# import time, so Space startup pays the download cost up front — confirm
# the platform's startup timeout tolerates this for a ~4 GB file.
MODEL_PATH = get_model_path()
llm = None  # populated lazily by load_model() on first request
34
 
35
  def load_model():
36
  """Lazy-load the model only when needed."""
37
  global llm
38
  if llm is None:
39
+ print(f"⏳ Loading model... This may take 1-2 minutes on first run.")
40
  start_time = time.time()
41
 
42
+ # Optimized for free tier constraints
43
  llm = Llama(
44
  model_path=MODEL_PATH,
45
  n_ctx=2048, # Context size (smaller = less memory)
46
+ n_threads=2, # Use 2 CPU threads (free tier has 2)
47
  n_gpu_layers=0, # CPU only on free tier
48
+ verbose=True # Helpful for debugging
49
  )
50
 
51
  load_time = time.time() - start_time
 
103
  label="Generated Code",
104
  language="python"
105
  ),
106
+ title="💻 DeepSeek Coder 6.7B Instruct - o87Dev",
107
+ description="**CPU Deployment** - Running on Hugging Face Spaces free tier. ⚠️ **First request loads model (~1-2 min)**",
108
  examples=[
109
  ["Write a Python function to check if a number is prime"],
110
  ["Create a React component for a login form"],
 
117
  demo.launch(
118
  server_name="0.0.0.0",
119
  server_port=7860,
120
+ share=False
121
+ )