truegleai commited on
Commit
c7773a3
·
verified ·
1 Parent(s): 7799a1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -12
app.py CHANGED
@@ -4,27 +4,48 @@ from llama_cpp import Llama
4
  import time
5
  import os
6
 
7
- # Configuration
8
- MODEL_NAME = "DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf"
9
- MODEL_PATH = MODEL_NAME # Since we placed it in the same directory
10
 
11
- # Initialize model (will be loaded on first use)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  llm = None
13
 
14
  def load_model():
15
  """Lazy-load the model only when needed."""
16
  global llm
17
  if llm is None:
18
- print(f"⏳ Loading model {MODEL_NAME}... This may take 1-2 minutes on first run.")
19
  start_time = time.time()
20
 
21
- # CPU-optimized settings for free tier
22
  llm = Llama(
23
  model_path=MODEL_PATH,
24
  n_ctx=2048, # Context size (smaller = less memory)
25
- n_threads=2, # Use 2 CPU threads
26
  n_gpu_layers=0, # CPU only on free tier
27
- verbose=False
28
  )
29
 
30
  load_time = time.time() - start_time
@@ -82,8 +103,8 @@ demo = gr.Interface(
82
  label="Generated Code",
83
  language="python"
84
  ),
85
- title="💻 DeepSeek Coder V2 Lite (16B) - o87Dev",
86
- description="**CPU Deployment** - Largest viable model on Hugging Face Spaces free tier. ⚠️ **First request loads model (~1-2 min)**",
87
  examples=[
88
  ["Write a Python function to check if a number is prime"],
89
  ["Create a React component for a login form"],
@@ -96,5 +117,5 @@ if __name__ == "__main__":
96
  demo.launch(
97
  server_name="0.0.0.0",
98
  server_port=7860,
99
- share=False # Set to True if you want a public link
100
- )
 
4
  import time
5
  import os
6
 
7
# Configuration - Using a 6.7B model that fits free tier
MODEL_REPO_ID = "TheBloke/DeepSeek-Coder-6.7B-Instruct-GGUF"
MODEL_FILENAME = "deepseek-coder-6.7b-instruct.Q4_K_M.gguf"


# Download model if not already present
def get_model_path():
    """Download the GGUF model from the Hugging Face Hub, or reuse a local copy.

    Tries `hf_hub_download` first (which caches and resumes interrupted
    downloads automatically); if that fails for any reason, falls back to a
    copy of the file sitting next to app.py, and re-raises the original
    error only when no local copy exists either.

    Returns:
        str: Filesystem path to the .gguf model file.

    Raises:
        Exception: Re-raises the download error when the model cannot be
            fetched and no local fallback file is present.
    """
    try:
        # NOTE: `resume_download` and `local_dir_use_symlinks` are deprecated
        # in recent huggingface_hub releases (resume is automatic, and
        # local_dir now stores real files by default), so they are omitted.
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
            local_dir="./models",  # Save to models folder inside the Space
        )
        print(f"✅ Model downloaded to: {model_path}")
        return model_path
    except Exception as e:
        print(f"❌ Error downloading model: {e}")
        # Fallback to local path if already uploaded
        if os.path.exists(MODEL_FILENAME):
            return MODEL_FILENAME
        raise
30
# Initialize model
# NOTE(review): the model file is resolved (and possibly downloaded) at
# import time, so Space startup pays the download cost up front — confirm
# the platform's startup timeout tolerates this for a ~4 GB file.
MODEL_PATH = get_model_path()
llm = None  # populated lazily by load_model() on first request
34
 
35
  def load_model():
36
  """Lazy-load the model only when needed."""
37
  global llm
38
  if llm is None:
39
+ print(f"⏳ Loading model... This may take 1-2 minutes on first run.")
40
  start_time = time.time()
41
 
42
+ # Optimized for free tier constraints
43
  llm = Llama(
44
  model_path=MODEL_PATH,
45
  n_ctx=2048, # Context size (smaller = less memory)
46
+ n_threads=2, # Use 2 CPU threads (free tier has 2)
47
  n_gpu_layers=0, # CPU only on free tier
48
+ verbose=True # Helpful for debugging
49
  )
50
 
51
  load_time = time.time() - start_time
 
103
  label="Generated Code",
104
  language="python"
105
  ),
106
+ title="💻 DeepSeek Coder 6.7B Instruct - o87Dev",
107
+ description="**CPU Deployment** - Running on Hugging Face Spaces free tier. ⚠️ **First request loads model (~1-2 min)**",
108
  examples=[
109
  ["Write a Python function to check if a number is prime"],
110
  ["Create a React component for a login form"],
 
117
  demo.launch(
118
  server_name="0.0.0.0",
119
  server_port=7860,
120
+ share=False
121
+ )