Anonymous0045 committed on
Commit
197748f
·
verified ·
1 Parent(s): 4c3020e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +127 -37
app.py CHANGED
@@ -1,36 +1,84 @@
1
-
 
2
  import gradio as gr
3
  from llama_cpp import Llama
4
  from huggingface_hub import hf_hub_download
5
  import config
6
- import multiprocessing
7
 
8
- print("Downloading model...")
9
 
10
- model_path = hf_hub_download(
11
- repo_id=config.MODEL_REPO,
12
- filename=config.MODEL_FILE
13
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
- print("Loading model...")
 
 
16
 
17
- cpu_threads = multiprocessing.cpu_count()
18
 
19
  llm = Llama(
20
  model_path=model_path,
21
  n_ctx=config.CTX_SIZE,
22
- n_threads=cpu_threads,
23
  n_batch=512,
24
  use_mmap=True,
25
  use_mlock=False,
26
  verbose=False
27
  )
28
 
 
 
 
 
 
 
 
29
  SYSTEM_PROMPT = """You are DeepSeek Coder, an expert programming assistant.
30
- You write clean, correct, efficient code.
31
- Always return only code unless explanation is requested.
 
 
 
 
32
  """
33
 
 
34
  def format_prompt(message, history):
35
 
36
  prompt = SYSTEM_PROMPT + "\n\n"
@@ -43,53 +91,95 @@ def format_prompt(message, history):
43
  return prompt
44
 
45
 
46
- def generate(message, history):
 
 
 
 
47
 
48
  prompt = format_prompt(message, history)
49
 
50
  output = ""
51
 
52
- for token in llm(
53
- prompt,
54
- max_tokens=config.MAX_TOKENS,
55
- temperature=config.TEMPERATURE,
56
- stream=True
57
- ):
58
- text = token["choices"][0]["text"]
59
- output += text
60
- yield output
 
 
 
 
 
 
 
 
 
 
61
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
 
63
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
64
 
65
- gr.Markdown("# DeepSeek Coder 1.3B (Production GGUF)")
 
66
 
67
  chatbot = gr.Chatbot(height=500)
68
 
69
  msg = gr.Textbox(
70
- placeholder="Ask coding question...",
71
  container=False
72
  )
73
 
74
- clear = gr.Button("Clear")
75
-
76
- def user(user_message, history):
77
- return "", history + [[user_message, ""]]
78
 
79
- def bot(history):
80
 
81
- user_message = history[-1][0]
 
 
 
 
 
 
 
 
 
82
 
83
- for response in generate(user_message, history[:-1]):
84
- history[-1][1] = response
85
- yield history
86
 
87
- msg.submit(user, [msg, chatbot], [msg, chatbot], queue=True).then(
88
- bot, chatbot, chatbot
 
 
 
89
  )
90
 
91
- clear.click(lambda: [], None, chatbot, queue=False)
92
 
 
 
 
93
 
94
  demo.queue()
95
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
1
+ import os
2
+ import multiprocessing
3
  import gradio as gr
4
  from llama_cpp import Llama
5
  from huggingface_hub import hf_hub_download
6
  import config
 
7
 
 
8
 
9
# ============================
# Environment & Token Setup
# ============================

# Optional Hugging Face token; only needed for gated/private model repos.
HF_TOKEN = os.environ.get("HF_TOKEN")

if HF_TOKEN is None:
    print("Warning: HF_TOKEN not found. Download may fail for gated repos.")


# ============================
# Model Download (cached automatically by HF)
# ============================

print("Downloading model from Hugging Face Hub...")

try:
    # hf_hub_download returns the local path of the cached GGUF file;
    # repeated runs hit the cache_dir instead of re-downloading.
    model_path = hf_hub_download(
        repo_id=config.MODEL_REPO,
        filename=config.MODEL_FILE,
        token=HF_TOKEN,
        cache_dir="/tmp/hf_cache"
    )

    print(f"Model downloaded successfully: {model_path}")

except Exception as e:
    print("Model download failed:", str(e))
    # Bare raise re-raises the original exception with its traceback intact
    # (idiomatic; `raise e` would append the current frame).
    raise


# ============================
# CPU Optimization
# ============================

# Use every available core for llama.cpp token generation.
CPU_THREADS = multiprocessing.cpu_count()

print(f"CPU Threads available: {CPU_THREADS}")
47
+
48
 
49
# ============================
# Load llama.cpp model
# ============================

print("Loading model into memory...")

# Collect runtime options once, then hand them to the constructor.
_LLAMA_OPTIONS = dict(
    n_ctx=config.CTX_SIZE,
    n_threads=CPU_THREADS,   # saturate all detected cores
    n_batch=512,
    use_mmap=True,           # memory-map the GGUF file (fast startup)
    use_mlock=False,         # do not pin pages; friendlier on small hosts
    verbose=False,
)

llm = Llama(model_path=model_path, **_LLAMA_OPTIONS)

print("Model loaded successfully.")
66
+
67
+
68
# ============================
# Prompt System
# ============================

# System prompt prepended verbatim to every formatted conversation;
# the model sees these rules at the start of each request.
SYSTEM_PROMPT = """You are DeepSeek Coder, an expert programming assistant.

Rules:
- Write clean, correct, production-ready code
- Be concise
- Only explain if asked
- Prefer efficient solutions
"""
80
 
81
+
82
  def format_prompt(message, history):
83
 
84
  prompt = SYSTEM_PROMPT + "\n\n"
 
91
  return prompt
92
 
93
 
94
# ============================
# Streaming Generation
# ============================

def generate_stream(message, history):
    """Yield the model's reply incrementally as tokens arrive.

    Each yielded value is the *entire* text generated so far, so the UI
    can simply replace the displayed message on every update.
    """
    full_prompt = format_prompt(message, history)
    accumulated = ""

    try:
        token_stream = llm(
            full_prompt,
            max_tokens=config.MAX_TOKENS,
            temperature=config.TEMPERATURE,
            top_p=0.95,
            stream=True,
        )
        for chunk in token_stream:
            accumulated += chunk["choices"][0]["text"]
            yield accumulated

    except Exception as e:
        # Surface generation failures in the chat instead of crashing the app.
        yield f"Error during generation: {str(e)}"
118
+
119
+
120
# ============================
# Gradio UI Logic
# ============================

def user(user_message, history):
    """Append the submitted message (with an empty bot slot) and clear the textbox."""
    updated_history = [*history, [user_message, ""]]
    return "", updated_history
126
+
127
+
128
def bot(history):
    """Stream the assistant's answer into the newest chat entry.

    Re-yields the full history on every partial reply so Gradio
    re-renders the chatbot as generation progresses.
    """
    latest_question = history[-1][0]
    prior_turns = history[:-1]

    for partial_reply in generate_stream(latest_question, prior_turns):
        history[-1][1] = partial_reply
        yield history
135
+
136
+
137
# ============================
# Gradio Interface
# ============================

with gr.Blocks(theme=gr.themes.Soft()) as demo:

    gr.Markdown("# DeepSeek Coder 1.3B (GGUF Production)")
    gr.Markdown("Fast, efficient coding assistant running on llama.cpp")

    # Chat transcript display.
    chatbot = gr.Chatbot(height=500)

    # Single-line input; container=False drops the surrounding panel chrome.
    msg = gr.Textbox(
        placeholder="Ask a coding question...",
        container=False
    )

    clear = gr.Button("Clear Chat")

    # On submit: first append the user turn and clear the textbox,
    # then stream the bot reply into that new history entry.
    msg.submit(
        user,
        [msg, chatbot],
        [msg, chatbot],
        queue=True
    ).then(
        bot,
        chatbot,
        chatbot
    )

    # Reset the conversation; queue=False makes the clear feel instant.
    clear.click(
        lambda: [],
        None,
        chatbot,
        queue=False
    )


# ============================
# Launch Server
# ============================

demo.queue()

# 0.0.0.0:7860 — the standard bind for Hugging Face Spaces containers.
demo.launch(
    server_name="0.0.0.0",
    server_port=7860
)