Update app.py
Browse files
app.py
CHANGED
|
@@ -210,22 +210,25 @@ HTML_CONTENT = '''
|
|
| 210 |
</body>
|
| 211 |
</html>
|
| 212 |
'''
|
| 213 |
-
|
| 214 |
def download_model():
|
| 215 |
model_name = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
|
| 216 |
-
model_file = "deepseek-coder-v2-lite-instruct.Q6_K.gguf" #
|
| 217 |
-
return hf_hub_download(model_name, filename=model_file)
|
| 218 |
|
| 219 |
-
def initialize_model(
|
| 220 |
-
|
| 221 |
-
model_path=
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 226 |
|
| 227 |
-
|
| 228 |
-
llm = initialize_model(model_path)
|
| 229 |
|
| 230 |
system_prompt = (
|
| 231 |
"You are a helpful AI coding assistant. Your mission is to help people with programming "
|
|
@@ -236,24 +239,35 @@ chat_history = [{"role": "system", "content": system_prompt}]
|
|
| 236 |
|
| 237 |
@app.route('/')
|
| 238 |
def index():
|
| 239 |
-
return
|
| 240 |
|
| 241 |
@app.route('/chat')
|
| 242 |
def chat():
|
| 243 |
global chat_history
|
| 244 |
user_message = request.args.get('message', '')
|
|
|
|
|
|
|
|
|
|
| 245 |
chat_history.append({"role": "user", "content": user_message})
|
| 246 |
|
| 247 |
-
full_prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in chat_history])
|
| 248 |
-
full_prompt += "\nAssistant:"
|
| 249 |
-
|
| 250 |
def generate():
|
| 251 |
ai_response = ""
|
| 252 |
-
|
| 253 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 254 |
if chunk:
|
| 255 |
ai_response += chunk
|
| 256 |
yield f"data: {chunk}\n\n"
|
|
|
|
| 257 |
chat_history.append({"role": "assistant", "content": ai_response.strip()})
|
| 258 |
if len(chat_history) > 10: # Limit history to last 10 messages
|
| 259 |
chat_history = chat_history[-10:]
|
|
|
|
| 210 |
</body>
|
| 211 |
</html>
|
| 212 |
'''
|
|
|
|
| 213 |
def download_model():
    """Fetch the quantized DeepSeek-Coder GGUF weights from the Hugging Face Hub.

    Returns:
        str: local filesystem path of the downloaded weights file
        (huggingface_hub caches it, so repeat calls are cheap).
    """
    repo = "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct"
    # Q6_K quantization; another quantized variant from the same repo works too.
    weights_file = "deepseek-coder-v2-lite-instruct.Q6_K.gguf"
    return hf_hub_download(repo_id=repo, filename=weights_file)
|
| 217 |
|
| 218 |
+
def initialize_model():
    """Download the weights and construct a llama.cpp model instance.

    Returns:
        Llama | None: a ready-to-use model, or None when downloading or
        loading fails (callers must check for None before use).
    """
    try:
        # download_model() yields the local GGUF path; feed it straight in.
        return Llama(
            model_path=download_model(),
            n_ctx=4096,
            n_threads=4,
            n_gpu_layers=-1,  # offload all layers to GPU when one is available
        )
    except Exception as exc:
        # Best-effort startup: report the failure and signal it with None.
        print(f"Error initializing model: {exc}")
        return None
|
| 230 |
|
| 231 |
+
# Load the model once at import time. May be None when initialization
# failed; the /chat route checks for that before serving requests.
llm = initialize_model()
|
|
|
|
| 232 |
|
| 233 |
system_prompt = (
|
| 234 |
"You are a helpful AI coding assistant. Your mission is to help people with programming "
|
|
|
|
| 239 |
|
| 240 |
@app.route('/')
def index():
    """Serve the single-page chat UI.

    The markup is embedded in the module-level HTML_CONTENT string, so no
    templates/ directory is needed. (The previous render_template('index.html')
    call would raise TemplateNotFound because that template does not exist.)
    """
    return HTML_CONTENT
|
| 243 |
|
| 244 |
@app.route('/chat')
def chat():
    """SSE endpoint: stream a model completion for the user's message.

    Reads the prompt from the ``message`` query parameter, appends it to the
    shared chat_history, and streams the assistant reply to the client as
    ``text/event-stream`` chunks, terminated by a ``[DONE]`` event.
    """
    global chat_history
    user_message = request.args.get('message', '')
    if not llm:
        # Model failed to load at startup; report it over the same SSE channel.
        return Response("data: Model not loaded\n\ndata: [DONE]\n\n", content_type='text/event-stream')

    chat_history.append({"role": "user", "content": user_message})

    def generate():
        # The history trim below rebinds chat_history; without this declaration
        # the name would be compiled as local to generate() and every earlier
        # read (the list comprehension) would raise UnboundLocalError.
        global chat_history
        ai_response = ""
        # Format messages for the model
        messages = [{"role": msg["role"], "content": msg["content"]} for msg in chat_history]

        stream = llm.create_chat_completion(
            messages=messages,
            max_tokens=1000,
            stop=["User:"],
            stream=True
        )

        for output in stream:
            chunk = output['choices'][0]['delta'].get('content', '')
            if chunk:
                ai_response += chunk
                yield f"data: {chunk}\n\n"

        # Persist history before the final yield so a client disconnect on
        # [DONE] cannot abort the bookkeeping.
        chat_history.append({"role": "assistant", "content": ai_response.strip()})
        if len(chat_history) > 10:  # Limit history to last 10 messages
            chat_history = chat_history[-10:]
        # Error path already emits [DONE]; the success path must match so the
        # client knows the stream is finished.
        yield "data: [DONE]\n\n"

    return Response(generate(), content_type='text/event-stream')
|