Update app.py
Browse files
app.py
CHANGED
|
@@ -1,54 +1,28 @@
|
|
| 1 |
-
|
| 2 |
-
from
|
| 3 |
-
import
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
system_prompt =
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
)
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
for
|
| 28 |
-
|
| 29 |
-
max_tokens=3000,
|
| 30 |
-
temperature=0.7,
|
| 31 |
-
top_p=0.9,
|
| 32 |
-
stream=True
|
| 33 |
-
):
|
| 34 |
-
content = chunk.choices[0].delta.content
|
| 35 |
-
if content:
|
| 36 |
-
yield content
|
| 37 |
-
|
| 38 |
-
@app.route("/generate", methods=["POST"])
|
| 39 |
-
def generate():
|
| 40 |
-
"""
|
| 41 |
-
Flask endpoint to generate code from user prompt.
|
| 42 |
-
"""
|
| 43 |
-
data = request.json
|
| 44 |
-
prompt = data.get("prompt", "")
|
| 45 |
-
|
| 46 |
-
def event_stream():
|
| 47 |
-
for chunk in generate_code_and_explanation(prompt):
|
| 48 |
-
yield chunk
|
| 49 |
-
|
| 50 |
-
return Response(event_stream(), mimetype="text/plain")
|
| 51 |
-
|
| 52 |
-
if __name__ == "__main__":
|
| 53 |
-
# Run Flask (Hugging Face Spaces will expose this as API)
|
| 54 |
-
app.run(host="0.0.0.0", port=7860)
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
from llama_cpp import Llama
|
| 3 |
+
from huggingface_hub import hf_hub_download
|
| 4 |
+
|
| 5 |
+
# Download the quantized GGUF weights once at process start.
# Q4_K_M is ~6 GB on disk, so it fits comfortably in 16 GB of RAM.
model_path = hf_hub_download(
    repo_id="bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF",
    filename="DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf",
)

# Load the model for CPU inference.
# BUG FIX (comment only): the original note claimed "n_gpu_layers=-1 for
# full CPU" — that is backwards. n_gpu_layers=-1 offloads ALL layers to
# the GPU; the default of 0 (used here, since the kwarg is omitted) is
# full CPU. n_ctx=2048 keeps the KV cache small to start; raise it if
# long prompts get truncated. n_threads=2 matches the small Spaces CPU.
llm = Llama(model_path, n_ctx=2048, n_threads=2, verbose=False)
|
| 13 |
+
|
| 14 |
+
def chat_fn(message, history):
    """Stream a reply from the local DeepSeek-Coder model for Gradio.

    Parameters
    ----------
    message : str
        The latest user message from the chat box.
    history : list
        Prior turns supplied by gr.ChatInterface.
        NOTE(review): currently unused — every request is answered with
        no conversational context; wire it into the prompt if multi-turn
        memory is wanted.

    Yields
    ------
    str
        The *accumulated* assistant reply so far.
    """
    system_prompt = "You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer."
    # DeepSeek-Coder instruct template.
    # BUG FIX: the original placed the EOS token <|end▁of▁sentence|>
    # between the user turn and the assistant header (and emitted
    # "Assistant:" twice), which tells the model the conversation is
    # already over before generation even starts.
    prompt = f"<|begin▁of▁sentence|>{system_prompt}\nUser: {message}\nAssistant:"

    # BUG FIX: gr.ChatInterface REPLACES the displayed message with each
    # yielded value, so we must yield the growing accumulated text.
    # Yielding raw token deltas (as the original did) would make the UI
    # show only the newest token instead of the full response.
    partial = ""
    for chunk in llm(prompt, max_tokens=512, temperature=0.7, stream=True):
        partial += chunk['choices'][0]['text']
        yield partial
|
| 22 |
+
|
| 23 |
+
# Gradio chat UI with streaming
|
| 24 |
+
# Build the streaming chat UI, then launch it (blocks until shutdown).
# Because chat_fn is a generator, ChatInterface streams its output live.
chat_ui = gr.ChatInterface(
    chat_fn,
    title="DeepSeek Coder Assistant",
    description="Send coding prompts for live streaming responses.",
)
chat_ui.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|