Invescoz committed on
Commit b379e0d · verified · 1 Parent(s): 134d0c7

Update app.py

Files changed (1)
  1. app.py +28 -54
app.py CHANGED
@@ -1,54 +1,28 @@
- from flask import Flask, request, Response
- from huggingface_hub import InferenceClient
- import os
- from typing import Generator
-
- # Initialize Flask
- app = Flask(__name__)
-
- # Initialize Hugging Face client
- client = InferenceClient(model="Qwen/Qwen2.5-1.5B-Instruct", token=os.getenv("HF_TOKEN"))
-
- def generate_code_and_explanation(prompt: str) -> Generator[str, None, None]:
-     """
-     Generates code + explanation with streaming from HF model.
-     """
-     system_prompt = (
-         "You are a coding assistant like Grok. Given a user prompt, generate the requested code "
-         "and provide a clear explanation. Stream the output line by line. "
-         "Format code in ```python blocks and explanations in plain text with bullet points."
-     )
-
-     messages = [
-         {"role": "system", "content": system_prompt},
-         {"role": "user", "content": prompt}
-     ]
-
-     for chunk in client.chat_completion(
-         messages=messages,
-         max_tokens=3000,
-         temperature=0.7,
-         top_p=0.9,
-         stream=True
-     ):
-         content = chunk.choices[0].delta.content
-         if content:
-             yield content
-
- @app.route("/generate", methods=["POST"])
- def generate():
-     """
-     Flask endpoint to generate code from user prompt.
-     """
-     data = request.json
-     prompt = data.get("prompt", "")
-
-     def event_stream():
-         for chunk in generate_code_and_explanation(prompt):
-             yield chunk
-
-     return Response(event_stream(), mimetype="text/plain")
-
- if __name__ == "__main__":
-     # Run Flask (Hugging Face Spaces will expose this as API)
-     app.run(host="0.0.0.0", port=7860)
 
+ import gradio as gr
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download
+
+ # Download GGUF model (Q4_K_M quant; small enough to fit in 16GB RAM)
+ model_path = hf_hub_download(
+     repo_id="bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF",
+     filename="DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf"
+ )
+
+ # Load model on CPU (the default n_gpu_layers=0 keeps all layers on CPU; n_ctx=2048 to start small)
+ llm = Llama(model_path, n_ctx=2048, n_threads=2, verbose=False)
+
+ def chat_fn(message, history):  # history is ignored; each request is treated as a single turn
+     # Format prompt (DeepSeek-Coder template: system prompt, then the user turn, then the assistant cue)
+     system_prompt = "You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer."
+     prompt = f"<|begin▁of▁sentence|>{system_prompt}\n\nUser: {message}\n\nAssistant:"
+
+     # Stream response (ChatInterface expects the accumulated reply on each yield, not token deltas)
+     partial = ""
+     for chunk in llm(prompt, max_tokens=512, temperature=0.7, stream=True):
+         partial += chunk['choices'][0]['text']
+         yield partial
+
+ # Gradio chat UI with streaming
+ gr.ChatInterface(
+     fn=chat_fn,
+     title="DeepSeek Coder Assistant",
+     description="Send coding prompts for live streaming responses."
+ ).launch()
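
For the Space to build, requirements.txt presumably needs gradio, llama-cpp-python, and huggingface_hub to match the new imports. Once the app is running, the ChatInterface can also be called programmatically; below is a minimal sketch using gradio_client, where the Space id Invescoz/deepseek-coder-assistant is a hypothetical placeholder (ChatInterface exposes its API under /chat):

    from gradio_client import Client

    # Hypothetical Space id; substitute the real one
    client = Client("Invescoz/deepseek-coder-assistant")

    # ChatInterface registers a /chat endpoint; predict() returns the completed reply
    result = client.predict("Write a Python function that reverses a string.", api_name="/chat")
    print(result)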