Invescoz committed on
Commit b379e0d · verified · 1 Parent(s): 134d0c7

Update app.py

Files changed (1)
  1. app.py +28 -54
app.py CHANGED
@@ -1,54 +1,28 @@
- from flask import Flask, request, Response
- from huggingface_hub import InferenceClient
- import os
- from typing import Generator
-
- # Initialize Flask
- app = Flask(__name__)
-
- # Initialize Hugging Face client
- client = InferenceClient(model="Qwen/Qwen2.5-1.5B-Instruct", token=os.getenv("HF_TOKEN"))
-
- def generate_code_and_explanation(prompt: str) -> Generator[str, None, None]:
-     """
-     Generates code + explanation with streaming from HF model.
-     """
-     system_prompt = (
-         "You are a coding assistant like Grok. Given a user prompt, generate the requested code "
-         "and provide a clear explanation. Stream the output line by line. "
-         "Format code in ```python blocks and explanations in plain text with bullet points."
-     )
-
-     messages = [
-         {"role": "system", "content": system_prompt},
-         {"role": "user", "content": prompt}
-     ]
-
-     for chunk in client.chat_completion(
-         messages=messages,
-         max_tokens=3000,
-         temperature=0.7,
-         top_p=0.9,
-         stream=True
-     ):
-         content = chunk.choices[0].delta.content
-         if content:
-             yield content
-
- @app.route("/generate", methods=["POST"])
- def generate():
-     """
-     Flask endpoint to generate code from user prompt.
-     """
-     data = request.json
-     prompt = data.get("prompt", "")
-
-     def event_stream():
-         for chunk in generate_code_and_explanation(prompt):
-             yield chunk
-
-     return Response(event_stream(), mimetype="text/plain")
-
- if __name__ == "__main__":
-     # Run Flask (Hugging Face Spaces will expose this as API)
-     app.run(host="0.0.0.0", port=7860)
 
+ import gradio as gr
+ from llama_cpp import Llama
+ from huggingface_hub import hf_hub_download
+
+ # Download GGUF model (Q4_K_M quant; small enough to fit in 16GB RAM)
+ model_path = hf_hub_download(
+     repo_id="bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF",
+     filename="DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf"
+ )
+
+ # Load model on CPU (the default n_gpu_layers=0 keeps all layers on CPU; n_ctx=2048 to start small)
+ llm = Llama(model_path, n_ctx=2048, n_threads=2, verbose=False)
+
+ def chat_fn(message, history):  # history is ignored; each request is treated as a single turn
+     # Format prompt (DeepSeek-Coder template: system prompt, then the user turn, then the assistant cue)
+     system_prompt = "You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer."
+     prompt = f"<|begin▁of▁sentence|>{system_prompt}\n\nUser: {message}\n\nAssistant:"
+
+     # Stream response (ChatInterface expects the accumulated reply on each yield, not token deltas)
+     partial = ""
+     for chunk in llm(prompt, max_tokens=512, temperature=0.7, stream=True):
+         partial += chunk['choices'][0]['text']
+         yield partial
+
+ # Gradio chat UI with streaming
+ gr.ChatInterface(
+     fn=chat_fn,
+     title="DeepSeek Coder Assistant",
+     description="Send coding prompts for live streaming responses."
+ ).launch()
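
For the Space to build, requirements.txt presumably needs gradio, llama-cpp-python, and huggingface_hub to match the new imports. Once the app is running, the ChatInterface can also be called programmatically; below is a minimal sketch using gradio_client, where the Space id Invescoz/deepseek-coder-assistant is a hypothetical placeholder (ChatInterface exposes its API under /chat):

    from gradio_client import Client

    # Hypothetical Space id; substitute the real one
    client = Client("Invescoz/deepseek-coder-assistant")

    # ChatInterface registers a /chat endpoint; predict() returns the completed reply
    result = client.predict("Write a Python function that reverses a string.", api_name="/chat")
    print(result)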