Spaces:

Anonymous0045
/

Coder

Sleeping

App Files Files Community

Anonymous0045 committed on Feb 23

Commit

197748f

verified ·

1 Parent(s): 4c3020e

Create app.py

Browse files

Files changed (1) hide show

app.py +127 -37

app.py CHANGED Viewed

@@ -1,36 +1,84 @@
 import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import config
-import multiprocessing
-print("Downloading model...")
-model_path = hf_hub_download(
-    repo_id=config.MODEL_REPO,
-    filename=config.MODEL_FILE
-)
-print("Loading model...")
-cpu_threads = multiprocessing.cpu_count()
 llm = Llama(
     model_path=model_path,
     n_ctx=config.CTX_SIZE,
-    n_threads=cpu_threads,
     n_batch=512,
     use_mmap=True,
     use_mlock=False,
     verbose=False
 )
 SYSTEM_PROMPT = """You are DeepSeek Coder, an expert programming assistant.
-You write clean, correct, efficient code.
-Always return only code unless explanation is requested.
 """
 def format_prompt(message, history):
     prompt = SYSTEM_PROMPT + "\n\n"
@@ -43,53 +91,95 @@ def format_prompt(message, history):
     return prompt
-def generate(message, history):
     prompt = format_prompt(message, history)
     output = ""
-    for token in llm(
-        prompt,
-        max_tokens=config.MAX_TOKENS,
-        temperature=config.TEMPERATURE,
-        stream=True
-    ):
-        text = token["choices"][0]["text"]
-        output += text
-        yield output
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# DeepSeek Coder 1.3B (Production GGUF)")
     chatbot = gr.Chatbot(height=500)
     msg = gr.Textbox(
-        placeholder="Ask coding question...",
         container=False
     )
-    clear = gr.Button("Clear")
-    def user(user_message, history):
-        return "", history + [[user_message, ""]]
-    def bot(history):
-        user_message = history[-1][0]
-        for response in generate(user_message, history[:-1]):
-            history[-1][1] = response
-            yield history
-    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=True).then(
-        bot, chatbot, chatbot
     )
-    clear.click(lambda: [], None, chatbot, queue=False)
 demo.queue()
-demo.launch(server_name="0.0.0.0", server_port=7860)

+import os
+import multiprocessing
 import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 import config
+# ============================
+# Environment & Token Setup
+# ============================
+HF_TOKEN = os.environ.get("HF_TOKEN")
+if HF_TOKEN is None:
+    print("Warning: HF_TOKEN not found. Download may fail for gated repos.")
+# ============================
+# Model Download (cached automatically by HF)
+# ============================
+print("Downloading model from Hugging Face Hub...")
+try:
+    model_path = hf_hub_download(
+        repo_id=config.MODEL_REPO,
+        filename=config.MODEL_FILE,
+        token=HF_TOKEN,
+        cache_dir="/tmp/hf_cache"
+    )
+    print(f"Model downloaded successfully: {model_path}")
+except Exception as e:
+    print("Model download failed:", str(e))
+    raise e
+# ============================
+# CPU Optimization
+# ============================
+CPU_THREADS = multiprocessing.cpu_count()
+print(f"CPU Threads available: {CPU_THREADS}")
+# ============================
+# Load llama.cpp model
+# ============================
+print("Loading model into memory...")
 llm = Llama(
     model_path=model_path,
     n_ctx=config.CTX_SIZE,
+    n_threads=CPU_THREADS,
     n_batch=512,
     use_mmap=True,
     use_mlock=False,
     verbose=False
 )
+print("Model loaded successfully.")
+# ============================
+# Prompt System
+# ============================
 SYSTEM_PROMPT = """You are DeepSeek Coder, an expert programming assistant.
+Rules:
+- Write clean, correct, production-ready code
+- Be concise
+- Only explain if asked
+- Prefer efficient solutions
 """
 def format_prompt(message, history):
     prompt = SYSTEM_PROMPT + "\n\n"
     return prompt
+# ============================
+# Streaming Generation
+# ============================
+def generate_stream(message, history):  # generator: yields the accumulated reply text after every streamed chunk
     prompt = format_prompt(message, history)
     output = ""
+    try:
+        for token in llm(
+            prompt,
+            max_tokens=config.MAX_TOKENS,
+            temperature=config.TEMPERATURE,
+            top_p=0.95,
+            stream=True
+        ):
+            text = token["choices"][0]["text"]  # text carried by the first choice of this streamed chunk
+            output += text
+            yield output  # the whole text so far, not only the newest piece
+    except Exception as e:
+        yield f"Error during generation: {str(e)}"  # failure is yielded as the message text rather than re-raised
+# ============================
+# Gradio UI Logic
+# ============================
+def user(user_message, history):  # submit handler; its two return values feed the [msg, chatbot] outputs
+    return "", history + [[user_message, ""]]  # "" goes to the textbox; a new [user, reply] pair with an empty reply is appended
+def bot(history):  # generator: fills in the reply slot of the last history entry and yields the history each step
+    user_message = history[-1][0]  # user text of the last pair (user() runs before bot in the submit chain)
+    for response in generate_stream(user_message, history[:-1]):  # history[:-1] leaves out the pending pair
+        history[-1][1] = response  # replace the reply slot with the text generated so far
+        yield history
+# ============================
+# Gradio Interface
+# ============================
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# DeepSeek Coder 1.3B (GGUF Production)")
+    gr.Markdown("Fast, efficient coding assistant running on llama.cpp")
     chatbot = gr.Chatbot(height=500)
     msg = gr.Textbox(
+        placeholder="Ask a coding question...",
         container=False
     )
+    clear = gr.Button("Clear Chat")
+    msg.submit(
+        user,
+        [msg, chatbot],
+        [msg, chatbot],
+        queue=True
+    ).then(
+        bot,
+        chatbot,
+        chatbot
+    )
+    clear.click(
+        lambda: [],
+        None,
+        chatbot,
+        queue=False
     )
+# ============================
+# Launch Server
+# ============================
 demo.queue()
+demo.launch(
+    server_name="0.0.0.0",
+    server_port=7860
+)