Create app.py
app.py
CHANGED
@@ -7,49 +7,30 @@ import config


# ============================
-#
+# Download Model
# ============================

HF_TOKEN = os.environ.get("HF_TOKEN")

-if HF_TOKEN is None:
-    print("Warning: HF_TOKEN not found. Download may fail for gated repos.")
-
-
-# ============================
-# Model Download (cached automatically by HF)
-# ============================
-
print("Downloading model from Hugging Face Hub...")

-try:
-    model_path = hf_hub_download(
-        …
-    )
-
-    print(f"Model downloaded successfully: {model_path}")
-
-except Exception as e:
-    print("Model download failed:", str(e))
-    raise e
+model_path = hf_hub_download(
+    repo_id=config.MODEL_REPO,
+    filename=config.MODEL_FILE,
+    token=HF_TOKEN,
+    cache_dir="/tmp/hf_cache"
+)
+
+print("Model downloaded successfully:", model_path)


# ============================
-#
+# Load Model
# ============================

CPU_THREADS = multiprocessing.cpu_count()

-print(
-    …
-)
-
-# ============================
-# Load llama.cpp model
-# ============================
-
+print("CPU Threads available:", CPU_THREADS)
print("Loading model into memory...")

llm = Llama(
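The new download call reads its settings from the config module imported at the top of app.py. That file is not part of this commit; below is a minimal hypothetical sketch of what it presumably contains. Only the attribute names MODEL_REPO, MODEL_FILE, MAX_TOKENS, and TEMPERATURE come from app.py; every value is an assumption.

# config.py: hypothetical sketch, values are guesses
MODEL_REPO = "example-org/deepseek-coder-1.3b-instruct-GGUF"  # assumed repo id
MODEL_FILE = "model.Q4_K_M.gguf"                              # assumed GGUF filename
MAX_TOKENS = 512                                              # assumed generation cap
TEMPERATURE = 0.2                                             # assumed; low values suit code tasks

Pointing cache_dir at /tmp/hf_cache keeps the download inside a path that is reliably writable on a Space.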
@@ -66,118 +47,82 @@ print("Model loaded successfully.")


# ============================
-# Prompt
+# Prompt Formatting
# ============================

SYSTEM_PROMPT = """You are DeepSeek Coder, an expert programming assistant.
-
-- Write clean, correct, production-ready code
-- Be concise
-- Only explain if asked
-- Prefer efficient solutions
+You write clean, efficient, production-ready code.
+Only explain if user asks.
"""


-def format_prompt(message, history):
+def build_prompt(messages):
    prompt = SYSTEM_PROMPT + "\n\n"

-    for …
+    for msg in messages:
+        if msg["role"] == "user":
+            prompt += f"User: {msg['content']}\n"
+        elif msg["role"] == "assistant":
+            prompt += f"Assistant: {msg['content']}\n"

-    prompt += …
+    prompt += "Assistant:"

    return prompt


# ============================
-# Streaming
+# Streaming Generator
# ============================

-def generate_stream(message, history):
-    prompt = format_prompt(message, history)
-
-    output = ""
-
-    try:
-        for token in llm(
-            prompt,
-            max_tokens=config.MAX_TOKENS,
-            temperature=config.TEMPERATURE,
-            top_p=0.95,
-            stream=True
-        ):
-            text = token["choices"][0]["text"]
-            output += text
-            yield output
-
-    except Exception as e:
-        yield f"Error during generation: {str(e)}"
+def generate_response(message, history):
+    messages = history + [{"role": "user", "content": message}]
+
+    prompt = build_prompt(messages)
+
+    output = ""
+
+    for token in llm(
+        prompt,
+        max_tokens=config.MAX_TOKENS,
+        temperature=config.TEMPERATURE,
+        top_p=0.95,
+        stream=True
+    ):
+        text = token["choices"][0]["text"]
+        output += text
+        yield output


# ============================
-# Gradio UI Logic
+# Gradio Chat Interface
# ============================

-def …
-    return "", history + [[user_message, ""]]
-
-…
-
-# ============================
-# Gradio Interface
-# ============================
-
-…
-    gr.Markdown("# DeepSeek Coder 1.3B (GGUF Production)")
-    gr.Markdown("Fast, efficient coding assistant running on llama.cpp")
-
-    chatbot = gr.Chatbot(height=500)
-
-    msg = gr.Textbox(
-        placeholder="Ask a coding question...",
-        container=False
-    )
-
-    …
-        bot,
-        chatbot,
-        chatbot
-    )
-
-    clear.click(
-        lambda: [],
-        None,
-        chatbot,
-        queue=False
-    )
+def chat(message, history):
+    history = history or []
+
+    assistant_response = ""
+
+    for partial in generate_response(message, history):
+        assistant_response = partial
+        yield history + [
+            {"role": "user", "content": message},
+            {"role": "assistant", "content": assistant_response},
+        ]


# ============================
-# Launch
+# Launch UI
# ============================

-demo.…
+demo = gr.ChatInterface(
+    fn=chat,
+    title="DeepSeek Coder 1.3B",
+    description="Production GGUF model running on llama.cpp",
+    type="messages"
+)

demo.launch(
    server_name="0.0.0.0",
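Both hunks show llm = Llama( only as unchanged context, so the constructor arguments never appear in this diff. With llama-cpp-python the call plausibly has the shape sketched below, reusing the model_path and CPU_THREADS computed earlier; n_ctx and verbose are assumptions.

# Hypothetical shape of the elided constructor call (llama-cpp-python)
llm = Llama(
    model_path=model_path,   # GGUF file downloaded above
    n_threads=CPU_THREADS,   # one worker per available CPU core
    n_ctx=4096,              # assumed context window
    verbose=False            # assumed; quiets llama.cpp logging
)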
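Because the interface is created with type="messages", Gradio passes history as a list of {"role": ..., "content": ...} dicts, which is exactly the format build_prompt iterates over. For illustration, with an invented two-turn history the function produces:

messages = [
    {"role": "user", "content": "Reverse a string in Python."},
    {"role": "assistant", "content": "s[::-1]"},
    {"role": "user", "content": "Now do it without slicing."},
]

# build_prompt(messages) returns, roughly:
#
#   You are DeepSeek Coder, an expert programming assistant.
#   You write clean, efficient, production-ready code.
#   Only explain if user asks.
#
#   User: Reverse a string in Python.
#   Assistant: s[::-1]
#   User: Now do it without slicing.
#   Assistant: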
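The rewrite also drops the try/except that generate_stream wrapped around generation, so a llama.cpp failure now surfaces as a Gradio error rather than as a chat message. If the old behavior is wanted, a small hypothetical wrapper restores it without touching generate_response:

def generate_response_safe(message, history):
    # Hypothetical helper, not part of this commit: restores the old error path.
    try:
        yield from generate_response(message, history)
    except Exception as e:
        yield f"Error during generation: {e}"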
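The diff is cut off inside demo.launch. The visible server_name="0.0.0.0" binds all interfaces so the Space's proxy can reach the app; the remaining arguments are not shown, but on Spaces the call typically ends along these lines (the port is an assumption, 7860 being the default Spaces routes to):

demo.launch(
    server_name="0.0.0.0",  # shown in the diff: listen on all interfaces
    server_port=7860        # assumed: the default port Hugging Face Spaces expects
)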