Spaces:

Sachin5112
/

Fast-AI

Sleeping

App Files Community

Sachin5112 committed on 24 days ago

Commit

91ce3bd

verified ·

1 Parent(s): 40a8619

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -36

app.py CHANGED Viewed

@@ -3,62 +3,109 @@ import gradio as gr
 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
-# 1. Download the 1.5B model (Snappy for CPU)
 model_path = hf_hub_download(
-    repo_id="Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF",
-    filename="qwen2.5-coder-1.5b-instruct-q8_0.gguf"
 )
-# 2. Initialize Model
 llm = Llama(
     model_path=model_path,
-    n_ctx=2048,
-    n_threads=os.cpu_count() or 2,
     n_gpu_layers=0,
     verbose=False
 )
-custom_css = """
-.gradio-container { background-color: #0b0f19 !important; color: #ffffff !important; }
-#title-text { text-align: center; color: #00d4ff; padding: 10px; }
-footer { display: none !important; }
-"""
 def generate_response(message, history):
-    prompt = "<|im_start|>system\nYou are Zenith, an expert code explainer. Be concise and clear.<|im_end|>\n"
     for msg in history:
-        role = msg.get("role") if isinstance(msg, dict) else "user"
-        content = msg.get("content") if isinstance(msg, dict) else msg[0]
-        prompt += f"<|im_start|>{role}\n{content}<|im_end|>\n"
     prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
-    stream = llm(prompt, max_tokens=1024, temperature=0.1, stream=True)
     partial = ""
     for token in stream:
-        piece = token["choices"][0]["text"]
-        partial += piece
         yield partial
-# 3. Fixed UI for Gradio 6.0
-with gr.Blocks() as demo:
-    # Changed gr.Div to gr.Group
-    with gr.Group():
-        gr.Markdown("# ⚡ ZENITH CODER", elem_id="title-text")
-        gr.Markdown("Fast 1.5B CPU Assistant", elem_id="title-text")
-    gr.ChatInterface(
-        fn=generate_response,
-        type="messages",
-    )
 if __name__ == "__main__":
-    # Moved theme and css here to satisfy Gradio 6.0 requirements
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        theme=gr.themes.Soft(primary_hue="blue"),
-        css=custom_css
-    )

 from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
+# ----------------------------
+# Model Download
+# ----------------------------
 model_path = hf_hub_download(
+    repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
+    filename="qwen2.5-coder-7b-instruct-q8_0.gguf"
 )
+# ----------------------------
+# Load Model (CPU)
+# ----------------------------
 llm = Llama(
     model_path=model_path,
+    n_ctx=4096,
+    n_threads=os.cpu_count(),
+    n_batch=512,
     n_gpu_layers=0,
     verbose=False
 )
+llm("Hello", max_tokens=1)  # warmup
+# ----------------------------
+# Chat Function
+# ----------------------------
 def generate_response(message, history):
+    prompt = "<|im_start|>system\nYou are an expert coding assistant.<|im_end|>\n"
     for msg in history:
+        if isinstance(msg, dict):
+            role = msg.get("role")
+            content = msg.get("content")
+            if role == "user":
+                prompt += f"<|im_start|>user\n{content}<|im_end|>\n"
+            elif role == "assistant":
+                prompt += f"<|im_start|>assistant\n{content}<|im_end|>\n"
     prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+    stream = llm(
+        prompt,
+        max_tokens=1024,
+        temperature=0.1,
+        top_p=0.9,
+        repeat_penalty=1.1,
+        stream=True
+    )
     partial = ""
     for token in stream:
+        partial += token["choices"][0]["text"]
         yield partial
+# ----------------------------
+# UI Styling (Hugging Face inspired)
+# ----------------------------
+custom_css = """
+#title {
+    text-align: center;
+    font-size: 28px;
+    font-weight: bold;
+}
+#subtitle {
+    text-align: center;
+    color: #888;
+    margin-bottom: 20px;
+}
+"""
+# ----------------------------
+# UI
+# ----------------------------
+with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
+    # Header
+    gr.HTML("""
+    <div style="text-align:center;">
+        <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
+             width="80"/>
+        <h1 id="title">Qwen2.5 Coder (CPU Edition)</h1>
+        <p id="subtitle">Local AI Coding Assistant powered by GGUF + llama.cpp</p>
+    </div>
+    """)
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### ⚡ Features\n- Runs fully on CPU\n- Streaming responses\n- Lightweight GGUF model")
+        with gr.Column(scale=3):
+            chatbot = gr.ChatInterface(
+                fn=generate_response,
+                chatbot=gr.Chatbot(height=500),
+                textbox=gr.Textbox(placeholder="Ask me to write code, debug, or explain...", scale=7),
+                title="",
+                description=""
+            )
+# ----------------------------
+# Launch
+# ----------------------------
 if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)