Fast-AI-Intelligent

Runtime error

App Files Files Community

Sachin5112 committed 16 days ago

Commit

f3ec931

verified ·

1 Parent(s): 91ce3bd

Update app.py

Browse files

Files changed (1) hide show

app.py +61 -48

app.py CHANGED Viewed

@@ -4,16 +4,13 @@ from llama_cpp import Llama
 from huggingface_hub import hf_hub_download
 # ----------------------------
-# Model Download
 # ----------------------------
 model_path = hf_hub_download(
     repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
     filename="qwen2.5-coder-7b-instruct-q8_0.gguf"
 )
-# ----------------------------
-# Load Model (CPU)
-# ----------------------------
 llm = Llama(
     model_path=model_path,
     n_ctx=4096,
@@ -23,25 +20,19 @@ llm = Llama(
     verbose=False
 )
-llm("Hello", max_tokens=1)  # warmup
 # ----------------------------
-# Chat Function
 # ----------------------------
 def generate_response(message, history):
     prompt = "<|im_start|>system\nYou are an expert coding assistant.<|im_end|>\n"
-    for msg in history:
-        if isinstance(msg, dict):
-            role = msg.get("role")
-            content = msg.get("content")
-            if role == "user":
-                prompt += f"<|im_start|>user\n{content}<|im_end|>\n"
-            elif role == "assistant":
-                prompt += f"<|im_start|>assistant\n{content}<|im_end|>\n"
     prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
@@ -54,58 +45,80 @@ def generate_response(message, history):
         stream=True
     )
-    partial = ""
     for token in stream:
-        partial += token["choices"][0]["text"]
-        yield partial
 # ----------------------------
-# UI Styling (Hugging Face inspired)
 # ----------------------------
-custom_css = """
-#title {
-    text-align: center;
-    font-size: 28px;
-    font-weight: bold;
 }
-#subtitle {
-    text-align: center;
-    color: #888;
-    margin-bottom: 20px;
 }
 """
 # ----------------------------
 # UI
 # ----------------------------
-with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
-    # Header
     gr.HTML("""
-    <div style="text-align:center;">
         <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
-             width="80"/>
-        <h1 id="title">Qwen2.5 Coder (CPU Edition)</h1>
-        <p id="subtitle">Local AI Coding Assistant powered by GGUF + llama.cpp</p>
     </div>
     """)
-    with gr.Row():
-        with gr.Column(scale=1):
-            gr.Markdown("### ⚡ Features\n- Runs fully on CPU\n- Streaming responses\n- Lightweight GGUF model")
-        with gr.Column(scale=3):
-            chatbot = gr.ChatInterface(
-                fn=generate_response,
-                chatbot=gr.Chatbot(height=500),
-                textbox=gr.Textbox(placeholder="Ask me to write code, debug, or explain...", scale=7),
-                title="",
-                description=""
-            )
-# ----------------------------
-# Launch
-# ----------------------------
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)

 from huggingface_hub import hf_hub_download
 # ----------------------------
+# Model
 # ----------------------------
 model_path = hf_hub_download(
     repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
     filename="qwen2.5-coder-7b-instruct-q8_0.gguf"
 )
 llm = Llama(
     model_path=model_path,
     n_ctx=4096,
     verbose=False
 )
+llm("warmup", max_tokens=1)
 # ----------------------------
+# Chat Logic
 # ----------------------------
 def generate_response(message, history):
     prompt = "<|im_start|>system\nYou are an expert coding assistant.<|im_end|>\n"
+    for user, assistant in history:
+        prompt += f"<|im_start|>user\n{user}<|im_end|>\n"
+        prompt += f"<|im_start|>assistant\n{assistant}<|im_end|>\n"
     prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
         stream=True
     )
+    out = ""
     for token in stream:
+        out += token["choices"][0]["text"]
+        yield out
 # ----------------------------
+# MOBILE APP CSS
 # ----------------------------
+css = """
+/* Center mobile frame */
+.gradio-container {
+    max-width: 430px !important;
+    margin: auto !important;
+    height: 100vh;
+    border-radius: 20px;
+    box-shadow: 0 10px 40px rgba(0,0,0,0.2);
+    overflow: hidden;
+    background: #0b0f19;
+}
+/* Chat background */
+.chatbot {
+    background: #0b0f19 !important;
+}
+/* User bubble */
+.user {
+    background: #2b6fff !important;
+    color: white !important;
+    border-radius: 18px 18px 4px 18px !important;
+    padding: 10px !important;
 }
+/* Bot bubble */
+.assistant {
+    background: #1c1f2a !important;
+    color: white !important;
+    border-radius: 18px 18px 18px 4px !important;
+    padding: 10px !important;
 }
+/* Input bar */
+textarea {
+    border-radius: 20px !important;
+    padding: 12px !important;
+}
+/* Hide footer */
+footer {display:none !important;}
 """
 # ----------------------------
 # UI
 # ----------------------------
+with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
     gr.HTML("""
+    <div style="text-align:center; padding:10px;">
         <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
+             width="60"/>
+        <h2 style="color:white; margin:5px;">Qwen Coder</h2>
+        <p style="color:gray; font-size:12px;">Mobile AI Coding Assistant</p>
     </div>
     """)
+    gr.ChatInterface(
+        fn=generate_response,
+        fill_height=True,
+        textbox=gr.Textbox(placeholder="Message...", container=False),
+        chatbot=gr.Chatbot(height=600, type="tuples"),
+    )
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)