import gradio as gr
from llama_cpp import Llama


# Download and load the GGUF model from the Hugging Face Hub; the glob pattern
# selects the Q4_K_M quantized file in the repo.
llm = Llama.from_pretrained(
    repo_id="WithinUsAI/IBM-Grok4-Ultra.Fast.Coder-1B-GGUF",
    filename="*Q4_K_M*",
    n_ctx=8192,      # context window size in tokens
    n_threads=4,     # CPU threads used for inference
    verbose=False,
)
|
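# Optional smoke test, an illustrative addition (not in the original app):
# uncomment to confirm the model loads and generates before the UI starts.
# print(llm("# Python: reverse a string\n", max_tokens=48)["choices"][0]["text"])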
|
def chat(message, history):
    # Build a ChatML prompt: system message, prior turns from history (a list
    # of (user, assistant) pairs under Gradio's default "tuples" chat format),
    # then the new message, ending with an open assistant tag for the model
    # to complete.
    prompt = "<|im_start|>system\nYou are an ultra-fast, expert coding assistant. You write clean, efficient, and well-documented code.<|im_end|>\n"

    for user_msg, assistant_msg in history:
        prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
        prompt += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"

    prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

    # Generate the full reply in one call (non-streaming).
    output = llm(
        prompt,
        max_tokens=1024,
        stop=["<|im_end|>", "<|im_start|>"],  # stop at ChatML turn boundaries
        temperature=0.6,
        top_p=0.95,
        repeat_penalty=1.1,
        echo=False,  # return only the completion, not the prompt
    )
    return output["choices"][0]["text"].strip()
|
|
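# A streaming variant, included as a sketch (chat_stream is an illustrative
# name, not part of the original app): llama-cpp-python returns an iterator of
# partial completions when called with stream=True, and gr.ChatInterface
# streams the reply when its fn is a generator. Pass fn=chat_stream below to
# try it.
def chat_stream(message, history):
    # Same ChatML prompt construction as chat() above.
    prompt = (
        "<|im_start|>system\nYou are an ultra-fast, expert coding assistant. "
        "You write clean, efficient, and well-documented code.<|im_end|>\n"
    )
    for user_msg, assistant_msg in history:
        prompt += f"<|im_start|>user\n{user_msg}<|im_end|>\n"
        prompt += f"<|im_start|>assistant\n{assistant_msg}<|im_end|>\n"
    prompt += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

    partial = ""
    for chunk in llm(
        prompt,
        max_tokens=1024,
        stop=["<|im_end|>", "<|im_start|>"],
        temperature=0.6,
        top_p=0.95,
        repeat_penalty=1.1,
        stream=True,  # yield partial completions instead of one result
    ):
        partial += chunk["choices"][0]["text"]
        yield partial  # ChatInterface re-renders the growing reply
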
# Build the Gradio UI: a themed Blocks layout wrapping a ChatInterface.
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    gr.ChatInterface(
        fn=chat,
        title="💻 IBM-Grok4 Ultra Fast Coder — 1B",
        description="Lightning-fast 1B coding assistant by **WithIn Us AI**. Built for speed, efficiency, and accurate code generation.",
        examples=[
            "Write a Python web scraper using BeautifulSoup.",
            "Create a simple React component for a login form.",
            "Explain the difference between TCP and UDP.",
            "Debug this code: def add(a,b) return a+b",
        ],
    )
|
|
demo.launch()
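# Launch notes (assumptions, not in the original app): launch() serves on
# http://127.0.0.1:7860 by default; pass share=True for a temporary public
# link, or server_name="0.0.0.0" to listen on all interfaces (e.g., in a
# container or a Hugging Face Space).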
|
|