File size: 1,572 Bytes
import gradio as gr
from llama_cpp import Llama
# Options for the shared model instance. The filename is a wildcard pattern
# (note the ``*``) selecting the Q4_K_M file in the repository.
_MODEL_OPTIONS = {
    "repo_id": "WithinUsAI/Opus4.7-GODs.Ghost.Codex-4B.GGuF",
    "filename": "*Q4_K_M*",
    "n_ctx": 8192,
    "n_threads": 4,
    "verbose": False,
}

# Loaded once at import time; every call to chat() reuses this instance.
llm = Llama.from_pretrained(**_MODEL_OPTIONS)
def chat(message, history):
    """Generate the assistant's reply to ``message`` given the prior turns.

    A ChatML-style prompt (``<|im_start|>role ... <|im_end|>``) is assembled
    from a fixed system instruction, the conversation history and the new
    user message, then completed with the module-level ``llm``.

    Args:
        message: The latest user message.
        history: Earlier turns. Accepted in either of two shapes:
            ``(user_msg, assistant_msg)`` pairs, or message dicts carrying
            ``"role"`` and ``"content"`` keys. May be empty or ``None``.

    Returns:
        The generated completion text with surrounding whitespace stripped.
    """
    system_text = (
        "You are GODsGhost Codex, an expert coding assistant. "
        "You reason step-by-step and write clean, efficient code."
    )
    known_roles = ("system", "user", "assistant")

    segments = [f"<|im_start|>system\n{system_text}<|im_end|>\n"]
    for entry in history or []:
        if isinstance(entry, dict):
            # Messages-format history: one dict per message. Unpacking such a
            # dict as a pair would yield its key names, not its text.
            turns = [(entry.get("role"), entry.get("content"))]
        else:
            user_msg, assistant_msg = entry
            turns = [("user", user_msg), ("assistant", assistant_msg)]
        for role, content in turns:
            # Skip unknown roles and empty slots so the literal text "None"
            # never ends up in the prompt.
            if role in known_roles and content is not None:
                segments.append(f"<|im_start|>{role}\n{content}<|im_end|>\n")

    # Finish with the new user turn and an open assistant turn to complete.
    segments.append(
        f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    )
    prompt = "".join(segments)

    output = llm(
        prompt,
        max_tokens=1024,
        # Stop as soon as the model closes its turn or tries to open a new one.
        stop=["<|im_end|>", "<|im_start|>"],
        temperature=0.7,
        top_p=0.95,
        repeat_penalty=1.1,
        echo=False,
    )
    return output["choices"][0]["text"].strip()
# The ChatInterface is nested inside a themed gr.Blocks container so the
# theme is applied safely to the whole page.
_ORANGE_THEME = gr.themes.Default(primary_hue="orange")

_APP_TITLE = "👻 Opus4.7 GOD's Ghost Codex — 4B"
_APP_DESCRIPTION = "Compact code-specialized model by **WithIn Us AI**. Distilled reasoning, local inference, built for developers."

# Starter prompts offered to the user in the chat UI.
_EXAMPLE_PROMPTS = [
    "Write a Python binary search function",
    "Debug this: def add(a,b) return a+b",
    "Explain the difference between async and threading in Python",
    "Write a REST API in FastAPI with CRUD operations",
]

with gr.Blocks(theme=_ORANGE_THEME) as demo:
    gr.ChatInterface(
        fn=chat,
        title=_APP_TITLE,
        description=_APP_DESCRIPTION,
        examples=_EXAMPLE_PROMPTS,
    )

demo.launch()
|