"""Gradio chat UI for a local GGUF code model served through llama-cpp-python."""

import gradio as gr
from llama_cpp import Llama

# Context window requested from llama.cpp, and the reply budget per request.
N_CTX = 8192
MAX_NEW_TOKENS = 1024

SYSTEM_PROMPT = (
    "You are GODsGhost Codex, an expert coding assistant. "
    "You reason step-by-step and write clean, efficient code."
)

llm = Llama.from_pretrained(
    repo_id="WithinUsAI/Opus4.7-GODs.Ghost.Codex-4B.GGuF",
    filename="*Q4_K_M*",
    n_ctx=N_CTX,
    n_threads=4,
    verbose=False,
)


def _text_of(content) -> str:
    """Flatten a chat message payload into plain text.

    Accepts a plain string, ``None``, a dict carrying a ``"text"`` entry, or a
    list/tuple of such parts. Parts without text (e.g. file attachments
    described by a dict) contribute nothing.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        return str(content.get("text") or "")
    if isinstance(content, (list, tuple)):
        return "\n".join(text for text in map(_text_of, content) if text)
    return str(content)


def _history_turns(history) -> list:
    """Normalise chat history into a list of ``(role, text)`` pairs.

    Two layouts are handled:

    * "messages": each entry is a dict with ``"role"`` and ``"content"``.
    * legacy pairs: each entry is a ``(user_msg, assistant_msg)`` sequence.

    Turns with no text (for example a ``None`` assistant reply) are skipped so
    they do not end up in the prompt as empty or literal ``"None"`` turns.
    """
    turns = []
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _text_of(entry.get("content"))
            if role in ("user", "assistant") and text:
                turns.append((role, text))
            continue
        user_msg, assistant_msg = entry
        for role, content in (("user", user_msg), ("assistant", assistant_msg)):
            text = _text_of(content)
            if text:
                turns.append((role, text))
    return turns


def _render_prompt(turns, message) -> str:
    """Render the system prompt, prior turns and new message as ChatML text."""
    parts = [f"<|im_start|>system\n{SYSTEM_PROMPT}<|im_end|>\n"]
    parts.extend(
        f"<|im_start|>{role}\n{text}<|im_end|>\n" for role, text in turns
    )
    # The trailing open assistant tag is what the model continues from.
    parts.append(
        f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    )
    return "".join(parts)


def _count_tokens(prompt: str) -> int:
    """Return how many tokens ``prompt`` occupies for the loaded model."""
    # special=True so ChatML markers are counted as control tokens rather
    # than as plain text.
    return len(llm.tokenize(prompt.encode("utf-8"), special=True))


def chat(message, history):
    """Generate the assistant reply for ``message`` given prior ``history``.

    Args:
        message: The new user message.
        history: Prior conversation as supplied by ``gr.ChatInterface``;
            either ``(user, assistant)`` pairs or role/content dicts.

    Returns:
        The generated reply text with surrounding whitespace stripped.
    """
    turns = _history_turns(history)
    prompt = _render_prompt(turns, message)

    # Keep room for the reply: forget the oldest exchanges until the prompt
    # fits. Without this a long conversation eventually outgrows the context
    # window and the completion call fails instead of answering.
    budget = N_CTX - MAX_NEW_TOKENS
    start = 0
    while start < len(turns) and _count_tokens(prompt) > budget:
        start += 1
        # Never let the retained history begin with an orphaned assistant turn.
        while start < len(turns) and turns[start][0] != "user":
            start += 1
        prompt = _render_prompt(turns[start:], message)

    output = llm(
        prompt,
        max_tokens=MAX_NEW_TOKENS,
        stop=["<|im_end|>", "<|im_start|>"],
        temperature=0.7,
        top_p=0.95,
        repeat_penalty=1.1,
        echo=False,
    )
    return output["choices"][0]["text"].strip()


# Wrap the ChatInterface in gr.Blocks to safely apply the theme
with gr.Blocks(theme=gr.themes.Default(primary_hue="orange")) as demo:
    gr.ChatInterface(
        fn=chat,
        title="👻 Opus4.7 GOD's Ghost Codex — 4B",
        description="Compact code-specialized model by **WithIn Us AI**. Distilled reasoning, local inference, built for developers.",
        examples=[
            "Write a Python binary search function",
            "Debug this: def add(a,b) return a+b",
            "Explain the difference between async and threading in Python",
            "Write a REST API in FastAPI with CRUD operations",
        ],
    )

demo.launch()