File size: 1,572 Bytes
dde7302
694f21e
 
 
 
 
 
 
 
 
dde7302
694f21e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86a4bd4
 
 
 
 
 
 
 
 
 
 
 
 
dde7302
694f21e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gradio as gr
from llama_cpp import Llama

# Module-level model handle, created once at import time and reused by every
# chat request.
llm = Llama.from_pretrained(
    # Model repository and a glob selecting the Q4_K_M quantized file in it.
    repo_id="WithinUsAI/Opus4.7-GODs.Ghost.Codex-4B.GGuF",
    filename="*Q4_K_M*",
    # Keep llama.cpp's own logging quiet.
    verbose=False,
    # Inference settings: worker thread count and context size (in tokens).
    n_threads=4,
    n_ctx=8192,
)

# ChatML system turn that opens every prompt.
_SYSTEM_TURN = (
    "<|im_start|>system\n"
    "You are GODsGhost Codex, an expert coding assistant. You reason step-by-step and write clean, efficient code."
    "<|im_end|>\n"
)


def _content_to_text(content):
    """Flatten one chat message payload into plain text.

    Accepts a plain string, ``None``, a dict carrying a ``"text"`` entry, or
    a list/tuple of such parts. Parts without text (for example file
    attachments) contribute nothing.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        text = content.get("text")
        return text if isinstance(text, str) else ""
    if isinstance(content, (list, tuple)):
        pieces = (_content_to_text(part) for part in content)
        return "\n".join(piece for piece in pieces if piece)
    return str(content)


def _iter_turns(history):
    """Yield ``(role, text)`` pairs from either supported history layout.

    Two layouts are handled (which one arrives depends on the Gradio version
    and the ChatInterface ``type`` setting):

    * pair style: ``[(user_msg, assistant_msg), ...]``
    * message style: ``[{"role": ..., "content": ...}, ...]``
    """
    for entry in history or []:
        if isinstance(entry, dict):
            role = entry.get("role")
            text = _content_to_text(entry.get("content"))
            # Only user/assistant turns with actual text belong in the prompt.
            if role in ("user", "assistant") and text:
                yield role, text
            continue

        user_msg, assistant_msg = entry
        for role, content in (("user", user_msg), ("assistant", assistant_msg)):
            # A missing half of a pair must not be rendered as the text "None".
            if content is not None:
                yield role, _content_to_text(content)


def chat(message, history):
    """Generate the assistant's reply to ``message`` given the prior turns.

    Builds a ChatML prompt (system turn, every earlier user/assistant turn,
    the new user message, then an open assistant turn) and asks the
    module-level ``llm`` to complete it.

    Args:
        message: The new user message. Normally a string; a dict with a
            ``"text"`` entry is also accepted.
        history: Earlier conversation turns, in either pair style or
            role/content message style. May be empty or ``None``.

    Returns:
        The text of the first completion choice with surrounding whitespace
        stripped.
    """
    # NOTE(review): the prompt is not trimmed to the model's context size, so
    # a very long conversation may be rejected by the model call -- confirm
    # whether history truncation is wanted.
    segments = [_SYSTEM_TURN]
    segments.extend(
        f"<|im_start|>{role}\n{text}<|im_end|>\n"
        for role, text in _iter_turns(history)
    )
    segments.append(
        f"<|im_start|>user\n{_content_to_text(message)}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    prompt = "".join(segments)

    output = llm(
        prompt,
        max_tokens=1024,
        # Stop as soon as the model closes its turn or tries to open a new one.
        stop=["<|im_end|>", "<|im_start|>"],
        temperature=0.7,
        top_p=0.95,
        repeat_penalty=1.1,
        echo=False,
    )
    return output["choices"][0]["text"].strip()

def _build_demo():
    """Build the chat UI inside a ``gr.Blocks`` container.

    The ChatInterface is nested in ``gr.Blocks`` so that the orange theme is
    applied on the container rather than on the ChatInterface itself.

    Returns:
        The ``gr.Blocks`` app holding the ChatInterface.
    """
    # Clickable starter prompts offered to the user.
    starter_prompts = [
        "Write a Python binary search function",
        "Debug this: def add(a,b) return a+b",
        "Explain the difference between async and threading in Python",
        "Write a REST API in FastAPI with CRUD operations",
    ]
    orange_theme = gr.themes.Default(primary_hue="orange")

    with gr.Blocks(theme=orange_theme) as ui:
        gr.ChatInterface(
            fn=chat,
            title="👻 Opus4.7 GOD's Ghost Codex — 4B",
            description="Compact code-specialized model by **WithIn Us AI**. Distilled reasoning, local inference, built for developers.",
            examples=starter_prompts,
        )
    return ui


demo = _build_demo()

demo.launch()