import gradio as gr
import torch

from inference import CodetteModelLoader, CodetteEngine

# UI display names mapped to the adapter keys used by the loader.
ADAPTERS = {
    "Newton": "newton",
    "DaVinci": "davinci",
    "Empathy": "empathy",
    "Philosophy": "philosophy",
    "Quantum": "quantum",
    "RC-XI": "consciousness",
    "Multi-Perspective": "multi_perspective",
    "Systems": "systems_architecture",
}


def create_chat_app():
    # Load the base model and all adapters once at startup.
    loader = CodetteModelLoader(
        adapters={
            "newton": "adapters/newton/final",
            "davinci": "adapters/davinci/final",
            "empathy": "adapters/empathy/final",
            "philosophy": "adapters/philosophy/final",
            "quantum": "adapters/quantum/final",
            "consciousness": "adapters/consciousness/final",
            "multi_perspective": "adapters/multi_perspective/final",
            "systems_architecture": "adapters/systems_architecture/final",
        }
    )
    loader.load_adapters()

    # Default generation settings registered for each loaded adapter.
    registry = {
        name: {
            "generation": {
                "temperature": 0.7,
                "top_p": 0.9,
                "max_tokens": 512,
            }
        }
        for name in loader.adapters
    }

    engine = CodetteEngine(loader, registry)

    def chat_stream(message, history, adapter, temp, top_p, max_tokens):
        # Rebuild the conversation as chat-format messages from the Gradio history.
        messages = []
        for user, assistant in history:
            messages.append({"role": "user", "content": user})
            messages.append({"role": "assistant", "content": assistant})
        messages.append({"role": "user", "content": message})

        # "All (synthesized)" queries every adapter and returns one combined reply.
        if adapter == "All (synthesized)":
            reply = engine.multi_perspective(
                messages,
                list(loader.adapters.keys())
            )
            history.append((message, reply))
            yield history
            return

        # Otherwise activate the selected adapter and stream tokens as they arrive.
        adapter_key = ADAPTERS[adapter]
        loader.set_active_adapter(adapter_key)

        prompt = loader.format_messages(messages)
        inputs = loader.tokenize(prompt)

        streamer = engine.stream_generate(
            inputs,
            temperature=temp,
            top_p=top_p,
            max_tokens=max_tokens
        )

        response = ""
        for token in streamer:
            response += token
            yield history + [(message, response)]

        history.append((message, response))

    def compare(prompt, adapters):
        # Run the same prompt through each selected adapter and collect the replies.
        outputs = {}
        messages = [{"role": "user", "content": prompt}]
        for name in adapters:
            adapter_key = ADAPTERS[name]
            outputs[name] = engine.generate(messages, adapter_key)
        return outputs

    def get_status():
        # Report the loaded model, active adapter, and GPU memory usage.
        device = loader.model.device
        if torch.cuda.is_available():
            mem = torch.cuda.memory_allocated() / 1024**3
            total = torch.cuda.get_device_properties(0).total_memory / 1024**3
            gpu_info = f"{mem:.2f}GB / {total:.2f}GB"
        else:
            gpu_info = "CPU"

        return {
            "Base Model": loader.base_model_name,
            "Active Adapter": loader.active_adapter,
            "Loaded Adapters": list(loader.adapters.keys()),
            "Device": str(device),
            "GPU Memory": gpu_info,
        }

    with gr.Blocks(theme=gr.themes.Soft(), title="Codette") as app:
        gr.Markdown("# Codette Multi-Perspective AI")

        with gr.Tabs():
            with gr.Tab("Chat"):
                chatbot = gr.Chatbot(height=500)
                adapter = gr.Dropdown(
                    choices=list(ADAPTERS.keys()) + ["All (synthesized)"],
                    value="Multi-Perspective",
                    label="Reasoning Perspective"
                )
                with gr.Row():
                    temperature = gr.Slider(
                        0.0,
                        1.5,
                        value=0.7,
                        label="Temperature"
                    )
                    top_p = gr.Slider(
                        0.0,
                        1.0,
                        value=0.9,
                        label="Top P"
                    )
                    max_tokens = gr.Slider(
                        64,
                        2048,
                        value=512,
                        step=64,
                        label="Max Tokens"
                    )
                msg = gr.Textbox(
                    placeholder="Ask Codette something...",
                    lines=2
                )
                msg.submit(
                    chat_stream,
                    [msg, chatbot, adapter, temperature, top_p, max_tokens],
                    chatbot
                )

            with gr.Tab("Compare"):
                prompt = gr.Textbox(label="Prompt")
                adapters = gr.CheckboxGroup(
                    choices=list(ADAPTERS.keys()),
                    label="Adapters to Compare",
                    value=["Newton", "DaVinci"]
                )
                output = gr.JSON()
                run = gr.Button("Run Comparison")
                run.click(
                    compare,
                    [prompt, adapters],
                    output
                )

            with gr.Tab("Status"):
                status_output = gr.JSON()
                refresh = gr.Button("Refresh")
                refresh.click(
                    get_status,
                    None,
                    status_output
                )

    return app
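

# Minimal launch sketch (assumption: this module is run directly; if the app is
# built and served from another script instead, drop or adapt this block).
if __name__ == "__main__":
    demo = create_chat_app()
    demo.launch()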