"""YellowLabsStudio Research Console.

A Gradio chat front-end for Hugging Face Inference providers, intended to run
as a Hugging Face Space. Requires an ``HF_TOKEN`` secret in the environment.
"""

import os
import time

import gradio as gr
from huggingface_hub import InferenceClient

APP_TITLE = "YellowLabsStudio Research Console"
APP_SUBTITLE = (
    "Independent home lab AI research. Efficient models, local GPU experimentation, and practical agent systems."
)

DEFAULT_SYSTEM_PROMPT = (
    "You are a research assistant in an independent home lab. "
    "Be practical, technical, and reproducible. "
    "When you make claims that could be uncertain, say you cannot confirm. "
    "Prefer clear steps, short sections, and actionable guidance."
)

# Models offered in the UI dropdown; the first entry is the default.
MODEL_PRESETS = [
    "moonshotai/Kimi-K2-Instruct-0905",
    "meta-llama/Llama-3.1-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
]

# Canned prompts loadable into the input box via the "Load preset" button.
PROMPT_PRESETS = {
    "Home Lab Agent Workflow": "Design a simple tool using agent workflow that runs on local GPU. Include steps and tradeoffs.",
    "RAG Sanity Check": "Given a small set of notes, design a minimal RAG pipeline and an evaluation approach.",
    "Efficiency Test": "Explain how to reduce inference latency and memory use for a small language model on consumer GPU.",
    "Production Mindset": "Convert this experiment into a production ready service. Include observability, risks, and rollout steps.",
}


def _client() -> InferenceClient:
    """Build an InferenceClient authenticated with the HF_TOKEN secret.

    Raises:
        RuntimeError: if HF_TOKEN is missing from the environment, with a
            message telling the Space owner how to configure it.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        raise RuntimeError(
            "Missing HF_TOKEN. Add it in Space Settings: Secrets, then restart the Space."
        )
    return InferenceClient(token=token)


def build_messages(system_prompt: str, chat_history, user_message: str):
    """Convert UI state into an OpenAI-style chat ``messages`` list.

    Args:
        system_prompt: Optional system instruction; skipped when blank.
        chat_history: List of ``(user_text, assistant_text)`` pairs as kept
            by the ``gr.Chatbot`` component. Either element may be falsy.
        user_message: The new user turn, appended last.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts.
    """
    messages = []
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt.strip()})
    for user_text, assistant_text in chat_history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": user_message})
    return messages


def chat(
    model: str,
    system_prompt: str,
    temperature: float,
    top_p: float,
    max_tokens: int,
    user_message: str,
    chat_history,
):
    """Run one chat turn and return updated ``(history, input_box_value)``.

    On success the assistant reply is suffixed with a "[run meta]" footer
    recording the sampling settings and wall-clock duration. On failure the
    error is surfaced in the chat instead of crashing the UI. The input box
    is always cleared by returning an empty string as the second value.
    """
    # Ignore empty submissions (e.g. pressing Run with a blank input).
    if not user_message or not user_message.strip():
        return chat_history, ""

    started = time.time()
    meta = {
        "model": model,
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
    }
    try:
        client = _client()
        messages = build_messages(system_prompt, chat_history, user_message)
        output = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
        )
        # content can be None for some providers; normalize to "".
        answer = output.choices[0].message.content or ""
        duration = round(time.time() - started, 3)
        meta_line = (
            f"\n\n[run meta]\nmodel: {meta['model']}\n"
            f"temperature: {meta['temperature']}\ntop_p: {meta['top_p']}\n"
            f"max_tokens: {meta['max_tokens']}\nseconds: {duration}\n"
        )
        chat_history = chat_history + [(user_message, answer + meta_line)]
        return chat_history, ""
    except Exception as e:
        # Boundary handler: report the failure in the chat rather than
        # letting the Gradio event crash.
        err = f"Run failed.\n{type(e).__name__}: {e}"
        chat_history = chat_history + [(user_message, err)]
        return chat_history, ""


with gr.Blocks(title=APP_TITLE) as demo:
    gr.Markdown(f"# {APP_TITLE}\n\n{APP_SUBTITLE}")

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("## Controls")
            model = gr.Dropdown(
                choices=MODEL_PRESETS,
                value=MODEL_PRESETS[0],
                label="Model",
            )
            system_prompt = gr.Textbox(
                value=DEFAULT_SYSTEM_PROMPT,
                label="System Prompt",
                lines=6,
            )
            temperature = gr.Slider(
                minimum=0.0,
                maximum=1.5,
                value=0.4,
                step=0.05,
                label="Temperature",
            )
            top_p = gr.Slider(
                minimum=0.1,
                maximum=1.0,
                value=0.95,
                step=0.05,
                label="Top P",
            )
            max_tokens = gr.Slider(
                minimum=64,
                maximum=2048,
                value=512,
                step=64,
                label="Max Tokens",
            )
            gr.Markdown("## Prompt Presets")
            preset = gr.Dropdown(
                choices=list(PROMPT_PRESETS.keys()),
                value=next(iter(PROMPT_PRESETS)),
                label="Preset",
            )
            apply_preset = gr.Button("Load preset into input")

        with gr.Column(scale=2):
            gr.Markdown("## Lab Chat")
            chatbot = gr.Chatbot(height=520)
            user_message = gr.Textbox(
                label="Input",
                placeholder="Ask a research question, test an idea, or run an experiment prompt.",
                lines=3,
            )
            with gr.Row():
                send = gr.Button("Run")
                clear = gr.Button("Clear")

    def _load_preset(preset_name: str):
        """Return the preset text for *preset_name* ("" if unknown)."""
        return PROMPT_PRESETS.get(preset_name, "")

    apply_preset.click(_load_preset, inputs=preset, outputs=user_message)
    send.click(
        chat,
        inputs=[model, system_prompt, temperature, top_p, max_tokens, user_message, chatbot],
        outputs=[chatbot, user_message],
    )
    clear.click(lambda: [], outputs=chatbot)


if __name__ == "__main__":
    demo.launch()