import os
import random
import re
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import gradio as gr
from huggingface_hub import InferenceClient, repo_exists
|
|
| |
| |
| |
|
|
# Model IDs offered as dropdown choices for every agent. Agents also cycle
# through this list (by position) to pick their initial selection.
DEFAULT_MODELS = [
    "meta-llama/Llama-3.1-8B-Instruct",
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen2.5-Coder-32B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
]

# Initial value of each agent's temperature slider.
DEFAULT_TEMP = 0.7

# System prompt placed at the start of every agent's conversation.
SYSTEM_MESSAGE = (
    "You are a helpful assistant participating in a multi-agent review board. "
    "Provide thoughtful, well-reasoned responses. "
    "When reviewing other agents' responses in later rounds, "
    "carefully consider their reasoning and update your answer "
    "if you find compelling arguments."
)
|
|
|
|
| |
| |
| |
| |
|
|
|
|
def generate_answer(
    token: str,
    model: str,
    messages: list[dict],
    temperature: float,
) -> str:
    """Call the HF Inference API for a single agent turn, with retries.

    Args:
        token: Hugging Face access token handed to ``InferenceClient``.
        model: Model ID the chat completion is sent to.
        messages: Chat history as ``{"role": ..., "content": ...}`` dicts.
        temperature: Sampling temperature for this request.

    Returns:
        The text of the first completion choice. An empty string is returned
        when the API reports no text content, so callers always get a ``str``.

    Raises:
        Exception: The underlying client error, re-raised immediately when it
            does not look transient, or after the final attempt otherwise.
    """
    client = InferenceClient(token=token, model=model)

    max_tries = 4
    base_sleep = 1.5
    # Substrings (matched against the lower-cased repr of the error) that mark
    # a failure as worth retrying.
    transient_markers = (
        "timeout",
        "timed out",
        "503",
        "502",
        "504",
        "429",
        "rate limited",
        "too many requests",
        "loading",
        "overloaded",
        "temporarily unavailable",
        "service unavailable",
        "gateway",
    )

    for attempt in range(1, max_tries + 1):
        try:
            response = client.chat_completion(
                messages=messages,
                max_tokens=2048,
                temperature=temperature,
                top_p=0.9,
            )
        except Exception as exc:
            text = repr(exc).lower()
            transient = any(marker in text for marker in transient_markers)

            # Give up on permanent errors, or once the retry budget is spent.
            if not transient or attempt == max_tries:
                raise

            # Exponential backoff plus a little jitter so parallel agents do
            # not retry in lockstep.
            time.sleep(base_sleep * (2 ** (attempt - 1)) + random.uniform(0, 0.4))
        else:
            content = response.choices[0].message.content
            # ``content`` may be None; normalise to keep the ``str`` contract.
            return content if content is not None else ""

    # Only reachable if the retry loop never ran.
    raise RuntimeError("Unknown inference failure")
| |
| def get_hf_token() -> str | None: |
| """ |
| Resolve a Hugging Face token from multiple possible sources. |
| Priority: |
| 1. HF_TOKEN (Space secret) |
| 2. HUGGINGFACEHUB_API_TOKEN (older standard) |
| 3. HF_OAUTH_ACCESS_TOKEN (OAuth injection) |
| """ |
| return ( |
| os.environ.get("HF_TOKEN") |
| or os.environ.get("HUGGINGFACEHUB_API_TOKEN") |
| or os.environ.get("HF_OAUTH_ACCESS_TOKEN") |
| ) |
|
|
def construct_review_message(other_responses: list[tuple[str, str]]) -> dict:
    """Create the user turn that asks an agent to revisit its answer.

    Args:
        other_responses: ``(label, response_text)`` pairs from the other
            agents.

    Returns:
        A ``{"role": "user", "content": ...}`` message. With peer responses,
        the content quotes each one in a fenced block and asks for an updated
        answer; with none, it simply asks the agent to double-check itself.
    """
    if other_responses:
        quoted = [
            f"{label} response:\n```\n{resp}\n```\n"
            for label, resp in other_responses
        ]
        sections = [
            "These are the responses to the problem from other agents:\n",
            *quoted,
            "Using the reasoning from other agents as additional advice, update your answer. "
            "Examine your solution and that of the other agents step by step. Provide your final, updated response.",
        ]
        content = "\n".join(sections)
    else:
        content = "Please double-check your answer and provide your final response."

    return {"role": "user", "content": content}
|
|
|
|
def handle_inference_error(error: Exception, model_name: str) -> str:
    """Return a user-friendly error string for common Inference API failures.

    Args:
        error: The exception raised by the inference call.
        model_name: Model ID the request was sent to (used in the message).

    Returns:
        A short explanation for timeouts and for HTTP 401/403, 404, 422, 429
        and 402 failures; otherwise a generic message containing the first
        300 characters of ``repr(error)``.
    """
    raw = repr(error)
    low = raw.lower()
    etype = type(error).__name__.lower()

    if "timeout" in etype or "timeout" in low or "timed out" in low:
        return (
            f"Request to '{model_name}' timed out. The model may be loading "
            "(cold start) or overloaded. Try again in a moment."
        )

    # Prefer the structured status code when the exception carries an HTTP
    # response. Otherwise fall back to standalone three-digit tokens in the
    # text, so digits embedded in longer runs (request IDs, hashes) are not
    # mistaken for a status code.
    status = getattr(getattr(error, "response", None), "status_code", None)
    if isinstance(status, int):
        codes = {status}
    else:
        codes = {int(token) for token in re.findall(r"\b\d{3}\b", raw)}

    if codes & {401, 403}:
        return (
            f"Access denied for '{model_name}'. Visit the model page on "
            f"https://huggingface.co/{model_name} to accept its license/terms."
        )
    if 404 in codes:
        return f"Model '{model_name}' was not found on Hugging Face Hub."
    if 422 in codes:
        return (
            f"Model '{model_name}' does not support chat completion "
            "via the Inference API."
        )
    if 429 in codes:
        return "Rate limited. Please wait a moment and try again."
    if 402 in codes or "payment" in low or "credit" in low:
        return (
            "Out of Inference API credits. "
            "Check huggingface.co/settings/billing."
        )
    return f"Error with '{model_name}': {raw[:300]}"
|
|
def supports_chat_completion(model_id: str, token: str) -> tuple[bool, str]:
    """Probe *model_id* with a one-token chat request.

    Returns:
        ``(False, message)`` when the probe fails and the friendly error text
        says the model does not support chat completion or that access was
        denied. Any other outcome, including other kinds of failure, yields
        ``(True, "")``.
    """
    probe = [{"role": "user", "content": "ping"}]
    blocking_phrases = ("does not support chat completion", "access denied")

    try:
        InferenceClient(token=token, model=model_id).chat_completion(
            messages=probe,
            max_tokens=1,
            temperature=0.0,
            top_p=1.0,
        )
    except Exception as exc:
        friendly = handle_inference_error(exc, model_id)
        lowered = friendly.strip().lower()
        if any(phrase in lowered for phrase in blocking_phrases):
            return False, friendly

    # Success, or a failure that is not treated as disqualifying.
    return True, ""
|
|
| def validate_model(model_id: str, token: str | None = None) -> tuple[bool, str]: |
| """Return *(ok, error_message)* after checking the model exists on the Hub.""" |
| if not model_id or not model_id.strip(): |
| return False, "Model ID cannot be empty." |
| model_id = model_id.strip() |
| if model_id in DEFAULT_MODELS: |
| return True, "" |
| try: |
| if not repo_exists(model_id, token=token): |
| return False, f"Model '{model_id}' not found on Hugging Face Hub." |
| return True, "" |
| except Exception as exc: |
| return False, f"Could not verify '{model_id}': {exc}" |
|
|
|
|
def _latest_assistant_message(context: list[dict]) -> str | None:
    """Return the content of the most recent assistant turn, or ``None``."""
    for message in reversed(context):
        if message["role"] == "assistant":
            return message["content"]
    return None


def run_review_board(
    prompt: str,
    agent_configs: list[dict],
    num_rounds: int,
    token: str,
):
    """Generator yielding *(status_line, results_or_None)* tuples.

    Every agent answers *prompt* in round 1. In each later round every agent
    is shown the other agents' latest answers and asked to revise its own.

    Args:
        prompt: The user's question, sent to every agent.
        agent_configs: One dict per agent with the keys ``"id"``, ``"model"``
            and ``"temp"``.
        num_rounds: Number of rounds to run.
        token: Hugging Face token forwarded to ``generate_answer``.

    Yields:
        ``(status_line, None)`` while processing, then a final
        ``(status_line, results)`` where *results* maps agent labels to their
        final-round response text (or an ``[Error: ...]`` placeholder).
    """
    num_agents = len(agent_configs)

    # One independent conversation per agent.
    agent_contexts: list[list[dict]] = [
        [
            {"role": "system", "content": SYSTEM_MESSAGE},
            {"role": "user", "content": prompt},
        ]
        for _ in range(num_agents)
    ]

    # Indices of agents whose most recent turn failed. Their "[Error: ...]"
    # placeholder is not an answer and must not be shown to peers for review.
    failed_agents: set[int] = set()

    for round_num in range(num_rounds):
        tag = f"Round {round_num + 1}/{num_rounds}"
        yield f"**{tag}** -- Submitting requests...", None

        # From round 2 on, give each agent its peers' latest real answers.
        if round_num > 0:
            for i in range(num_agents):
                others: list[tuple[str, str]] = []
                for j, cfg in enumerate(agent_configs):
                    if j == i or j in failed_agents:
                        continue
                    answer = _latest_assistant_message(agent_contexts[j])
                    if answer is not None:
                        others.append((f"Agent {j + 1} (id={cfg['id']})", answer))
                agent_contexts[i].append(construct_review_message(others))

        failed_agents = set()

        futures: dict = {}
        # ThreadPoolExecutor rejects max_workers=0, so never go below one.
        max_workers = max(1, min(num_agents, 3))
        with ThreadPoolExecutor(max_workers=max_workers) as pool:
            for i, cfg in enumerate(agent_configs):
                fut = pool.submit(
                    generate_answer,
                    token,
                    cfg["model"],
                    # Snapshot, so the worker never sees later appends.
                    list(agent_contexts[i]),
                    cfg["temp"],
                )
                futures[fut] = i

            for fut in as_completed(futures):
                idx = futures[fut]
                model = agent_configs[idx]["model"]
                try:
                    text = fut.result()
                except Exception as exc:
                    err = handle_inference_error(exc, model)
                    failed_agents.add(idx)
                    # Still record an assistant turn so this agent's history
                    # keeps alternating user/assistant messages.
                    agent_contexts[idx].append(
                        {"role": "assistant", "content": f"[Error: {err}]"}
                    )
                    yield f"**{tag}** -- Agent {idx + 1} error: {err}", None
                else:
                    agent_contexts[idx].append(
                        {"role": "assistant", "content": text}
                    )
                    yield (
                        f"**{tag}** -- Agent {idx + 1} (`{model}`) responded.",
                        None,
                    )

    # Collect each agent's last assistant turn as its final response.
    results: dict[str, str] = {}
    for i, cfg in enumerate(agent_configs):
        last = _latest_assistant_message(agent_contexts[i])
        if last is None:
            last = "[No response generated]"
        results[f"Agent {i + 1} (id={cfg['id']}) -- {cfg['model']}"] = last

    yield (
        "**Complete!** Select an agent tab below to view their final response.",
        results,
    )
|
|
|
|
| |
| |
| |
|
|
# Extra CSS passed to ``gr.Blocks(css=...)``. It styles the per-agent header
# (the ``agent-header`` / ``agent-header-wrap`` classes used by the sidebar's
# HTML header), tightens spacing between sidebar groups, and themes the
# scrollbars.
CUSTOM_CSS = """
.agent-header-wrap {
    padding: 0 !important;
    min-height: 0 !important;
    background: rgba(78, 70, 229, 1);
}
.agent-header {
    display: block;
    text-align: center;
    cursor: help;
}

.sidebar .group {
    margin-bottom: 8px !important;
}

*::-webkit-scrollbar {
    width: 8px;
    height: 8px;
}
*::-webkit-scrollbar-track {
    background: transparent;
}
*::-webkit-scrollbar-thumb {
    background: rgba(139, 92, 246, 0.45);
    border-radius: 4px;
}
*::-webkit-scrollbar-thumb:hover {
    background: rgba(139, 92, 246, 0.7);
}

/* Themed scrollbar -- Firefox */
* {
    scrollbar-width: thin;
    scrollbar-color: rgba(139, 92, 246, 0.45) transparent;
}
"""
|
|
with gr.Blocks(
    title="Multi-Agent Review Board",
    theme=gr.themes.Soft(),
    css=CUSTOM_CSS,
) as demo:
    # Session state: the IDs of the configured agents, the next unused agent
    # ID, and the final results dict shown in the tabs.
    agents_state = gr.State([1, 2])
    next_id_state = gr.State(3)
    results_state = gr.State({})

    # ---- Sidebar: login, settings and per-agent configuration ----
    with gr.Sidebar():
        gr.LoginButton()
        gr.Markdown("---")

        gr.Markdown("### Settings")
        num_rounds = gr.Slider(
            minimum=1,
            maximum=10,
            value=2,
            step=1,
            label="Rounds",
            info="Round 1 = independent answers. Round 2+ = peer review.",
            interactive=True,
        )

        gr.Markdown("---")
        gr.Markdown("### Agents")

        # Re-rendered whenever the list of agent IDs changes.
        @gr.render(inputs=agents_state)
        def render_agents(agent_ids):
            """Render one model/temperature group per agent and wire up Run."""
            dropdowns: list = []
            sliders: list = []

            for idx, aid in enumerate(agent_ids):
                default_model = DEFAULT_MODELS[idx % len(DEFAULT_MODELS)]

                with gr.Group():
                    with gr.Row():
                        gr.HTML(
                            f'<span class="agent-header" title="Pick a model or type any HF model ID">'
                            f'<strong>Agent {idx + 1}</strong></span>',
                            elem_classes=["agent-header-wrap"],
                        )
                        # Only offer deletion while more than two agents exist.
                        if len(agent_ids) > 2:
                            del_btn = gr.Button(
                                "✕",
                                variant="stop",
                                size="sm",
                                min_width=36,
                                scale=0,
                                key=f"del-{aid}",
                            )

                            # Bind the ID as a default argument so each button
                            # removes its own agent (avoids late binding).
                            def _delete(current_ids, _target=aid):
                                return [x for x in current_ids if x != _target]

                            del_btn.click(_delete, agents_state, agents_state)

                    dd = gr.Dropdown(
                        choices=DEFAULT_MODELS,
                        value=default_model,
                        allow_custom_value=True,
                        label=None,
                        show_label=False,
                        key=f"model-{aid}",
                        interactive=True,
                    )
                    temp = gr.Slider(
                        minimum=0.1,
                        maximum=2.0,
                        value=DEFAULT_TEMP,
                        step=0.1,
                        label="Temperature",
                        key=f"temp-{aid}",
                        interactive=True,
                    )

                dropdowns.append(dd)
                sliders.append(temp)

            def on_run(data):
                """Validate the inputs, then stream review-board progress.

                *data* maps each input component to its current value.
                Yields ``(status_markdown, results_dict)`` pairs; the results
                dict stays empty until the final update.
                """
                hf_token = get_hf_token()

                if not hf_token:
                    raise gr.Error(
                        "No Hugging Face token found.\n\n"
                        "Add an HF_TOKEN secret in the Space settings "
                        "or enable OAuth with model access."
                    )

                prompt = data[prompt_tb]
                rounds = data[num_rounds]

                if not prompt or not prompt.strip():
                    raise gr.Error("Please enter a prompt.")

                models = [data[dd] for dd in dropdowns]
                temps = [data[sl] for sl in sliders]

                configs: list[dict] = []
                # Models already checked in this run. Several agents may share
                # a model, and each check costs network round-trips.
                checked: set[str] = set()
                for i, (aid, model, t) in enumerate(zip(agent_ids, models, temps)):
                    if not model or not model.strip():
                        raise gr.Error(f"Agent {i + 1}: please select or enter a model.")
                    model = model.strip()

                    if model not in checked:
                        # validate_model accepts the default models without a
                        # Hub lookup, so no separate membership test is needed.
                        ok, err = validate_model(model, hf_token)
                        if ok:
                            ok, err = supports_chat_completion(model, hf_token)
                        if not ok:
                            raise gr.Error(f"Agent {i + 1}: {err}")
                        checked.add(model)

                    configs.append({"id": aid, "model": model, "temp": float(t)})

                # Accumulate status lines so the log grows as agents respond.
                log: list[str] = []
                for status_line, results in run_review_board(
                    prompt.strip(), configs, int(rounds), hf_token
                ):
                    log.append(status_line)
                    yield (
                        "\n\n".join(log),
                        results if results is not None else {},
                    )

            # A set of inputs makes Gradio pass a single dict keyed by
            # component, which suits the variable number of agents.
            run_btn.click(
                on_run,
                inputs={prompt_tb, num_rounds} | set(dropdowns) | set(sliders),
                outputs=[status_md, results_state],
            )

        add_btn = gr.Button("+ Add Agent", variant="secondary", size="sm")

        def _add_agent(ids, nid):
            """Append the next unused agent ID and advance the counter."""
            return ids + [nid], nid + 1

        add_btn.click(
            _add_agent,
            [agents_state, next_id_state],
            [agents_state, next_id_state],
        )

    # ---- Main area: prompt, run button, status log and results ----
    gr.Markdown("# Multi-Agent Review Board")
    gr.Markdown(
        "Configure your agents in the sidebar, enter a prompt, and let "
        "multiple AI models debate and refine their answers across rounds."
    )

    prompt_tb = gr.Textbox(
        label="Prompt",
        placeholder="Enter your question or prompt here...",
        lines=4,
    )
    run_btn = gr.Button("Run Review Board", variant="primary", size="lg")
    status_md = gr.Markdown("")

    # Re-rendered whenever the results change; shows one tab per agent.
    @gr.render(inputs=results_state)
    def render_results(results):
        """Render the final responses as one tab per agent."""
        if not results:
            return
        gr.Markdown("---")
        gr.Markdown("### Final Responses")
        with gr.Tabs():
            for name, response in results.items():
                with gr.TabItem(name):
                    gr.Markdown(response)


if __name__ == "__main__":
    demo.launch()
|
|