Spaces:
Sleeping
Sleeping
| """Settings form for API keys and per-role model assignments. | |
| The frontend uses the unified ModelCatalog (model_settings/catalog.yaml) so | |
| users can mix-and-match providers per role. Models whose required env vars | |
| aren't set are shown but greyed out based on the keys actually entered in the | |
| form (not the process environment). | |
| """ | |
| import os | |
| import streamlit as st | |
| from scider.default.models import CAP_COMPLETION, ModelCatalog, ModelEntry | |
# Model roles grouped by UI category (category label -> {role key -> human label}).
# Each role key here maps to a per-role model selectbox in the settings form.
# "experiment_coding" is intentionally absent: it is handled separately in
# render_settings_form because its option list depends on CODING_AGENT_VERSION.
MODEL_ROLE_GROUPS = {
    "Ideation": {
        "ideation": "Idea generation",
        "paper_search": "Paper search",
        "metric_search": "Metric search",
    },
    "Data Analysis": {
        "data": "Data analysis",
    },
    "Experiment": {
        "experiment": "Experiment agent",
    },
    "Critic": {
        "critic": "Critic evaluation",
    },
    "Paper Writing": {
        "writing": "Writing agent",
    },
    "System": {
        "history": "History compression",
    },
}
# Mapping: required env var name -> settings-dict key that holds the
# corresponding API key entered in the form (e.g. "GEMINI_API_KEY" ->
# "gemini_api_key"). Used to check model availability against the keys the
# user actually provided rather than the process environment.
_KEY_ENV_MAP = {
    "GEMINI_API_KEY": "gemini_api_key",
    "OPENAI_API_KEY": "openai_api_key",
    "ANTHROPIC_API_KEY": "anthropic_api_key",
}
| def _initial_key_value(settings_key: str, current: dict) -> str: | |
| """Resolve initial key value from saved settings only (never from env).""" | |
| return current.get(settings_key, "") | |
def _entry_available_with_keys(entry: ModelEntry, provided_keys: dict[str, str]) -> bool:
    """Return True iff every env var the entry requires has a non-empty key.

    Availability is judged against *provided_keys* (the values typed into the
    settings form), never against the process environment.
    """
    return all(
        provided_keys.get(_KEY_ENV_MAP.get(env_var, ""), "")
        for env_var in entry.requires_env
    )
def _make_format_func(provided_keys: dict[str, str]):
    """Build a selectbox ``format_func`` bound to *provided_keys*.

    The returned callable labels each model id with its provider when all
    required keys are present, and with a warning listing the missing env
    vars otherwise. Availability is checked against the keys entered in the
    form, not ``os.environ``.
    """

    def _format(model_id: str) -> str:
        entry = ModelCatalog.get(model_id)
        if entry is None:
            return f"{model_id} (unknown)"
        if _entry_available_with_keys(entry, provided_keys):
            return f"{entry.id} ({entry.provider})"
        # Collect the env vars whose corresponding form field is still empty.
        missing_env = [
            env for env in entry.requires_env
            if not provided_keys.get(_KEY_ENV_MAP.get(env, ""), "")
        ]
        missing_labels = ", ".join(missing_env)
        return f"{entry.id} \u26a0 missing {missing_labels}"

    return _format
def _completion_model_ids() -> list[str]:
    """Return the ids of all catalog models with the completion capability."""
    entries = ModelCatalog.by_capability(CAP_COMPLETION)
    return [entry.id for entry in entries]
def _claude_completion_ids() -> list[str]:
    """Return completion-capable model ids whose provider is Anthropic."""
    anthropic_entries = (
        entry
        for entry in ModelCatalog.by_capability(CAP_COMPLETION)
        if entry.provider == "anthropic"
    )
    return [entry.id for entry in anthropic_entries]
def _select_model(
    label: str,
    options: list[str],
    saved: str | None,
    fallback: str | None,
    key: str,
    format_func=None,
) -> str:
    """Render a model selectbox and return the chosen model id.

    The pre-selected option is chosen by preference: *saved* if it is a valid
    option, else *fallback* if valid, else the first option.
    """
    if saved in options:
        default = saved
    elif fallback in options:
        default = fallback
    else:
        default = options[0]
    extra_kwargs = {}
    if format_func is not None:
        extra_kwargs["format_func"] = format_func
    return st.selectbox(
        label,
        options,
        index=options.index(default),
        key=key,
        **extra_kwargs,
    )
def render_settings_form(current_settings: dict | None = None) -> dict | None:
    """Render settings form. Returns new settings dict on submit, None otherwise.

    Args:
        current_settings: Previously saved settings dict (may contain
            "gemini_api_key", "openai_api_key", "anthropic_api_key",
            "s2_api_key", and "model_roles"). ``None`` means no saved settings.

    Returns:
        A new settings dict (keys as above) when the form is submitted and
        validation passes; ``None`` when not submitted or validation fails
        (at least one provider key required; every selected model must have
        its required keys filled in).
    """
    st.markdown("### Settings")
    st.caption(
        "Your settings are stored locally on this machine only and are never uploaded to the cloud."
    )
    # Make sure the catalog is loaded once before we render anything.
    ModelCatalog.load()
    current = current_settings or {}
    current_roles = current.get("model_roles", {})
    completion_ids = _completion_model_ids()
    # If the catalog has no Anthropic completion models, fall back to the
    # full completion list so the coding-model selectbox is never empty.
    claude_ids = _claude_completion_ids() or completion_ids
    # --- API Keys (outside form so we can read their values for rendering) ---
    # Streamlit forms capture widget values only on submit, so we use
    # session_state keys to read the *current* typed values for the
    # format_func, falling back to initial defaults.
    # Compute initial defaults: saved setting > empty string. The process
    # environment is intentionally never consulted (see _initial_key_value).
    init_gemini = _initial_key_value("gemini_api_key", current)
    init_openai = _initial_key_value("openai_api_key", current)
    init_anthropic = _initial_key_value("anthropic_api_key", current)
    # Build a snapshot of provided keys for the format_func.
    # On first render we use initial values; after user types, session_state
    # updates on the next rerun (Streamlit forms only update on submit, but
    # since model dropdowns are inside the same form, the availability display
    # reflects the *initial* keys — which is correct: if you just opened
    # settings, the keys you already saved are "provided".)
    provided_keys = {
        "gemini_api_key": st.session_state.get("_sk_gemini", init_gemini),
        "openai_api_key": st.session_state.get("_sk_openai", init_openai),
        "anthropic_api_key": st.session_state.get("_sk_anthropic", init_anthropic),
    }
    format_func = _make_format_func(provided_keys)
    with st.form("settings_form"):
        # --- API Keys ---
        st.markdown("#### API Keys")
        st.caption(
            "Configure any combination of providers. Models whose key is missing "
            "will appear greyed-out in the dropdowns below."
        )
        gemini_api_key = st.text_input(
            "Gemini API Key",
            type="password",
            placeholder="Enter your Gemini API key",
            value=init_gemini,
            key="_sk_gemini",
        )
        openai_api_key = st.text_input(
            "OpenAI API Key",
            type="password",
            placeholder="Enter your OpenAI API key",
            value=init_openai,
            key="_sk_openai",
        )
        anthropic_api_key = st.text_input(
            "Anthropic (Claude) API Key",
            type="password",
            placeholder="Optional — needed for Claude coding agent",
            value=init_anthropic,
            key="_sk_anthropic",
        )
        st.divider()
        # Semantic Scholar key is optional and not part of provider
        # availability checks, so it has no session_state key.
        s2_api_key = st.text_input(
            "Semantic Scholar API Key",
            type="password",
            placeholder="Optional — enables Semantic Scholar paper search",
            value=_initial_key_value("s2_api_key", current),
        )
        st.caption(
            "Optional. If provided, paper search will also query Semantic Scholar "
            "in addition to arXiv. Get a key at https://www.semanticscholar.org/product/api"
        )
        # --- HuggingFace Dataset Download ---
        # Read-only status display; the feature itself is toggled via .env.
        st.divider()
        st.markdown("#### HuggingFace Dataset Download")
        from scider.core import constant as _c
        if _c.HF_DATASET_DOWNLOAD_ENABLED:
            st.success(f"Enabled — max dataset size: {_c.HF_DATASET_MAX_SIZE_MB} MB")
        else:
            st.info("Disabled. Set `HF_DATASET_DOWNLOAD_ENABLED=true` in `.env` to enable.")
        st.caption(
            "When enabled, you can enter a HuggingFace dataset repo name "
            "(e.g. `google/fleurs`) instead of uploading a local file."
        )
        # --- Memory ---
        # Read-only status display; both flags default to enabled.
        st.divider()
        st.markdown("#### Memory")
        mem_read = os.getenv("SCIDER_MEMORY_READ", "true").lower() in {"1", "true", "yes", "y"}
        mem_write = os.getenv("SCIDER_MEMORY_WRITE", "true").lower() in {"1", "true", "yes", "y"}
        if mem_read and mem_write:
            st.success("Reading and writing enabled")
        elif mem_read:
            st.info("Reading enabled, writing disabled")
        elif mem_write:
            st.info("Writing enabled, reading disabled")
        else:
            st.warning("Memory disabled")
        st.caption("Configure via `SCIDER_MEMORY_READ` / `SCIDER_MEMORY_WRITE` in `.env`.")
        # --- Coding Agent ---
        # Backend selection is env-driven and shown read-only; only the
        # code-generation model is selectable here.
        st.divider()
        st.markdown("#### Coding Agent")
        coding_version = os.getenv("CODING_AGENT_VERSION", "claude_sdk")
        # Map env-var aliases (v2/v3/legacy names) to display labels;
        # unknown values are shown verbatim.
        if coding_version in ("v3", "claude_sdk"):
            version_label = "Claude Agent SDK"
        elif coding_version in ("v2", "openhands"):
            version_label = "OpenHands"
        elif coding_version == "native":
            version_label = "Native (SciDER)"
        else:
            version_label = coding_version
        st.text_input(
            "Coding Agent Backend",
            value=version_label,
            disabled=True,
            key="coding_agent_version_display",
        )
        st.caption(
            "To change the coding agent backend, set the `CODING_AGENT_VERSION` "
            "environment variable (`claude_sdk`, `openhands`, or `native`) in `.env`."
        )
        # Claude SDK backend can only drive Claude models; every other
        # backend may use any completion-capable model.
        if coding_version in ("v3", "claude_sdk"):
            coding_options = claude_ids
            coding_fallback = "claude-haiku-4-5"
        else:
            coding_options = completion_ids
            coding_fallback = "gemini-2.5-pro"
        coding_model = _select_model(
            "Code generation model",
            coding_options,
            saved=current_roles.get("experiment_coding"),
            fallback=coding_fallback,
            key="model_role_experiment_coding",
            format_func=format_func,
        )
        # --- Per-role model selection ---
        st.divider()
        st.markdown("#### Model Assignments")
        st.caption(
            "Choose which model to use for each agent role. Models from any provider "
            "can be mixed freely."
        )
        role_selections: dict[str, str] = {}
        # Lay out each group's role selectboxes in rows of up to max_cols
        # columns.
        max_cols = 3
        for group_name, roles in MODEL_ROLE_GROUPS.items():
            st.markdown(f"**{group_name}**")
            role_items = list(roles.items())
            for row_start in range(0, len(role_items), max_cols):
                row = role_items[row_start : row_start + max_cols]
                cols = st.columns(max_cols)
                # zip truncates to the shorter sequence, so a partial final
                # row simply leaves trailing columns empty.
                for col, (role, label) in zip(cols, row):
                    with col:
                        role_selections[role] = _select_model(
                            label,
                            completion_ids,
                            saved=current_roles.get(role),
                            fallback=None,
                            key=f"model_role_{role}",
                            format_func=format_func,
                        )
        role_selections["experiment_coding"] = coding_model
        # --- Submit ---
        submitted = st.form_submit_button("Save Settings", type="primary")
        if submitted:
            final_gemini = gemini_api_key.strip()
            final_openai = openai_api_key.strip()
            final_anthropic = anthropic_api_key.strip()
            final_s2 = s2_api_key.strip()
            # At least one provider key is mandatory; Semantic Scholar alone
            # is not sufficient.
            if not (final_gemini or final_openai or final_anthropic):
                st.error("Provide at least one provider API key (Gemini, OpenAI, or Anthropic).")
                return None
            # Build final provided keys for availability check.
            final_keys = {
                "gemini_api_key": final_gemini,
                "openai_api_key": final_openai,
                "anthropic_api_key": final_anthropic,
            }
            # Validate that selected models have their keys filled in.
            unavailable = []
            for role, mid in role_selections.items():
                entry = ModelCatalog.get(mid)
                if entry and not _entry_available_with_keys(entry, final_keys):
                    missing_env = [
                        k
                        for k in entry.requires_env
                        if not final_keys.get(_KEY_ENV_MAP.get(k, ""), "")
                    ]
                    unavailable.append((role, mid, missing_env))
            if unavailable:
                lines = "\n".join(
                    f"- **{role}** \u2192 `{mid}` (missing: {', '.join(missing)})"
                    for role, mid, missing in unavailable
                )
                st.error("Some selected models are still missing API keys:\n" + lines)
                return None
            return {
                "gemini_api_key": final_gemini,
                "openai_api_key": final_openai,
                "anthropic_api_key": final_anthropic,
                "s2_api_key": final_s2,
                "model_roles": role_selections,
            }
    return None