editorial-system / config.example.py
ICSAC's picture
Blind-review compaction + gemini chain tail for oversized submissions
4f41d03
"""ICSAC Editorial System β€” Configuration.
Copy this file to config.py. Secrets are loaded from environment variables.
Set them in /etc/icsac/editorial.env (loaded by systemd EnvironmentFile=).
For manual runs: source /etc/icsac/editorial.env && export ZENODO_TOKEN TELEGRAM_TOKEN TELEGRAM_CHAT_ID
"""
import os
import os as _os
def _load_env_file(path: str = "/etc/icsac/editorial.env") -> None:
"""Self-load env file if vars not already set. Lets Python invocations
work without ceremony β€” systemd EnvironmentFile= still wins when present.
"""
p = _os.path.expanduser(path)
if not _os.path.isfile(p):
return
with open(p) as f:
for line in f:
line = line.strip()
if not line or line.startswith("#") or "=" not in line:
continue
k, _, v = line.partition("=")
k = k.strip()
v = v.strip().strip('"').strip("'")
_os.environ.setdefault(k, v)
_load_env_file()
ZENODO_TOKEN = os.environ.get("ZENODO_TOKEN", "")
ZENODO_API = "https://zenodo.org/api"
TELEGRAM_TOKEN = os.environ.get("TELEGRAM_TOKEN", "")
TELEGRAM_CHAT_ID = os.environ.get("TELEGRAM_CHAT_ID", "")
# Optional: Telegram supergroup-thread routing. When set, editorial-system
# messages pin to a specific topic so notifications can share a bot/supergroup
# with other operator monitoring without crossing streams.
TELEGRAM_THREAD_ID = os.environ.get("TELEGRAM_THREAD_ID", "")
# Optional: Tier-3 test routing. The submission worker (when test_mode and
# tier=3) and apply_decision (when applying a verdict to a test sub_id)
# route curator-facing Telegram to this chat instead of TELEGRAM_CHAT_ID.
# Leave unset to skip the curator Telegram in T3 entirely (panel/RQC/email
# draft still run); the worker logs a single warning when this happens.
TELEGRAM_TEST_CHAT_ID = os.environ.get("TELEGRAM_TEST_CHAT_ID", "")
TELEGRAM_TEST_THREAD_ID = os.environ.get("TELEGRAM_TEST_THREAD_ID", "")
# Optional: IMAP draft-save mode. When email_send is invoked with draft=True,
# the rendered MIME message is APPENDed to Gmail's Drafts folder via IMAP
# (operator manually reviews + sends from Gmail UI). Leave unset to disable
# draft mode entirely.
IMAP_HOST = os.environ.get("IMAP_HOST", "imap.gmail.com")
IMAP_PORT = int(os.environ.get("IMAP_PORT", "993"))
IMAP_USER = os.environ.get("IMAP_USER", "")
IMAP_PASSWORD = os.environ.get("IMAP_PASSWORD", "")
IMAP_DRAFTS_FOLDER = os.environ.get("IMAP_DRAFTS_FOLDER", "[Gmail]/Drafts")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Panel slot chains. Entries are tagged with backend prefix:
# "hf|<model>:<provider>" β†’ HuggingFace Inference Providers Router
# (custom provider keys live in HF settings;
# billing routes through the upstream provider)
# "or|<model>" β†’ OpenRouter direct
# Untagged entries fall through to OR for backward compatibility.
# Consecutive OR entries are batched into a single OR call (`models` array,
# max 3 per OR's cap). HF entries fire one HTTP request each because HF's
# explicit provider pin does not auto-failover within the call β€” the panel
# chain dispatcher is responsible for trying the next entry on failure.
#
# Cross-provider redundancy (2026-04-27): every slot's chain spans Groq +
# Cerebras + OR-free so a single-provider outage can't take more than one
# chain entry per slot. Cerebras free-tier 8K context cap forces
# Qwen3-235B-A22B-Instruct-2507 (the 64K-context exempt model) anywhere
# Cerebras appears in a slot.
OPENROUTER_MODELS = [
# Slot 1: Groq Llama-3.3-70B β†’ Cerebras Qwen3-235B β†’ OR cross-family.
[
"hf|meta-llama/Llama-3.3-70B-Instruct:groq",
"hf|Qwen/Qwen3-235B-A22B-Instruct-2507:cerebras",
"or|openai/gpt-oss-120b:free",
"or|z-ai/glm-4.5-air:free",
"gemini",
],
# Slot 2: Groq gpt-oss-120b β†’ Cerebras Qwen3-235B β†’ OR Nvidia/Hermes.
# nemotron-3-super-120b-a12b excluded (won't emit JSON reliably).
[
"hf|openai/gpt-oss-120b:groq",
"hf|Qwen/Qwen3-235B-A22B-Instruct-2507:cerebras",
"or|nvidia/nemotron-nano-12b-v2-vl:free",
"or|nousresearch/hermes-3-llama-3.1-405b:free",
"gemini",
],
# Slot 3: Cerebras primary β†’ Groq Llama β†’ OR Google/cross-family.
[
"hf|Qwen/Qwen3-235B-A22B-Instruct-2507:cerebras",
"hf|meta-llama/Llama-3.3-70B-Instruct:groq",
"or|google/gemma-4-26b-a4b-it:free",
"or|z-ai/glm-4.5-air:free",
"gemini",
],
# Slot 4: HF Groq primary, HF Cerebras fallback, OR tail. Reordered
# 2026-04-27 after qwen3-next-80b-a3b-instruct:free failed all 4
# consecutive panel passes (SUB-00003 pass 0+1, SUB-00004 pass 0+1).
# Kept minimax + gemma-4-31b as the OR tail so slot 4 still has a
# full OR-only fallback path with model-family diversity from slots
# 1-3 OR tails (gpt-oss/z-ai, nemotron/hermes, gemma-4-26b/z-ai).
# NB: this puts slot 4 on the same primary (HF Groq llama-3.3) as
# slot 1 β€” accepted trade-off; total Groq-outage now drops the panel
# to 4/5 via Cerebras fallback rather than staying functional, but a
# CHRONIC slot-4 failure (which is what we had) was permanently below
# MIN_REVIEWERS=4 in pass 1. Reliability beats slot-level diversity.
[
"hf|meta-llama/Llama-3.3-70B-Instruct:groq",
"hf|Qwen/Qwen3-235B-A22B-Instruct-2507:cerebras",
"or|minimax/minimax-m2.5:free",
"or|google/gemma-4-31b-it:free",
"gemini",
],
]
OPENROUTER_MODELS_API_URL = "https://openrouter.ai/api/v1/models"
# Self-heal thresholds (claude + 4 OR slots = 5 total panelists per pass).
# MIN_REVIEWERS=4 tolerates 1 slot failure per pass after self-heal retry.
# Combined with REVIEW_PASSES below, a paper yields 8-10 valid reviews in
# the aggregate. Tightened from MIN_REVIEWERS=3 + 3 passes 2026-04-26
# after observing pass-to-pass stdev was uniformly tiny (≀0.41 on the
# noisiest dim, ≀0.09 on most) β€” 3rd pass added marginal stderr at 33%
# more compute. Two passes captures essentially the same signal; pairing
# with MIN_REVIEWERS=4 keeps each pass closer to full panel.
MIN_REVIEWERS = 4
MAX_SLOT_RETRIES = 1 # per failed slot, after the initial attempt
RETRY_COOLDOWN_SEC = 30 # wait between initial pass and retry pass
# Multi-pass aggregation: run the full panel N times, aggregate mean+stdev
# across passes. 2 passes balances stability against compute. Set to 1
# to disable multi-pass; 3+ for noise-reduction at compute cost.
REVIEW_PASSES = 2
SMTP_HOST = os.environ.get("SMTP_HOST", "smtp.gmail.com")
SMTP_PORT = int(os.environ.get("SMTP_PORT", "465"))
SMTP_USER = os.environ.get("SMTP_USER", "")
SMTP_PASSWORD = os.environ.get("SMTP_PASSWORD", "")
FROM_EMAIL = os.environ.get("FROM_EMAIL", "info@icsacinstitute.org")
REPLY_TO_EMAIL = os.environ.get("REPLY_TO_EMAIL", "info@icsacinstitute.org")
NTFY_PAIN_URL = os.environ.get("NTFY_PAIN_URL", "")
NTFY_BACKUPS_URL = os.environ.get("NTFY_BACKUPS_URL", "")
BRAIN_URL = os.environ.get("BRAIN_URL", "")
KUMA_PUSH_URL = os.environ.get("KUMA_PUSH_URL", "")
COMMUNITY_ID = "icsac"
GOOGLE_FORM_URL = os.environ.get("GOOGLE_FORM_URL", "https://example.com/your-community-signup-form")
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Runtime output directory β€” created on first run, gitignored.
REVIEWS_DIR = os.path.join(BASE_DIR, "reviews")
# Runtime output directory β€” created on first run, gitignored.
DOWNLOADS_DIR = os.path.join(BASE_DIR, "downloads")
RUBRICS_DIR = os.path.join(BASE_DIR, "rubrics")
TEMPLATES_DIR = os.path.join(BASE_DIR, "templates")
# Site base URL used to build share-target landing pages (icsacinstitute.org/accepted/<id>)
SITE_BASE_URL = "https://icsacinstitute.org"
CLAUDE_CMD = "claude"
GEMINI_CMD = "gemini"
RUBRIC_DIMENSIONS = [
"domain_fit",
"methodological_transparency",
"internal_consistency",
"citation_integrity",
"novelty_signal",
"ai_provenance_signal",
]
# Path to the institute's website repo (used by publications.py to commit
# accepted-paper landing pages + redacted reviews). Empty disables the
# website-registry push; the Zenodo accept itself still proceeds.
ICSAC_WEBSITE_REPO = os.environ.get("ICSAC_WEBSITE_REPO", "")