issdandavis Claude Opus 4.6 (1M context) committed on
Commit
200947b
·
1 Parent(s): cfd286b

feat: add Round Table multi-model chat + FU state machine

Browse files

- Tab 6: Round Table with Mistral-7B (KO:Intent), Phi-3-mini (CA:Compute), Qwen2.5 (DR:Architecture)
- Concurrent model fan-out via ThreadPoolExecutor
- DuckDuckGo web search toggle for grounded answers
- Governance gate on both input and output
- Consensus summary with agreement detection
- FunctionalUnit state machine (INERT->ADMITTED->DEFERRED->QUARANTINED->ESCALATED->DENIED)
- Context-gated content moderation (referential/quote/targeted, not blanket bans)
- Response cache for repeated queries
- All interactions logged for training flywheel

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (2) hide show
  1. app.py +333 -15
  2. requirements.txt +2 -0
app.py CHANGED
@@ -18,12 +18,25 @@ import os
18
  import re
19
  import time
20
  from collections import Counter
 
21
  from dataclasses import dataclass, field
22
  from datetime import datetime, timezone
23
  from enum import Enum
24
  from pathlib import Path
25
  from typing import Optional
26
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  # ── Constants ──────────────────────────────────────────────────────
28
 
29
  PHI = (1 + math.sqrt(5)) / 2
@@ -245,6 +258,43 @@ class FunctionalUnit:
245
  },
246
  }
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  DATASET_REPO = "issdandavis/scbe-aethermoore-training-data"
249
  FEEDBACK_DIR = Path("/tmp/mesh_foundry_feedback")
250
  FEEDBACK_DIR.mkdir(parents=True, exist_ok=True)
@@ -457,21 +507,7 @@ def governance_gate(text: str) -> dict:
457
  cost = harmonic_wall(d_star, R=4.0)
458
 
459
  # ── Attack pattern detection ──
460
- attack_patterns = [
461
- (r"ignore\s+(all\s+)?(previous|prior)\s+(instructions|rules)", "override"),
462
- (r"system\s+(override|prompt)", "override"),
463
- (r"bypass|disable\s+(safety|filter)", "bypass"),
464
- (r"\bjailbreak|DAN\b", "jailbreak"),
465
- (r"\bpassword|\bcredential|API\s+key", "exfil"),
466
- (r"\bsudo\b|\brm\s+-rf\b|/etc/passwd", "command_injection"),
467
- (r"\beval\(|\bexec\(|__import__", "code_injection"),
468
- (r"base64|rot13", "encoding_attack"),
469
- (r"grandmother.*password|authorized.*researcher", "social_engineering"),
470
- ]
471
- threats = []
472
- for pattern, category in attack_patterns:
473
- if re.search(pattern, text, re.IGNORECASE):
474
- threats.append(category)
475
 
476
  # ── L13: Decision logic ──
477
  signal_count = len(threats) + (1 if d_star > 0.4 else 0) + (1 if len(null_tongues) >= 3 else 0)
@@ -779,6 +815,249 @@ def submit_feedback(input_text, decision, agrees, correction):
779
  return log_feedback(input_text, decision, agrees, correction)
780
 
781
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
782
  # ── Gradio App ─────────────────────────────────────────────────────
783
 
784
  CUSTOM_CSS = """
@@ -918,6 +1197,45 @@ ORCID: 0009-0002-3936-9369
918
  inputs=[fb_input, fb_decision, fb_agrees, fb_correction],
919
  outputs=fb_result)
920
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
921
  gr.Markdown("""
922
  ---
923
  **SCBE-AETHERMOORE** | Built by Issac Davis | Patent Pending USPTO #63/961,403
 
18
  import re
19
  import time
20
  from collections import Counter
21
+ from concurrent.futures import ThreadPoolExecutor, as_completed
22
  from dataclasses import dataclass, field
23
  from datetime import datetime, timezone
24
  from enum import Enum
25
  from pathlib import Path
26
  from typing import Optional
27
 
28
+ try:
29
+ from huggingface_hub import InferenceClient
30
+ HF_INFERENCE_AVAILABLE = True
31
+ except ImportError:
32
+ HF_INFERENCE_AVAILABLE = False
33
+
34
+ try:
35
+ from duckduckgo_search import DDGS
36
+ SEARCH_AVAILABLE = True
37
+ except ImportError:
38
+ SEARCH_AVAILABLE = False
39
+
40
  # ── Constants ──────────────────────────────────────────────────────
41
 
42
  PHI = (1 + math.sqrt(5)) / 2
 
258
  },
259
  }
260
 
261
# ── Round Table Models ────────────────────────────────────────────
# Each model gets a Sacred Tongue role — a different lens on the same input.
# Spec rows: (display name, HF model id, tongue, role, accent color, system prompt).
_RT_SPECS = (
    (
        "Mistral-7B",
        "mistralai/Mistral-7B-Instruct-v0.3",
        "KO",
        "Intent",
        "#4CAF50",
        "You are the Intent analyst at the SCBE Round Table. "
        "Your Sacred Tongue is KO (Intent). Analyze through the lens of "
        "purpose, motivation, and direction. Be concise (2-3 paragraphs max).",
    ),
    (
        "Phi-3-mini",
        "microsoft/Phi-3-mini-4k-instruct",
        "CA",
        "Compute",
        "#FF9800",
        "You are the Compute specialist at the SCBE Round Table. "
        "Your Sacred Tongue is CA (Compute). Analyze through the lens of "
        "logic, process, and analytical rigor. Be concise (2-3 paragraphs max).",
    ),
    (
        "Qwen2.5-1.5B",
        "Qwen/Qwen2.5-1.5B-Instruct",
        "DR",
        "Architecture",
        "#607D8B",
        "You are the Architecture specialist at the SCBE Round Table. "
        "Your Sacred Tongue is DR (Schema). Analyze through the lens of "
        "structure, systems design, and patterns. Be concise (2-3 paragraphs max).",
    ),
)

ROUNDTABLE_MODELS = {
    name: {
        "model_id": model_id,
        "tongue": tongue,
        "role": role,
        "color": color,
        "system": system,
    }
    for name, model_id, tongue, role, color, system in _RT_SPECS
}

# Simple bounded cache for repeated round-table queries (insertion-ordered dict).
_response_cache: dict = {}
_CACHE_MAX = 20
297
+
298
  DATASET_REPO = "issdandavis/scbe-aethermoore-training-data"
299
  FEEDBACK_DIR = Path("/tmp/mesh_foundry_feedback")
300
  FEEDBACK_DIR.mkdir(parents=True, exist_ok=True)
 
507
  cost = harmonic_wall(d_star, R=4.0)
508
 
509
  # ── Attack pattern detection ──
510
+ threats = detect_threats(text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
 
512
  # ── L13: Decision logic ──
513
  signal_count = len(threats) + (1 if d_star > 0.4 else 0) + (1 if len(null_tongues) >= 3 else 0)
 
815
  return log_feedback(input_text, decision, agrees, correction)
816
 
817
 
818
+ # ── Round Table Functions ─────────────────────────────────────────
819
+
820
# Attack-pattern table shared by governance_gate and the Round Table.
# Patterns are compiled once at import time (case-insensitive) so every call
# to detect_threats skips the per-call regex cache lookup.
_ATTACK_PATTERNS = [
    (re.compile(r"ignore\s+(all\s+)?(previous|prior)\s+(instructions|rules)", re.IGNORECASE), "override"),
    (re.compile(r"system\s+(override|prompt)", re.IGNORECASE), "override"),
    # Fixed alternation precedence: the old r"bypass|disable\s+(safety|filter)"
    # flagged ANY text containing "bypass" (e.g. "heart bypass surgery").
    (re.compile(r"\b(bypass|disable)\s+(the\s+)?(safety|filter)", re.IGNORECASE), "bypass"),
    # Fixed boundary: the old r"\bjailbreak|DAN\b" had no left boundary on
    # "DAN", so words like "sedan" or "Jordan" were flagged.
    (re.compile(r"\bjailbreak|\bDAN\b", re.IGNORECASE), "jailbreak"),
    (re.compile(r"\bpassword|\bcredential|API\s+key", re.IGNORECASE), "exfil"),
    (re.compile(r"\bsudo\b|\brm\s+-rf\b|/etc/passwd", re.IGNORECASE), "command_injection"),
    (re.compile(r"\beval\(|\bexec\(|__import__", re.IGNORECASE), "code_injection"),
    (re.compile(r"base64|rot13", re.IGNORECASE), "encoding_attack"),
    (re.compile(r"grandmother.*password|authorized.*researcher", re.IGNORECASE), "social_engineering"),
]


def detect_threats(text: str) -> list:
    """Quick threat-pattern scan — shared between governance_gate and round table.

    Args:
        text: Arbitrary user or model text to scan.

    Returns:
        List of matched category labels, one per matching pattern (a category
        may therefore appear twice, e.g. both "override" patterns firing).
    """
    return [category for pattern, category in _ATTACK_PATTERNS if pattern.search(text)]
840
+
841
+
842
def log_training_interaction(input_text: str, output_text: str, decision: str, activations: dict):
    """Append one round-table interaction to today's JSONL log for the training flywheel.

    Args:
        input_text: The user's prompt as submitted.
        output_text: The produced output; stored truncated to 500 chars.
        decision: Governance decision label recorded with the interaction.
        activations: Tongue-activation mapping for the input.

    Side effects:
        Appends one JSON line to FEEDBACK_DIR/interactions_YYYYMMDD.jsonl.
    """
    # One UTC clock for both the record timestamp and the filename date:
    # the original used naive local datetime.now() for the filename, so records
    # written near local midnight landed in a file whose date disagreed with
    # the UTC timestamp inside them.
    now = datetime.now(timezone.utc)
    record = {
        "timestamp": now.isoformat(),
        "input": input_text,
        "output": output_text[:500],  # cap stored output to keep log lines small
        "tab": "round_table",
        "decision": decision,
        "activations": activations,
    }
    log_path = FEEDBACK_DIR / f"interactions_{now.strftime('%Y%m%d')}.jsonl"
    with open(log_path, "a", encoding="utf-8") as f:
        f.write(json.dumps(record, ensure_ascii=False) + "\n")
855
+
856
+
857
def _call_hf_model(model_id: str, system_prompt: str, messages: list, context: str = "") -> str:
    """Query a single HuggingFace chat model.

    On success returns the stripped assistant text; on any failure returns a
    bracketed error string so callers can detect it with text.startswith("[").
    """
    if not HF_INFERENCE_AVAILABLE:
        return "[Model unavailable — huggingface_hub not installed]"

    short_name = model_id.split("/")[-1]

    # Build the conversation: system prompt, optional web-context exchange,
    # then the caller-supplied messages.
    convo = [{"role": "system", "content": system_prompt}]
    if context:
        convo.append({"role": "user", "content": f"[Web context]\n{context}"})
        convo.append({"role": "assistant", "content": "I'll incorporate that context."})
    convo.extend(messages)

    try:
        reply = InferenceClient(model=model_id, timeout=15).chat_completion(
            messages=convo,
            max_tokens=512,
            temperature=0.7,
        )
        return reply.choices[0].message.content.strip()
    except Exception as exc:
        detail = str(exc)
        # Surface rate-limit / unavailable errors with a friendlier message.
        if "429" in detail or "503" in detail:
            return f"[{short_name} is rate-limited — try again shortly]"
        return f"[Error from {short_name}: {detail[:120]}]"
881
+
882
+
883
def _web_search(query: str, max_results: int = 5) -> str:
    """Search the web via DuckDuckGo.

    Returns newline-joined "- title: body" snippets, capped at 1500 chars
    total; returns "" when search is unavailable, empty, or fails (search is
    best-effort context, never fatal).
    """
    if not SEARCH_AVAILABLE:
        return ""
    try:
        collected: list = []
        used = 0
        for hit in DDGS().text(query, max_results=max_results):
            line = f"- {hit.get('title', '')}: {hit.get('body', '')}"
            # Stop before exceeding the context budget.
            if used + len(line) > 1500:
                break
            collected.append(line)
            used += len(line)
        return "\n".join(collected)
    except Exception:
        # Deliberate swallow: a failed search just means no extra context.
        return ""
900
+
901
+
902
def _roundtable_consensus(responses: dict) -> str:
    """Build a markdown consensus summary from multiple model responses.

    Entries whose text starts with "[" are error placeholders and are
    excluded from the consensus.
    """
    usable = {name: text for name, text in responses.items() if not text.startswith("[")}

    # Degenerate cases: nobody answered, or only one voice.
    if not usable:
        return "All models unavailable. Please try again."
    if len(usable) == 1:
        (only_name,) = usable
        return f"Only **{only_name}** responded. No consensus possible with a single voice."

    # Agreement = words of 4+ letters present in every response, minus filler.
    word_sets = [set(re.findall(r'\b[a-zA-Z]{4,}\b', text.lower())) for text in usable.values()]
    shared = set.intersection(*word_sets) if word_sets else set()
    filler = {"that", "this", "with", "from", "have", "been", "were", "their", "about",
              "would", "could", "should", "which", "there", "these", "than", "more", "also"}
    overlap = sorted(shared - filler)[:8]

    summary = [f"**Round Table Consensus** ({len(usable)}/{len(responses)} models responded)\n"]
    if overlap:
        summary.append(f"**Common ground**: {', '.join(overlap)}")
    else:
        summary.append("**Divergent** — models found little common ground.")

    # One digest line per model: first sentence, truncated to ~150 chars.
    for name, text in usable.items():
        cfg = ROUNDTABLE_MODELS.get(name, {})
        digest = text.split(".")[0].strip()
        if len(digest) > 150:
            digest = digest[:147] + "..."
        summary.append(f"- **{name}** [{cfg.get('tongue', '?')}:{cfg.get('role', '?')}]: {digest}")

    return "\n".join(summary)
941
+
942
+
943
def roundtable_query(user_msg: str, selected_models: list, search_enabled: bool, history: list):
    """Main Round Table orchestrator.

    Gates the input, optionally grounds it with a web search, fans the prompt
    out to the selected models concurrently, gates each output, appends a
    consensus summary, caches the responses, and logs the interaction.

    Returns:
        (updated chat history, "") — the empty string clears the input box.
    """
    if not user_msg.strip():
        return history, ""

    history = history or []
    history.append({"role": "user", "content": user_msg})

    # ── Governance gate on the input ──
    activations = tongue_activation(user_msg)
    threats = detect_threats(user_msg)
    gate = ContextGate()
    mode = gate.evaluate(user_msg, threats, activations)
    if mode == ResponseMode.REFUSED:
        history.append({
            "role": "assistant",
            "content": "Input blocked by governance gate (targeted hostility detected).",
            "metadata": {"title": "Governance Gate"},
        })
        return history, ""

    # ── Optional web grounding ──
    context = ""
    if search_enabled:
        context = _web_search(user_msg)
        if context:
            history.append({
                "role": "assistant",
                "content": f"**Web search results:**\n{context[:500]}{'...' if len(context) > 500 else ''}",
                "metadata": {"title": "Web Search"},
            })

    # ── Cache lookup ──
    # BUGFIX: the key now includes search_enabled. Previously a grounded and
    # an ungrounded run of the same prompt shared one cache slot, so toggling
    # "Search the web" could replay answers produced without the web context.
    cache_key = hashlib.md5(
        f"{user_msg}:{sorted(selected_models)}:{search_enabled}".encode()
    ).hexdigest()
    if cache_key in _response_cache:
        cached = _response_cache[cache_key]
        for name, text in cached.items():
            cfg = ROUNDTABLE_MODELS.get(name, {})
            history.append({
                "role": "assistant",
                "content": text,
                "metadata": {"title": f"{name} [{cfg.get('tongue', '?')}:{cfg.get('role', '?')}] (cached)"},
            })
        history.append({
            "role": "assistant",
            "content": _roundtable_consensus(cached),
            "metadata": {"title": "Consensus"},
        })
        return history, ""

    # ── Concurrent fan-out to the selected models ──
    if not selected_models:
        selected_models = list(ROUNDTABLE_MODELS.keys())

    messages = [{"role": "user", "content": user_msg}]
    responses = {}
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = {}
        for name in selected_models:
            cfg = ROUNDTABLE_MODELS.get(name)
            if not cfg:
                continue  # unknown model name from the UI — skip silently
            fut = executor.submit(
                _call_hf_model, cfg["model_id"], cfg["system"], messages, context
            )
            futures[fut] = name
        for fut in as_completed(futures):
            name = futures[fut]
            try:
                responses[name] = fut.result()
            except Exception as e:
                responses[name] = f"[{name} failed: {str(e)[:80]}]"

    # ── Governance gate on each output + append to chat ──
    # Iterate selected_models (not responses) to keep the UI order stable.
    for name in selected_models:
        if name not in responses:
            continue
        text = responses[name]
        cfg = ROUNDTABLE_MODELS.get(name, {})
        out_threats = detect_threats(text)
        out_gate = ContextGate()
        out_mode = out_gate.evaluate(text, out_threats, tongue_activation(text))
        if out_mode == ResponseMode.REFUSED:
            text = f"[Response from {name} blocked by governance gate]"
            responses[name] = text
        history.append({
            "role": "assistant",
            "content": text,
            "metadata": {"title": f"{name} [{cfg.get('tongue', '?')}:{cfg.get('role', '?')}]"},
        })

    # ── Consensus summary ──
    consensus = _roundtable_consensus(responses)
    history.append({
        "role": "assistant",
        "content": consensus,
        "metadata": {"title": "Consensus"},
    })

    # ── Cache with FIFO eviction (dicts preserve insertion order) ──
    if len(_response_cache) >= _CACHE_MAX:
        del _response_cache[next(iter(_response_cache))]
    _response_cache[cache_key] = responses

    # ── Training flywheel log ──
    log_training_interaction(user_msg, consensus, "ALLOW", activations)

    return history, ""
1059
+
1060
+
1061
  # ── Gradio App ─────────────────────────────────────────────────────
1062
 
1063
  CUSTOM_CSS = """
 
1197
  inputs=[fb_input, fb_decision, fb_agrees, fb_correction],
1198
  outputs=fb_result)
1199
 
1200
# ── Tab 6: Round Table ──
with gr.Tab("Round Table"):
    gr.Markdown(
        "### Multi-Model Round Table\n\n"
        "Ask a question and get simultaneous responses from multiple AI models, "
        "each assigned a Sacred Tongue role. Toggle web search for grounded answers."
    )
    # Conversation view shared by all round-table participants.
    rt_chatbot = gr.Chatbot(
        type="messages",
        height=600,
        label="Round Table",
        show_copy_button=True,
    )
    with gr.Row():
        rt_input = gr.Textbox(
            placeholder="Ask the Round Table anything...",
            show_label=False,
            scale=4,
        )
        rt_send = gr.Button("Send", variant="primary", scale=1)
    with gr.Row():
        # All models enabled by default; user can narrow the panel.
        rt_models = gr.CheckboxGroup(
            choices=list(ROUNDTABLE_MODELS.keys()),
            value=list(ROUNDTABLE_MODELS.keys()),
            label="Models",
        )
        rt_search = gr.Checkbox(label="Search the web", value=False)

    # Button click and textbox Enter share identical handler wiring.
    _rt_wiring = dict(
        fn=roundtable_query,
        inputs=[rt_input, rt_models, rt_search, rt_chatbot],
        outputs=[rt_chatbot, rt_input],
    )
    rt_send.click(**_rt_wiring)
    rt_input.submit(**_rt_wiring)
+
1239
  gr.Markdown("""
1240
  ---
1241
  **SCBE-AETHERMOORE** | Built by Issac Davis | Patent Pending USPTO #63/961,403
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
  gradio>=5.0.0
2
  datasets>=2.14.0
 
 
 
1
  gradio>=5.0.0
2
  datasets>=2.14.0
3
+ huggingface_hub>=0.20.0
4
+ duckduckgo-search>=5.0.0