Update agent.py
Browse files
agent.py
CHANGED
|
@@ -92,8 +92,10 @@ DEFAULT_RUN_KEY: str = "abstract"
|
|
| 92 |
THREAD_PREFIX: str = "TA-"
|
| 93 |
MAX_USER_MESSAGE_CHARS: int = 4000
|
| 94 |
VERIFY_CHAT_MAX_ROWS: int = 20
|
| 95 |
-
PROVIDER_RETRY_ATTEMPTS: int =
|
| 96 |
-
PROVIDER_RETRY_BASE_DELAY_S: float =
|
|
|
|
|
|
|
| 97 |
|
| 98 |
# FIX ISSUE 4 — surface missing API key immediately at import time
|
| 99 |
_KEY_MISSING = not bool(MISTRAL_API_KEY)
|
|
@@ -313,6 +315,7 @@ After researcher confirms:
|
|
| 313 |
β Filters publisher boilerplate (copyright, license text)
|
| 314 |
β Embeds with SPECTER2 (L2-normalized)
|
| 315 |
β UMAP reduces dimensions for HDBSCAN clustering
|
|
|
|
| 316 |
β Finds 5 nearest centroid sentences per topic
|
| 317 |
β Saves Plotly HTML visualizations
|
| 318 |
β Saves embeddings + summaries checkpoints
|
|
@@ -360,7 +363,7 @@ After researcher confirms:
|
|
| 360 |
| Research Area | General research area (NOT PACIS β that comes later in Phase 5.5) |
|
| 361 |
| Confidence | How well the 5 sentences match the label |
|
| 362 |
| Sentences | Number of sentences clustered here |
|
| 363 |
-
|
| 364 |
| Approve | Edit: yes/no β keep or reject this topic |
|
| 365 |
| Rename To | Edit: type new name if label is wrong |
|
| 366 |
| Your Reasoning | Edit: why you renamed/rejected |"
|
|
@@ -742,10 +745,25 @@ def _is_transient_provider_error(exc: Exception) -> bool:
|
|
| 742 |
or '"raw_status_code":503' in msg
|
| 743 |
or '"raw_status_code":502' in msg
|
| 744 |
or '"raw_status_code":504' in msg
|
|
|
|
|
|
|
|
|
|
|
|
|
| 745 |
or "service unavailable" in msg
|
| 746 |
)
|
| 747 |
|
| 748 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 749 |
def _invoke_react_with_retries(enriched: str, thread_id: str) -> dict:
|
| 750 |
"""Call the ReAct graph with bounded retries for transient provider failures."""
|
| 751 |
last_exc: Exception | None = None
|
|
@@ -762,7 +780,10 @@ def _invoke_react_with_retries(enriched: str, thread_id: str) -> dict:
|
|
| 762 |
raise
|
| 763 |
last_exc = exc
|
| 764 |
if attempt < PROVIDER_RETRY_ATTEMPTS - 1:
|
| 765 |
-
|
|
|
|
|
|
|
|
|
|
| 766 |
continue
|
| 767 |
raise last_exc
|
| 768 |
|
|
@@ -825,6 +846,7 @@ def _collect_output_files(state: dict) -> list[str]:
|
|
| 825 |
rdir = _run_dir(run_key)
|
| 826 |
candidates = [
|
| 827 |
str(rdir / "summaries.json"),
|
|
|
|
| 828 |
str(rdir / "labels.json"),
|
| 829 |
str(rdir / "labels_verification.json"),
|
| 830 |
str(rdir / "themes.json"),
|
|
@@ -933,12 +955,60 @@ def _populate_review_df(state: dict) -> dict:
|
|
| 933 |
"Approve", "Rename To", "Reasoning"
|
| 934 |
"""
|
| 935 |
labels_path = OUTPUT_DIR / state.get("run_key", DEFAULT_RUN_KEY) / "labels.json"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 936 |
|
| 937 |
def _reasoning_cell(row: dict) -> str:
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 942 |
|
| 943 |
return (
|
| 944 |
{
|
|
@@ -946,12 +1016,12 @@ def _populate_review_df(state: dict) -> dict:
|
|
| 946 |
"review_df": list(map(
|
| 947 |
lambda r: {
|
| 948 |
"#": r.get("cluster_id", 0),
|
| 949 |
-
"Topic Label":
|
| 950 |
"Top Evidence":r["evidence"][0] if r.get("evidence") else "",
|
| 951 |
"Sentences": r.get("size", 0),
|
| 952 |
-
"Papers":
|
| 953 |
"Approve": False,
|
| 954 |
-
"Rename To":
|
| 955 |
"Reasoning": _reasoning_cell(r),
|
| 956 |
},
|
| 957 |
_load_json(labels_path),
|
|
@@ -1031,15 +1101,16 @@ def _build_verify_chat_report(rows: list[dict]) -> str:
|
|
| 1031 |
|
| 1032 |
shown = rows[:VERIFY_CHAT_MAX_ROWS]
|
| 1033 |
header = [
|
| 1034 |
-
"| # | Mistral Label | Groq-Ollama Label | Groq-GPT Label |",
|
| 1035 |
-
"|---|---|---|---|",
|
| 1036 |
]
|
| 1037 |
lines = list(map(
|
| 1038 |
lambda r: (
|
| 1039 |
f"| {int(r.get('cluster_id', 0))} "
|
| 1040 |
f"| {_sanitize_markdown_cell(r.get('mistral_label') or r.get('label', ''))} "
|
| 1041 |
f"| {_sanitize_markdown_cell(r.get('groq_ollama_label') or r.get('groq_label', ''))} "
|
| 1042 |
-
f"| {_sanitize_markdown_cell(r.get('groq_gpt_label', ''))}
|
|
|
|
| 1043 |
),
|
| 1044 |
shown,
|
| 1045 |
))
|
|
@@ -1122,7 +1193,7 @@ def _handle_verify_command(state: dict) -> tuple[str, dict]:
|
|
| 1122 |
reply = (
|
| 1123 |
"VERIFY complete. Groq-Ollama and Groq-GPT topic labeling has been added for Phase 2 topics.\n\n"
|
| 1124 |
f"Verified topics: {verified_count}/{labelled_count}\n"
|
| 1125 |
-
"Mistral vs Groq-Ollama vs Groq-GPT comparison is shown below in chat.\n\n"
|
| 1126 |
f"{report}\n\n"
|
| 1127 |
"Compare labels, edit Rename To/Approve, then click Submit Review to continue.\n\n"
|
| 1128 |
"[STOP GATE 1 β AWAITING REVIEW TABLE SUBMISSION]"
|
|
|
|
| 92 |
THREAD_PREFIX: str = "TA-"
|
| 93 |
MAX_USER_MESSAGE_CHARS: int = 4000
|
| 94 |
VERIFY_CHAT_MAX_ROWS: int = 20
|
| 95 |
+
PROVIDER_RETRY_ATTEMPTS: int = 4
|
| 96 |
+
PROVIDER_RETRY_BASE_DELAY_S: float = 2.0
|
| 97 |
+
PROVIDER_RETRY_RATE_LIMIT_DELAY_S: float = 6.0
|
| 98 |
+
PROVIDER_RETRY_MAX_DELAY_S: float = 18.0
|
| 99 |
|
| 100 |
# FIX ISSUE 4 — surface missing API key immediately at import time
|
| 101 |
_KEY_MISSING = not bool(MISTRAL_API_KEY)
|
|
|
|
| 315 |
β Filters publisher boilerplate (copyright, license text)
|
| 316 |
β Embeds with SPECTER2 (L2-normalized)
|
| 317 |
β UMAP reduces dimensions for HDBSCAN clustering
|
| 318 |
+
β Auto-optimizes HDBSCAN parameters after the first run (optimization.json)
|
| 319 |
β Finds 5 nearest centroid sentences per topic
|
| 320 |
β Saves Plotly HTML visualizations
|
| 321 |
β Saves embeddings + summaries checkpoints
|
|
|
|
| 363 |
| Research Area | General research area (NOT PACIS β that comes later in Phase 5.5) |
|
| 364 |
| Confidence | How well the 5 sentences match the label |
|
| 365 |
| Sentences | Number of sentences clustered here |
|
| 366 |
+
| Papers | Unique paper count plus top 3 paper titles |
|
| 367 |
| Approve | Edit: yes/no β keep or reject this topic |
|
| 368 |
| Rename To | Edit: type new name if label is wrong |
|
| 369 |
| Your Reasoning | Edit: why you renamed/rejected |"
|
|
|
|
| 745 |
or '"raw_status_code":503' in msg
|
| 746 |
or '"raw_status_code":502' in msg
|
| 747 |
or '"raw_status_code":504' in msg
|
| 748 |
+
or '"raw_status_code":429' in msg
|
| 749 |
+
or '"status":429' in msg
|
| 750 |
+
or "too many requests" in msg
|
| 751 |
+
or "rate limit" in msg
|
| 752 |
or "service unavailable" in msg
|
| 753 |
)
|
| 754 |
|
| 755 |
|
| 756 |
+
def _is_rate_limit_error(exc: Exception) -> bool:
|
| 757 |
+
msg = str(exc).lower()
|
| 758 |
+
return (
|
| 759 |
+
"rate limit" in msg
|
| 760 |
+
or "too many requests" in msg
|
| 761 |
+
or '"raw_status_code":429' in msg
|
| 762 |
+
or '"status":429' in msg
|
| 763 |
+
or "status code: 429" in msg
|
| 764 |
+
)
|
| 765 |
+
|
| 766 |
+
|
| 767 |
def _invoke_react_with_retries(enriched: str, thread_id: str) -> dict:
|
| 768 |
"""Call the ReAct graph with bounded retries for transient provider failures."""
|
| 769 |
last_exc: Exception | None = None
|
|
|
|
| 780 |
raise
|
| 781 |
last_exc = exc
|
| 782 |
if attempt < PROVIDER_RETRY_ATTEMPTS - 1:
|
| 783 |
+
delay = PROVIDER_RETRY_BASE_DELAY_S * (attempt + 1)
|
| 784 |
+
if _is_rate_limit_error(exc):
|
| 785 |
+
delay = max(delay, PROVIDER_RETRY_RATE_LIMIT_DELAY_S * (attempt + 1))
|
| 786 |
+
time.sleep(min(PROVIDER_RETRY_MAX_DELAY_S, delay))
|
| 787 |
continue
|
| 788 |
raise last_exc
|
| 789 |
|
|
|
|
| 846 |
rdir = _run_dir(run_key)
|
| 847 |
candidates = [
|
| 848 |
str(rdir / "summaries.json"),
|
| 849 |
+
str(rdir / "optimization.json"),
|
| 850 |
str(rdir / "labels.json"),
|
| 851 |
str(rdir / "labels_verification.json"),
|
| 852 |
str(rdir / "themes.json"),
|
|
|
|
| 955 |
"Approve", "Rename To", "Reasoning"
|
| 956 |
"""
|
| 957 |
labels_path = OUTPUT_DIR / state.get("run_key", DEFAULT_RUN_KEY) / "labels.json"
|
| 958 |
+
summaries_path = OUTPUT_DIR / state.get("run_key", DEFAULT_RUN_KEY) / "summaries.json"
|
| 959 |
+
summaries = _load_json(summaries_path) if summaries_path.exists() else []
|
| 960 |
+
summary_by_id = {
|
| 961 |
+
int(item.get("cluster_id", -1)): item
|
| 962 |
+
for item in summaries
|
| 963 |
+
if isinstance(item, dict)
|
| 964 |
+
}
|
| 965 |
|
| 966 |
def _reasoning_cell(row: dict) -> str:
|
| 967 |
+
return str(
|
| 968 |
+
row.get("adjudicated_reasoning")
|
| 969 |
+
or row.get("mistral_reasoning")
|
| 970 |
+
or row.get("reasoning", "")
|
| 971 |
+
).strip()
|
| 972 |
+
|
| 973 |
+
def _papers_cell(row: dict) -> str:
    """Build the "Papers" cell text for one label row.

    The paper count and the list of top papers are taken from *row* when
    present, otherwise from the matching entry in ``summary_by_id``
    (looked up by cluster id). At most the first three top papers are
    listed.

    Args:
        row: One label record (a dict); the cluster id is read from
            ``cluster_id`` or, failing that, ``#``.

    Returns:
        ``"<count> | <title>; <title>; ..."`` when both a count and titles
        are available, just the count or just the titles when only one is,
        and an empty string when neither is.
    """
    # Resolve the cluster id with explicit missing-value checks. Cluster 0 is
    # a legitimate id; a truthiness fallback (`... or -1`) would silently remap
    # it to -1 and look up the wrong summary entry.
    raw_id = row.get("cluster_id")
    if raw_id is None or raw_id == "":
        raw_id = row.get("#")
    cid = -1 if raw_id is None or raw_id == "" else int(raw_id)

    summary = summary_by_id.get(cid, {})

    # `is None` (not truthiness) so a row-level count of 0 is not overridden.
    count = row.get("paper_count")
    if count is None:
        count = summary.get("paper_count")

    top_papers = row.get("top_papers") or summary.get("top_papers", [])
    if not (isinstance(top_papers, list) and top_papers):
        return str(count) if count else ""

    titles = []
    for entry in top_papers[:3]:
        if not isinstance(entry, dict):
            # Non-dict entries are shown as their plain string form.
            titles.append(str(entry))
            continue
        title = str(
            entry.get("paper_title")
            or entry.get("title")
            or ""
        ).strip()
        if not title:
            continue
        per_paper_count = entry.get("count")
        titles.append(
            f"{title} ({per_paper_count})"
            if per_paper_count
            else title
        )

    # Drop empty strings (e.g. str("") from a blank non-dict entry).
    title_str = "; ".join(filter(None, titles))
    if count:
        return f"{count} | {title_str}" if title_str else str(count)
    return title_str
|
| 1004 |
+
|
| 1005 |
+
def _label_value(row: dict) -> str:
|
| 1006 |
+
return str(
|
| 1007 |
+
row.get("adjudicated_label")
|
| 1008 |
+
or row.get("mistral_label")
|
| 1009 |
+
or row.get("label")
|
| 1010 |
+
or ""
|
| 1011 |
+
).strip()
|
| 1012 |
|
| 1013 |
return (
|
| 1014 |
{
|
|
|
|
| 1016 |
"review_df": list(map(
|
| 1017 |
lambda r: {
|
| 1018 |
"#": r.get("cluster_id", 0),
|
| 1019 |
+
"Topic Label": _label_value(r),
|
| 1020 |
"Top Evidence":r["evidence"][0] if r.get("evidence") else "",
|
| 1021 |
"Sentences": r.get("size", 0),
|
| 1022 |
+
"Papers": _papers_cell(r),
|
| 1023 |
"Approve": False,
|
| 1024 |
+
"Rename To": _label_value(r),
|
| 1025 |
"Reasoning": _reasoning_cell(r),
|
| 1026 |
},
|
| 1027 |
_load_json(labels_path),
|
|
|
|
| 1101 |
|
| 1102 |
shown = rows[:VERIFY_CHAT_MAX_ROWS]
|
| 1103 |
header = [
|
| 1104 |
+
"| # | Mistral Label | Groq-Ollama Label | Groq-GPT Label | Best Label |",
|
| 1105 |
+
"|---|---|---|---|---|",
|
| 1106 |
]
|
| 1107 |
lines = list(map(
|
| 1108 |
lambda r: (
|
| 1109 |
f"| {int(r.get('cluster_id', 0))} "
|
| 1110 |
f"| {_sanitize_markdown_cell(r.get('mistral_label') or r.get('label', ''))} "
|
| 1111 |
f"| {_sanitize_markdown_cell(r.get('groq_ollama_label') or r.get('groq_label', ''))} "
|
| 1112 |
+
f"| {_sanitize_markdown_cell(r.get('groq_gpt_label', ''))} "
|
| 1113 |
+
f"| {_sanitize_markdown_cell(r.get('adjudicated_label', ''))} |"
|
| 1114 |
),
|
| 1115 |
shown,
|
| 1116 |
))
|
|
|
|
| 1193 |
reply = (
|
| 1194 |
"VERIFY complete. Groq-Ollama and Groq-GPT topic labeling has been added for Phase 2 topics.\n\n"
|
| 1195 |
f"Verified topics: {verified_count}/{labelled_count}\n"
|
| 1196 |
+
"Mistral vs Groq-Ollama vs Groq-GPT comparison (plus adjudicated best label) is shown below in chat.\n\n"
|
| 1197 |
f"{report}\n\n"
|
| 1198 |
"Compare labels, edit Rename To/Approve, then click Submit Review to continue.\n\n"
|
| 1199 |
"[STOP GATE 1 β AWAITING REVIEW TABLE SUBMISSION]"
|