atharvthite05 committed on
Commit
6cd378e
·
verified ·
1 Parent(s): 60bc73f

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +86 -15
agent.py CHANGED
@@ -92,8 +92,10 @@ DEFAULT_RUN_KEY: str = "abstract"
92
  THREAD_PREFIX: str = "TA-"
93
  MAX_USER_MESSAGE_CHARS: int = 4000
94
  VERIFY_CHAT_MAX_ROWS: int = 20
95
- PROVIDER_RETRY_ATTEMPTS: int = 3
96
- PROVIDER_RETRY_BASE_DELAY_S: float = 1.5
 
 
97
 
98
  # FIX ISSUE 4 — surface missing API key immediately at import time
99
  _KEY_MISSING = not bool(MISTRAL_API_KEY)
@@ -313,6 +315,7 @@ After researcher confirms:
313
  → Filters publisher boilerplate (copyright, license text)
314
  → Embeds with SPECTER2 (L2-normalized)
315
  → UMAP reduces dimensions for HDBSCAN clustering
 
316
  → Finds 5 nearest centroid sentences per topic
317
  → Saves Plotly HTML visualizations
318
  → Saves embeddings + summaries checkpoints
@@ -360,7 +363,7 @@ After researcher confirms:
360
  | Research Area | General research area (NOT PACIS — that comes later in Phase 5.5) |
361
  | Confidence | How well the 5 sentences match the label |
362
  | Sentences | Number of sentences clustered here |
363
- | Papers | Number of unique papers contributing sentences |
364
  | Approve | Edit: yes/no — keep or reject this topic |
365
  | Rename To | Edit: type new name if label is wrong |
366
  | Your Reasoning | Edit: why you renamed/rejected |"
@@ -742,10 +745,25 @@ def _is_transient_provider_error(exc: Exception) -> bool:
742
  or '"raw_status_code":503' in msg
743
  or '"raw_status_code":502' in msg
744
  or '"raw_status_code":504' in msg
 
 
 
 
745
  or "service unavailable" in msg
746
  )
747
 
748
 
 
 
 
 
 
 
 
 
 
 
 
749
  def _invoke_react_with_retries(enriched: str, thread_id: str) -> dict:
750
  """Call the ReAct graph with bounded retries for transient provider failures."""
751
  last_exc: Exception | None = None
@@ -762,7 +780,10 @@ def _invoke_react_with_retries(enriched: str, thread_id: str) -> dict:
762
  raise
763
  last_exc = exc
764
  if attempt < PROVIDER_RETRY_ATTEMPTS - 1:
765
- time.sleep(PROVIDER_RETRY_BASE_DELAY_S * (attempt + 1))
 
 
 
766
  continue
767
  raise last_exc
768
 
@@ -825,6 +846,7 @@ def _collect_output_files(state: dict) -> list[str]:
825
  rdir = _run_dir(run_key)
826
  candidates = [
827
  str(rdir / "summaries.json"),
 
828
  str(rdir / "labels.json"),
829
  str(rdir / "labels_verification.json"),
830
  str(rdir / "themes.json"),
@@ -933,12 +955,60 @@ def _populate_review_df(state: dict) -> dict:
933
  "Approve", "Rename To", "Reasoning"
934
  """
935
  labels_path = OUTPUT_DIR / state.get("run_key", DEFAULT_RUN_KEY) / "labels.json"
 
 
 
 
 
 
 
936
 
937
  def _reasoning_cell(row: dict) -> str:
938
- return str(
939
- row.get("mistral_reasoning")
940
- or row.get("reasoning", "")
941
- ).strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
942
 
943
  return (
944
  {
@@ -946,12 +1016,12 @@ def _populate_review_df(state: dict) -> dict:
946
  "review_df": list(map(
947
  lambda r: {
948
  "#": r.get("cluster_id", 0),
949
- "Topic Label": r.get("label") or r.get("mistral_label", ""),
950
  "Top Evidence":r["evidence"][0] if r.get("evidence") else "",
951
  "Sentences": r.get("size", 0),
952
- "Papers": "",
953
  "Approve": False,
954
- "Rename To": r.get("label") or r.get("mistral_label", ""),
955
  "Reasoning": _reasoning_cell(r),
956
  },
957
  _load_json(labels_path),
@@ -1031,15 +1101,16 @@ def _build_verify_chat_report(rows: list[dict]) -> str:
1031
 
1032
  shown = rows[:VERIFY_CHAT_MAX_ROWS]
1033
  header = [
1034
- "| # | Mistral Label | Groq-Ollama Label | Groq-GPT Label |",
1035
- "|---|---|---|---|",
1036
  ]
1037
  lines = list(map(
1038
  lambda r: (
1039
  f"| {int(r.get('cluster_id', 0))} "
1040
  f"| {_sanitize_markdown_cell(r.get('mistral_label') or r.get('label', ''))} "
1041
  f"| {_sanitize_markdown_cell(r.get('groq_ollama_label') or r.get('groq_label', ''))} "
1042
- f"| {_sanitize_markdown_cell(r.get('groq_gpt_label', ''))} |"
 
1043
  ),
1044
  shown,
1045
  ))
@@ -1122,7 +1193,7 @@ def _handle_verify_command(state: dict) -> tuple[str, dict]:
1122
  reply = (
1123
  "VERIFY complete. Groq-Ollama and Groq-GPT topic labeling has been added for Phase 2 topics.\n\n"
1124
  f"Verified topics: {verified_count}/{labelled_count}\n"
1125
- "Mistral vs Groq-Ollama vs Groq-GPT comparison is shown below in chat.\n\n"
1126
  f"{report}\n\n"
1127
  "Compare labels, edit Rename To/Approve, then click Submit Review to continue.\n\n"
1128
  "[STOP GATE 1 β€” AWAITING REVIEW TABLE SUBMISSION]"
 
92
  THREAD_PREFIX: str = "TA-"
93
  MAX_USER_MESSAGE_CHARS: int = 4000
94
  VERIFY_CHAT_MAX_ROWS: int = 20
95
+ PROVIDER_RETRY_ATTEMPTS: int = 4
96
+ PROVIDER_RETRY_BASE_DELAY_S: float = 2.0
97
+ PROVIDER_RETRY_RATE_LIMIT_DELAY_S: float = 6.0
98
+ PROVIDER_RETRY_MAX_DELAY_S: float = 18.0
99
 
100
  # FIX ISSUE 4 — surface missing API key immediately at import time
101
  _KEY_MISSING = not bool(MISTRAL_API_KEY)
 
315
  → Filters publisher boilerplate (copyright, license text)
316
  → Embeds with SPECTER2 (L2-normalized)
317
  → UMAP reduces dimensions for HDBSCAN clustering
318
+ → Auto-optimizes HDBSCAN parameters after the first run (optimization.json)
319
  → Finds 5 nearest centroid sentences per topic
320
  → Saves Plotly HTML visualizations
321
  → Saves embeddings + summaries checkpoints
 
363
  | Research Area | General research area (NOT PACIS — that comes later in Phase 5.5) |
364
  | Confidence | How well the 5 sentences match the label |
365
  | Sentences | Number of sentences clustered here |
366
+ | Papers | Unique paper count plus top 3 paper titles |
367
  | Approve | Edit: yes/no — keep or reject this topic |
368
  | Rename To | Edit: type new name if label is wrong |
369
  | Your Reasoning | Edit: why you renamed/rejected |"
 
745
  or '"raw_status_code":503' in msg
746
  or '"raw_status_code":502' in msg
747
  or '"raw_status_code":504' in msg
748
+ or '"raw_status_code":429' in msg
749
+ or '"status":429' in msg
750
+ or "too many requests" in msg
751
+ or "rate limit" in msg
752
  or "service unavailable" in msg
753
  )
754
 
755
 
756
def _is_rate_limit_error(exc: Exception) -> bool:
    """Report whether an exception's text looks like a rate-limit (HTTP 429) failure.

    The decision is a case-insensitive substring scan of ``str(exc)``; the
    exception's type is not inspected.

    Args:
        exc: The exception raised by the provider call.

    Returns:
        True when any known rate-limit marker appears in the message.
    """
    text = str(exc).lower()
    # Markers are written in lower case because the message is lower-cased first.
    markers = (
        "rate limit",
        "too many requests",
        '"raw_status_code":429',
        '"status":429',
        "status code: 429",
    )
    return any(marker in text for marker in markers)
765
+
766
+
767
  def _invoke_react_with_retries(enriched: str, thread_id: str) -> dict:
768
  """Call the ReAct graph with bounded retries for transient provider failures."""
769
  last_exc: Exception | None = None
 
780
  raise
781
  last_exc = exc
782
  if attempt < PROVIDER_RETRY_ATTEMPTS - 1:
783
+ delay = PROVIDER_RETRY_BASE_DELAY_S * (attempt + 1)
784
+ if _is_rate_limit_error(exc):
785
+ delay = max(delay, PROVIDER_RETRY_RATE_LIMIT_DELAY_S * (attempt + 1))
786
+ time.sleep(min(PROVIDER_RETRY_MAX_DELAY_S, delay))
787
  continue
788
  raise last_exc
789
 
 
846
  rdir = _run_dir(run_key)
847
  candidates = [
848
  str(rdir / "summaries.json"),
849
+ str(rdir / "optimization.json"),
850
  str(rdir / "labels.json"),
851
  str(rdir / "labels_verification.json"),
852
  str(rdir / "themes.json"),
 
955
  "Approve", "Rename To", "Reasoning"
956
  """
957
  labels_path = OUTPUT_DIR / state.get("run_key", DEFAULT_RUN_KEY) / "labels.json"
958
+ summaries_path = OUTPUT_DIR / state.get("run_key", DEFAULT_RUN_KEY) / "summaries.json"
959
+ summaries = _load_json(summaries_path) if summaries_path.exists() else []
960
+ summary_by_id = {
961
+ int(item.get("cluster_id", -1)): item
962
+ for item in summaries
963
+ if isinstance(item, dict)
964
+ }
965
 
966
  def _reasoning_cell(row: dict) -> str:
967
+ return str(
968
+ row.get("adjudicated_reasoning")
969
+ or row.get("mistral_reasoning")
970
+ or row.get("reasoning", "")
971
+ ).strip()
972
+
973
def _papers_cell(row: dict) -> str:
    """Build the "Papers" cell text for one review-table row.

    The paper count and the top-paper list are taken from ``row`` when
    present, otherwise from the matching entry in ``summary_by_id`` (looked
    up by the row's cluster id). At most the first three papers are listed.

    Args:
        row: One label record; the cluster id is read from ``cluster_id``
            or, failing that, from ``#``.

    Returns:
        ``"<count> | <title>; <title>; ..."`` when both parts are available,
        just the count or just the titles when only one is, and "" when
        neither is.
    """
    # Resolve the id without `or`: a legitimate cluster id of 0 is falsy and
    # would otherwise be rewritten to the -1 "unknown" fallback, making
    # cluster 0 read the wrong summary entry.
    raw_id = row.get("cluster_id")
    if raw_id is None:
        raw_id = row.get("#")
    try:
        cid = int(raw_id)
    except (TypeError, ValueError, OverflowError):
        # Missing or non-numeric id: fall back to -1 instead of raising.
        cid = -1

    summary = summary_by_id.get(cid, {})

    count = row.get("paper_count")
    if count is None:
        count = summary.get("paper_count")

    top_papers = row.get("top_papers") or summary.get("top_papers", [])
    if isinstance(top_papers, list) and top_papers:
        titles = []
        for entry in top_papers[:3]:
            if not isinstance(entry, dict):
                # Non-dict entries are shown as their string form.
                titles.append(str(entry))
                continue
            title = str(
                entry.get("paper_title")
                or entry.get("title")
                or ""
            ).strip()
            if not title:
                continue
            paper_count = entry.get("count")
            # Append the per-paper count only when it is truthy.
            titles.append(f"{title} ({paper_count})" if paper_count else title)

        title_str = "; ".join(filter(None, titles))
        if count:
            return f"{count} | {title_str}" if title_str else str(count)
        return title_str

    return str(count) if count else ""
1004
+
1005
def _label_value(row: dict) -> str:
    """Pick the label text to display for one label row.

    Keys are tried in order: ``adjudicated_label``, ``mistral_label``,
    ``label``. The first truthy value is returned as a stripped string;
    "" is returned when none is set.
    """
    for key in ("adjudicated_label", "mistral_label", "label"):
        candidate = row.get(key)
        if candidate:
            return str(candidate).strip()
    return ""
1012
 
1013
  return (
1014
  {
 
1016
  "review_df": list(map(
1017
  lambda r: {
1018
  "#": r.get("cluster_id", 0),
1019
+ "Topic Label": _label_value(r),
1020
  "Top Evidence":r["evidence"][0] if r.get("evidence") else "",
1021
  "Sentences": r.get("size", 0),
1022
+ "Papers": _papers_cell(r),
1023
  "Approve": False,
1024
+ "Rename To": _label_value(r),
1025
  "Reasoning": _reasoning_cell(r),
1026
  },
1027
  _load_json(labels_path),
 
1101
 
1102
  shown = rows[:VERIFY_CHAT_MAX_ROWS]
1103
  header = [
1104
+ "| # | Mistral Label | Groq-Ollama Label | Groq-GPT Label | Best Label |",
1105
+ "|---|---|---|---|---|",
1106
  ]
1107
  lines = list(map(
1108
  lambda r: (
1109
  f"| {int(r.get('cluster_id', 0))} "
1110
  f"| {_sanitize_markdown_cell(r.get('mistral_label') or r.get('label', ''))} "
1111
  f"| {_sanitize_markdown_cell(r.get('groq_ollama_label') or r.get('groq_label', ''))} "
1112
+ f"| {_sanitize_markdown_cell(r.get('groq_gpt_label', ''))} "
1113
+ f"| {_sanitize_markdown_cell(r.get('adjudicated_label', ''))} |"
1114
  ),
1115
  shown,
1116
  ))
 
1193
  reply = (
1194
  "VERIFY complete. Groq-Ollama and Groq-GPT topic labeling has been added for Phase 2 topics.\n\n"
1195
  f"Verified topics: {verified_count}/{labelled_count}\n"
1196
+ "Mistral vs Groq-Ollama vs Groq-GPT comparison (plus adjudicated best label) is shown below in chat.\n\n"
1197
  f"{report}\n\n"
1198
  "Compare labels, edit Rename To/Approve, then click Submit Review to continue.\n\n"
1199
  "[STOP GATE 1 β€” AWAITING REVIEW TABLE SUBMISSION]"