Spaces:

aadisawant2912
/

topic_modelling

Running

App Files Files Community

aadisawant2912 committed on Apr 11

Commit

c77d030

verified ·

1 Parent(s): 17086a1

Update agent.py

Browse files

Files changed (1) hide show

agent.py +163 -158

agent.py CHANGED Viewed

@@ -1,159 +1,164 @@
-"""
-agent.py — Braun & Clarke (2006) Thematic Analysis Agent
-Uses LangGraph create_react_agent with ChatMistralAI and MemorySaver.
-"""
-from __future__ import annotations
-# Load .env file so MISTRAL_API_KEY is available before anything else
-from dotenv import load_dotenv
-load_dotenv()
-from langgraph.prebuilt import create_react_agent
-from langgraph.checkpoint.memory import MemorySaver
-from langchain_mistralai import ChatMistralAI
-from langchain_core.messages import AIMessage, ToolMessage
-from tools import (
-    load_scopus_csv,
-    run_bertopic_discovery,
-    label_topics_with_llm,
-    consolidate_into_themes,
-    compare_with_taxonomy,
-    generate_comparison_csv,
-    export_narrative,
-)
-SYSTEM_PROMPT = """
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-ROLE
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-You are a computational thematic analysis expert specialising in systematic
-literature reviews in Information Systems. You follow Braun & Clarke (2006)
-rigorously, combine NLP tooling with researcher judgment, and communicate
-clearly at each phase before stopping for human input.
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-CRITICAL RULES
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-1. ONE PHASE PER MESSAGE: Complete exactly one phase per response, then STOP.
-2. ALL APPROVALS VIA REVIEW TABLE: Never ask the researcher to approve in chat.
-3. WAIT FOR SUBMIT REVIEW: After Phase 2, wait for Submit Review button click.
-4. NEVER SKIP STOP GATES: Mandatory stops after Phases 2, 3, 4, and 5.5.
-5. TOOL ERRORS: Report errors clearly and wait for researcher response.
-6. NO HALLUCINATION: Only reference data returned by tools.
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-AVAILABLE TOOLS
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-1. load_scopus_csv(csv_path, run_config)
-2. run_bertopic_discovery(top_n_topics)
-3. label_topics_with_llm(batch_size)
-4. consolidate_into_themes(approved_groups)
-5. compare_with_taxonomy()
-6. generate_comparison_csv()
-7. export_narrative()
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-BRAUN & CLARKE (2006) — 6 PHASES
-━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-PHASE 1 — Familiarisation
-  a. Call load_scopus_csv.
-  b. Show: total papers, sentences, columns used, data quality notes.
-  c. STOP — Ask: "Shall I proceed to Phase 2?"
-PHASE 2 — Generating Initial Codes
-  a. Call run_bertopic_discovery.
-  b. Call label_topics_with_llm.
-  c. Tell researcher to use the Review Table (UI) to approve and group topics.
-  d. STOP GATE 1 — Say: "Please review topics in the Review Table tab and
-     click Submit Review. I will wait."
-PHASE 3 — Searching for Themes
-  a. Call consolidate_into_themes with the approved_groups JSON provided.
-  b. Present theme summary.
-  c. STOP GATE 2 — Ask: "Do these themes look correct? Reply yes to continue."
-PHASE 4 — Reviewing Themes
-  a. Report coverage percentages.
-  b. Flag weak themes (<2% coverage).
-  c. STOP GATE 3 — Ask: "Is coverage satisfactory? Reply satisfied to proceed."
-PHASE 5 — Defining and Naming Themes
-  a. Present final theme names.
-  b. Confirm with researcher.
-  c. Proceed to Phase 5.5.
-PHASE 5.5 — PAJAIS Taxonomy Mapping
-  a. Call compare_with_taxonomy.
-  b. Present mapping table.
-  c. STOP GATE 4 — Ask: "Does mapping look correct? Reply yes for Phase 6."
-PHASE 6 — Producing the Report
-  a. Call generate_comparison_csv.
-  b. Call export_narrative.
-  c. Inform researcher files are in the Download tab.
-  d. COMPLETE.
-""".strip()
-_llm = ChatMistralAI(model="mistral-large-latest", temperature=0.3)
-_tools = [
-    load_scopus_csv,
-    run_bertopic_discovery,
-    label_topics_with_llm,
-    consolidate_into_themes,
-    compare_with_taxonomy,
-    generate_comparison_csv,
-    export_narrative,
-]
-_memory = MemorySaver()
-agent = create_react_agent(
-    model=_llm,
-    tools=_tools,
-    checkpointer=_memory,
-    prompt=SYSTEM_PROMPT,
-)
-def clean_thread_history(thread_id: str) -> None:
-    """
-    Remove any AIMessages that have pending tool_calls with no matching
-    ToolMessage. Call this before streaming if the thread may be corrupted.
-    """
-    config = {"configurable": {"thread_id": thread_id}}
-    checkpoint = _memory.get(config)
-    if checkpoint is None:
-        return
-    messages = checkpoint.get("channel_values", {}).get("messages", [])
-    if not messages:
-        return
-    # collect tool_call ids that have a ToolMessage response
-    responded_ids: set = set()
-    for msg in messages:
-        if isinstance(msg, ToolMessage):
-            responded_ids.add(msg.tool_call_id)
-    # keep only messages that are NOT unresolved AI tool calls
-    def is_safe(msg) -> bool:
-        if not isinstance(msg, AIMessage):
-            return True
-        calls = getattr(msg, "tool_calls", [])
-        if not calls:
-            return True
-        # keep only if ALL its tool calls have responses
-        return all(c.get("id") in responded_ids for c in calls)
-    clean = list(filter(is_safe, messages))
-    if len(clean) == len(messages):
-        return  # nothing to fix
-    # write cleaned messages back
-    checkpoint["channel_values"]["messages"] = clean
     _memory.put(config, checkpoint, {}, {})

+"""
+agent.py — Braun & Clarke (2006) Thematic Analysis Agent
+Uses LangGraph create_react_agent with ChatMistralAI and MemorySaver.
+"""
+from __future__ import annotations
+# Load .env file so MISTRAL_API_KEY is available before anything else
+from dotenv import load_dotenv
+load_dotenv()
+from langgraph.prebuilt import create_react_agent
+from langgraph.checkpoint.memory import MemorySaver
+from langchain_mistralai import ChatMistralAI
+from langchain_core.messages import AIMessage, ToolMessage
+from tools import (
+    load_scopus_csv,
+    run_bertopic_discovery,
+    label_topics_with_llm,
+    consolidate_into_themes,
+    compare_with_taxonomy,
+    generate_comparison_csv,
+    export_narrative,
+)
+SYSTEM_PROMPT = """
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+ROLE
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+You are a computational thematic analysis expert specialising in systematic
+literature reviews in Information Systems. You follow Braun & Clarke (2006)
+rigorously, combine NLP tooling with researcher judgment, and communicate
+clearly at each phase before stopping for human input.
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+CRITICAL RULES
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+1. ONE PHASE PER MESSAGE: Complete exactly one phase per response, then STOP.
+2. ALL APPROVALS VIA REVIEW TABLE: Never ask the researcher to approve in chat.
+3. WAIT FOR SUBMIT REVIEW: After Phase 2, wait for Submit Review button click.
+4. NEVER SKIP STOP GATES: Mandatory stops after Phases 2, 3, 4, and 5.5.
+5. TOOL ERRORS: Report errors clearly and wait for researcher response.
+6. NO HALLUCINATION: Only reference data returned by tools.
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+AVAILABLE TOOLS
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+1. load_scopus_csv(csv_path, run_config)
+2. run_bertopic_discovery(top_n_topics)
+3. label_topics_with_llm(batch_size)
+4. consolidate_into_themes(approved_groups)
+5. compare_with_taxonomy()
+6. generate_comparison_csv()
+7. export_narrative()
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━��━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+BRAUN & CLARKE (2006) — 6 PHASES
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+PHASE 1 — Familiarisation
+  a. Call load_scopus_csv.
+  b. Show: total papers, sentences, columns used, data quality notes.
+  c. STOP — Ask: "Shall I proceed to Phase 2?"
+PHASE 2 — Generating Initial Codes
+  a. Call run_bertopic_discovery.
+  b. Call label_topics_with_llm.
+  c. Tell researcher to use the Review Table (UI) to approve and group topics.
+  d. STOP GATE 1 — Say: "Please review topics in the Review Table tab and
+     click Submit Review. I will wait."
+PHASE 3 — Searching for Themes
+  a. Call consolidate_into_themes with the approved_groups JSON provided.
+  b. Present theme summary.
+  c. STOP GATE 2 — Ask: "Do these themes look correct? Reply yes to continue."
+PHASE 4 — Reviewing Themes
+  a. Report coverage percentages.
+  b. Flag weak themes (<2% coverage).
+  c. STOP GATE 3 — Ask: "Is coverage satisfactory? Reply satisfied to proceed."
+PHASE 5 — Defining and Naming Themes
+  a. Present final theme names.
+  b. Confirm with researcher.
+  c. Proceed to Phase 5.5.
+PHASE 5.5 — PAJAIS Taxonomy Mapping
+  a. Call compare_with_taxonomy.
+  b. Present mapping table.
+  c. STOP GATE 4 — Ask: "Does mapping look correct? Reply yes for Phase 6."
+PHASE 6 — Producing the Report
+  a. Call generate_comparison_csv.
+  b. Call export_narrative.
+  c. Inform researcher files are in the Download tab.
+  d. COMPLETE.
+""".strip()
+_llm = ChatMistralAI(model="mistral-large-latest", temperature=0.3)
+_tools = [
+    load_scopus_csv,
+    run_bertopic_discovery,
+    label_topics_with_llm,
+    consolidate_into_themes,
+    compare_with_taxonomy,
+    generate_comparison_csv,
+    export_narrative,
+]
+_memory = MemorySaver()
+agent = create_react_agent(
+    model=_llm,
+    tools=_tools,
+    checkpointer=_memory,
+    prompt=SYSTEM_PROMPT,
+)
+def clean_thread_history(thread_id: str) -> None:
+    """
+    Remove any AIMessages that have pending tool_calls with no matching
+    ToolMessage. Call this before streaming if the thread may be corrupted.
+    """
+    config = {
+    "configurable": {
+        "thread_id": thread_id,
+        "checkpoint_ns": "default"
+    }
+}
+    checkpoint = _memory.get(config)
+    if checkpoint is None:
+        return
+    messages = checkpoint.get("channel_values", {}).get("messages", [])
+    if not messages:
+        return
+    # collect tool_call ids that have a ToolMessage response
+    responded_ids: set = set()
+    for msg in messages:
+        if isinstance(msg, ToolMessage):
+            responded_ids.add(msg.tool_call_id)
+    # keep only messages that are NOT unresolved AI tool calls
+    def is_safe(msg) -> bool:
+        if not isinstance(msg, AIMessage):
+            return True
+        calls = getattr(msg, "tool_calls", [])
+        if not calls:
+            return True
+        # keep only if ALL its tool calls have responses
+        return all(c.get("id") in responded_ids for c in calls)
+    clean = list(filter(is_safe, messages))
+    if len(clean) == len(messages):
+        return  # nothing to fix
+    # write cleaned messages back
+    checkpoint["channel_values"]["messages"] = clean
     _memory.put(config, checkpoint, {}, {})