aadisawant2912 committed on
Commit
c77d030
·
verified ·
1 Parent(s): 17086a1

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +163 -158
agent.py CHANGED
@@ -1,159 +1,164 @@
1
- """
2
- agent.py β€” Braun & Clarke (2006) Thematic Analysis Agent
3
- Uses LangGraph create_react_agent with ChatMistralAI and MemorySaver.
4
- """
5
-
6
- from __future__ import annotations
7
-
8
- # Load .env file so MISTRAL_API_KEY is available before anything else
9
- from dotenv import load_dotenv
10
- load_dotenv()
11
-
12
- from langgraph.prebuilt import create_react_agent
13
- from langgraph.checkpoint.memory import MemorySaver
14
- from langchain_mistralai import ChatMistralAI
15
- from langchain_core.messages import AIMessage, ToolMessage
16
-
17
- from tools import (
18
- load_scopus_csv,
19
- run_bertopic_discovery,
20
- label_topics_with_llm,
21
- consolidate_into_themes,
22
- compare_with_taxonomy,
23
- generate_comparison_csv,
24
- export_narrative,
25
- )
26
-
27
- SYSTEM_PROMPT = """
28
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
29
- ROLE
30
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
31
- You are a computational thematic analysis expert specialising in systematic
32
- literature reviews in Information Systems. You follow Braun & Clarke (2006)
33
- rigorously, combine NLP tooling with researcher judgment, and communicate
34
- clearly at each phase before stopping for human input.
35
-
36
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
37
- CRITICAL RULES
38
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
39
- 1. ONE PHASE PER MESSAGE: Complete exactly one phase per response, then STOP.
40
- 2. ALL APPROVALS VIA REVIEW TABLE: Never ask the researcher to approve in chat.
41
- 3. WAIT FOR SUBMIT REVIEW: After Phase 2, wait for Submit Review button click.
42
- 4. NEVER SKIP STOP GATES: Mandatory stops after Phases 2, 3, 4, and 5.5.
43
- 5. TOOL ERRORS: Report errors clearly and wait for researcher response.
44
- 6. NO HALLUCINATION: Only reference data returned by tools.
45
-
46
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
47
- AVAILABLE TOOLS
48
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
49
- 1. load_scopus_csv(csv_path, run_config)
50
- 2. run_bertopic_discovery(top_n_topics)
51
- 3. label_topics_with_llm(batch_size)
52
- 4. consolidate_into_themes(approved_groups)
53
- 5. compare_with_taxonomy()
54
- 6. generate_comparison_csv()
55
- 7. export_narrative()
56
-
57
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
58
- BRAUN & CLARKE (2006) β€” 6 PHASES
59
- ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
60
-
61
- PHASE 1 β€” Familiarisation
62
- a. Call load_scopus_csv.
63
- b. Show: total papers, sentences, columns used, data quality notes.
64
- c. STOP β€” Ask: "Shall I proceed to Phase 2?"
65
-
66
- PHASE 2 β€” Generating Initial Codes
67
- a. Call run_bertopic_discovery.
68
- b. Call label_topics_with_llm.
69
- c. Tell researcher to use the Review Table (UI) to approve and group topics.
70
- d. STOP GATE 1 β€” Say: "Please review topics in the Review Table tab and
71
- click Submit Review. I will wait."
72
-
73
- PHASE 3 β€” Searching for Themes
74
- a. Call consolidate_into_themes with the approved_groups JSON provided.
75
- b. Present theme summary.
76
- c. STOP GATE 2 β€” Ask: "Do these themes look correct? Reply yes to continue."
77
-
78
- PHASE 4 β€” Reviewing Themes
79
- a. Report coverage percentages.
80
- b. Flag weak themes (<2% coverage).
81
- c. STOP GATE 3 β€” Ask: "Is coverage satisfactory? Reply satisfied to proceed."
82
-
83
- PHASE 5 β€” Defining and Naming Themes
84
- a. Present final theme names.
85
- b. Confirm with researcher.
86
- c. Proceed to Phase 5.5.
87
-
88
- PHASE 5.5 β€” PAJAIS Taxonomy Mapping
89
- a. Call compare_with_taxonomy.
90
- b. Present mapping table.
91
- c. STOP GATE 4 β€” Ask: "Does mapping look correct? Reply yes for Phase 6."
92
-
93
- PHASE 6 β€” Producing the Report
94
- a. Call generate_comparison_csv.
95
- b. Call export_narrative.
96
- c. Inform researcher files are in the Download tab.
97
- d. COMPLETE.
98
- """.strip()
99
-
100
- _llm = ChatMistralAI(model="mistral-large-latest", temperature=0.3)
101
-
102
- _tools = [
103
- load_scopus_csv,
104
- run_bertopic_discovery,
105
- label_topics_with_llm,
106
- consolidate_into_themes,
107
- compare_with_taxonomy,
108
- generate_comparison_csv,
109
- export_narrative,
110
- ]
111
-
112
- _memory = MemorySaver()
113
-
114
- agent = create_react_agent(
115
- model=_llm,
116
- tools=_tools,
117
- checkpointer=_memory,
118
- prompt=SYSTEM_PROMPT,
119
- )
120
-
121
-
122
- def clean_thread_history(thread_id: str) -> None:
123
- """
124
- Remove any AIMessages that have pending tool_calls with no matching
125
- ToolMessage. Call this before streaming if the thread may be corrupted.
126
- """
127
- config = {"configurable": {"thread_id": thread_id}}
128
- checkpoint = _memory.get(config)
129
- if checkpoint is None:
130
- return
131
-
132
- messages = checkpoint.get("channel_values", {}).get("messages", [])
133
- if not messages:
134
- return
135
-
136
- # collect tool_call ids that have a ToolMessage response
137
- responded_ids: set = set()
138
- for msg in messages:
139
- if isinstance(msg, ToolMessage):
140
- responded_ids.add(msg.tool_call_id)
141
-
142
- # keep only messages that are NOT unresolved AI tool calls
143
- def is_safe(msg) -> bool:
144
- if not isinstance(msg, AIMessage):
145
- return True
146
- calls = getattr(msg, "tool_calls", [])
147
- if not calls:
148
- return True
149
- # keep only if ALL its tool calls have responses
150
- return all(c.get("id") in responded_ids for c in calls)
151
-
152
- clean = list(filter(is_safe, messages))
153
-
154
- if len(clean) == len(messages):
155
- return # nothing to fix
156
-
157
- # write cleaned messages back
158
- checkpoint["channel_values"]["messages"] = clean
 
 
 
 
 
159
  _memory.put(config, checkpoint, {}, {})
 
1
+ """
2
+ agent.py — Braun & Clarke (2006) Thematic Analysis Agent
3
+ Uses LangGraph create_react_agent with ChatMistralAI and MemorySaver.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ # Load .env file so MISTRAL_API_KEY is available before anything else
9
+ from dotenv import load_dotenv
10
+ load_dotenv()
11
+
12
+ from langgraph.prebuilt import create_react_agent
13
+ from langgraph.checkpoint.memory import MemorySaver
14
+ from langchain_mistralai import ChatMistralAI
15
+ from langchain_core.messages import AIMessage, ToolMessage
16
+
17
+ from tools import (
18
+ load_scopus_csv,
19
+ run_bertopic_discovery,
20
+ label_topics_with_llm,
21
+ consolidate_into_themes,
22
+ compare_with_taxonomy,
23
+ generate_comparison_csv,
24
+ export_narrative,
25
+ )
26
+
27
# System prompt handed to the ReAct agent. It defines the agent's role, the
# hard rules it must obey, the tool list, and the phase-by-phase workflow with
# its stop gates.
#
# NOTE(review): two instructions were made self-consistent here:
#   * Rule 2 used to forbid asking for approval in chat, while Stop Gates 2-4
#     explicitly ask for a chat reply. Rule 2 is now scoped to topic
#     approval/grouping (the Review Table step); gate confirmations stay in chat.
#   * Phase 5 step c used to say "Proceed to Phase 5.5", which conflicts with
#     Rule 1 (one phase per message). It now stops and continues next message.
# The dashes and separator rules are written as real Unicode characters.
SYSTEM_PROMPT = """
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
ROLE
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
You are a computational thematic analysis expert specialising in systematic
literature reviews in Information Systems. You follow Braun & Clarke (2006)
rigorously, combine NLP tooling with researcher judgment, and communicate
clearly at each phase before stopping for human input.

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
CRITICAL RULES
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
1. ONE PHASE PER MESSAGE: Complete exactly one phase per response, then STOP.
2. TOPIC APPROVALS VIA REVIEW TABLE: Never ask the researcher to approve or
   group topics in chat. Only the phase-gate confirmations below are answered
   in chat.
3. WAIT FOR SUBMIT REVIEW: After Phase 2, wait for Submit Review button click.
4. NEVER SKIP STOP GATES: Mandatory stops after Phases 2, 3, 4, and 5.5.
5. TOOL ERRORS: Report errors clearly and wait for researcher response.
6. NO HALLUCINATION: Only reference data returned by tools.

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
AVAILABLE TOOLS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
1. load_scopus_csv(csv_path, run_config)
2. run_bertopic_discovery(top_n_topics)
3. label_topics_with_llm(batch_size)
4. consolidate_into_themes(approved_groups)
5. compare_with_taxonomy()
6. generate_comparison_csv()
7. export_narrative()

━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
BRAUN & CLARKE (2006) — 6 PHASES
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

PHASE 1 — Familiarisation
a. Call load_scopus_csv.
b. Show: total papers, sentences, columns used, data quality notes.
c. STOP — Ask: "Shall I proceed to Phase 2?"

PHASE 2 — Generating Initial Codes
a. Call run_bertopic_discovery.
b. Call label_topics_with_llm.
c. Tell researcher to use the Review Table (UI) to approve and group topics.
d. STOP GATE 1 — Say: "Please review topics in the Review Table tab and
click Submit Review. I will wait."

PHASE 3 — Searching for Themes
a. Call consolidate_into_themes with the approved_groups JSON provided.
b. Present theme summary.
c. STOP GATE 2 — Ask: "Do these themes look correct? Reply yes to continue."

PHASE 4 — Reviewing Themes
a. Report coverage percentages.
b. Flag weak themes (<2% coverage).
c. STOP GATE 3 — Ask: "Is coverage satisfactory? Reply satisfied to proceed."

PHASE 5 — Defining and Naming Themes
a. Present final theme names.
b. Confirm with researcher.
c. STOP — Once the researcher confirms, continue with Phase 5.5 in the next
message.

PHASE 5.5 — PAJAIS Taxonomy Mapping
a. Call compare_with_taxonomy.
b. Present mapping table.
c. STOP GATE 4 — Ask: "Does mapping look correct? Reply yes for Phase 6."

PHASE 6 — Producing the Report
a. Call generate_comparison_csv.
b. Call export_narrative.
c. Inform researcher files are in the Download tab.
d. COMPLETE.
""".strip()
99
+
100
# Chat model that drives the agent's reasoning and tool selection.
_llm = ChatMistralAI(
    model="mistral-large-latest",
    temperature=0.3,
)

# Checkpointer passed to the agent below; threads are addressed by the
# "thread_id" entry of the run config.
_memory = MemorySaver()

# Tools exposed to the agent, listed in the order the workflow in
# SYSTEM_PROMPT calls them.
_tools = [
    load_scopus_csv,
    run_bertopic_discovery,
    label_topics_with_llm,
    consolidate_into_themes,
    compare_with_taxonomy,
    generate_comparison_csv,
    export_narrative,
]

# The compiled ReAct agent: model + tools + checkpointer + system prompt.
agent = create_react_agent(
    model=_llm,
    tools=_tools,
    checkpointer=_memory,
    prompt=SYSTEM_PROMPT,
)
120
+
121
+
122
def clean_thread_history(thread_id: str) -> None:
    """
    Repair a thread whose saved history contains dangling tool calls.

    An AIMessage is considered dangling when it carries ``tool_calls`` and at
    least one of those calls has no ToolMessage with a matching
    ``tool_call_id`` in the thread. Such a message is removed, together with
    any ToolMessages that answered its *other* calls (leaving those behind
    would orphan them once their AIMessage is gone).

    The thread is read and edited through the compiled graph's state API
    (``agent.get_state`` / ``agent.update_state`` with ``RemoveMessage``)
    rather than by writing to the checkpointer directly, so the removal is
    applied by the graph's message reducer and saved like any other state
    update, in the namespace the agent itself uses.

    Call this before streaming if the thread may be corrupted. It is a no-op
    when the thread has no saved messages or nothing is dangling.

    Args:
        thread_id: Identifier of the conversation thread to repair.
    """
    # Local import: the module-level import list only brings in AIMessage and
    # ToolMessage, and this keeps the function self-contained.
    from langchain_core.messages import RemoveMessage

    config = {"configurable": {"thread_id": thread_id}}

    # NOTE(review): assumes get_state() returns an empty ``values`` mapping for
    # a thread that has never been checkpointed — confirm for the pinned
    # langgraph version.
    messages = agent.get_state(config).values.get("messages", [])
    if not messages:
        return

    # tool_call ids that already have a ToolMessage response
    answered_ids = {
        msg.tool_call_id for msg in messages if isinstance(msg, ToolMessage)
    }

    # AI messages with at least one unanswered tool call, plus every call id
    # they issued (needed to find their now-orphaned partial responses).
    stale = []
    dropped_call_ids: set = set()
    for msg in messages:
        if not isinstance(msg, AIMessage):
            continue
        call_ids = [
            call.get("id") for call in (getattr(msg, "tool_calls", None) or [])
        ]
        if call_ids and not all(cid in answered_ids for cid in call_ids):
            stale.append(msg)
            dropped_call_ids.update(call_ids)

    if not stale:
        return  # nothing to fix

    # ToolMessages that answered a call of a removed AIMessage must go too.
    stale.extend(
        msg
        for msg in messages
        if isinstance(msg, ToolMessage) and msg.tool_call_id in dropped_call_ids
    )

    # RemoveMessage targets messages by id; a message without one cannot be
    # addressed and is left untouched.
    removals = [RemoveMessage(id=msg.id) for msg in stale if msg.id is not None]
    if removals:
        agent.update_state(config, {"messages": removals})