Spjimr / workbench_thematic_analysis /node_02_initial_codes.py
Viske's picture
Fix deployment: defer agent load, add missing workbench packages and credentials
9ebfd41
# ============================================================================
# node_02_initial_codes.py — Phase 2 (REAL, compliant thin wrapper)
# ============================================================================
#
# COMPLIANCE
# ----------
# Thin orchestrator. Reads state, calls the code_sentences tool, shapes
# output rows, writes result back. All domain logic (prompts, LLM loop,
# code cleanup) lives in workbench_thematic_analysis/tools/.
# ============================================================================
from collections import Counter
from training_data import TRAINING_EXAMPLES
from .tools import code_sentences
def phase2_initial_codes_node(state):
    """Phase 2 node: attach an initial LLM code to each training sentence.

    Takes the first ``state["max_sentences_to_code"]`` entries of
    ``TRAINING_EXAMPLES``, passes their sentences to ``code_sentences`` and
    pairs every returned code with its sentence and true label.

    Args:
        state: Mapping with the required keys ``"max_sentences_to_code"``,
            ``"llm_provider"`` and ``"llm_key"``. ``"iteration"`` is
            optional and is read with ``state.get`` (default 0) for the
            step record.

    Returns:
        A dict with two keys:

        * ``"phase2_initial_codes"``: status, number of coded sentences,
          number of unique codes, the per-sentence ``coded_rows`` and the
          ``code_frequency`` mapping (code -> count).
        * ``"steps"``: a one-element list describing this node's action.

    Raises:
        KeyError: If a required key is missing from ``state``.
        ValueError: If ``code_sentences`` returns a different number of
            codes than the number of sentences it was given.
    """
    max_n = state["max_sentences_to_code"]
    examples = TRAINING_EXAMPLES[:max_n]
    sentences = [example["sentence"] for example in examples]
    true_labels = [example["label"] for example in examples]

    codes = code_sentences(
        sentences=sentences,
        llm_provider=state["llm_provider"],
        llm_key=state["llm_key"],
    )

    # Rows and frequency counts are only consistent with each other when
    # there is exactly one code per sentence; fail loudly otherwise instead
    # of raising a bare IndexError (too few codes) or counting codes that
    # belong to no row (too many codes).
    # NOTE(review): assumes code_sentences returns a sized sequence (the
    # original indexed it by position) -- confirm against the tool.
    if len(codes) != len(sentences):
        raise ValueError(
            f"code_sentences returned {len(codes)} codes "
            f"for {len(sentences)} sentences"
        )

    coded_rows = [
        {
            "idx": idx,
            "sentence": sentence,
            "true_label": true_label,
            "llm_code": code,
        }
        for idx, (sentence, true_label, code) in enumerate(
            zip(sentences, true_labels, codes)
        )
    ]
    # Plain dict (not Counter) so the payload holds only builtin types.
    code_counts = dict(Counter(codes))

    return {
        "phase2_initial_codes": {
            "status": "real",
            "n_sentences_coded": len(coded_rows),
            "n_unique_codes": len(code_counts),
            "coded_rows": coded_rows,
            "code_frequency": code_counts,
        },
        "steps": [{
            "step": state.get("iteration", 0),
            "node": "phase2_initial_codes",
            "action": "coded sentences (one LLM call per sentence)",
            "detail": f"{len(coded_rows)} sentences, {len(code_counts)} unique codes",
        }],
    }