# ============================================================================
# node_02_initial_codes.py — Phase 2 (REAL, compliant thin wrapper)
# ============================================================================
#
# COMPLIANCE
# ----------
# Thin orchestrator. Reads state, calls the code_sentences tool, shapes
# output rows, writes result back. All domain logic (prompts, LLM loop,
# code cleanup) lives in workbench_thematic_analysis/tools/.
# ============================================================================

from collections import Counter

from training_data import TRAINING_EXAMPLES

from .tools import code_sentences


def phase2_initial_codes_node(state):
    """Code a prefix of the training sentences and summarise the outcome.

    Reads ``max_sentences_to_code``, ``llm_provider`` and ``llm_key`` from
    ``state`` (plus ``iteration`` when present, defaulting to 0), hands the
    selected sentences to ``code_sentences``, and pairs every sentence with
    its true label and the code found at the same position in the result.

    Args:
        state: Mapping holding the keys listed above.

    Returns:
        A dict with two entries:

        * ``"phase2_initial_codes"`` — status, counts, the per-sentence
          rows, and a code -> occurrence-count mapping.
        * ``"steps"`` — a one-element list describing this node's run.
    """
    limit = state["max_sentences_to_code"]
    selected = TRAINING_EXAMPLES[:limit]

    sentences = [example["sentence"] for example in selected]
    true_labels = [example["label"] for example in selected]

    provider = state["llm_provider"]
    api_key = state["llm_key"]
    codes = code_sentences(
        sentences=sentences,
        llm_provider=provider,
        llm_key=api_key,
    )

    coded_rows = []
    for position, (sentence, label) in enumerate(zip(sentences, true_labels)):
        # Index into ``codes`` (rather than zipping it in) so that a result
        # shorter than the input still raises instead of dropping rows.
        coded_rows.append(
            {
                "idx": position,
                "sentence": sentence,
                "true_label": label,
                "llm_code": codes[position],
            }
        )

    # Frequencies are taken over everything ``code_sentences`` returned.
    code_counts = dict(Counter(codes))

    n_rows = len(coded_rows)
    n_unique = len(code_counts)

    summary = {
        "status": "real",
        "n_sentences_coded": n_rows,
        "n_unique_codes": n_unique,
        "coded_rows": coded_rows,
        "code_frequency": code_counts,
    }
    step_entry = {
        "step": state.get("iteration", 0),
        "node": "phase2_initial_codes",
        "action": "coded sentences (one LLM call per sentence)",
        "detail": f"{n_rows} sentences, {n_unique} unique codes",
    }

    return {
        "phase2_initial_codes": summary,
        "steps": [step_entry],
    }