Spaces:

Viske
/

Spjimr

Running

Fix deployment: defer agent load, add missing workbench packages and credentials

9ebfd41 4 days ago

1.83 kB

	# ============================================================================
	# node_02_initial_codes.py — Phase 2 (REAL, compliant thin wrapper)
	# ============================================================================
	#
	# COMPLIANCE
	# ----------
	# Thin orchestrator. Reads state, calls the code_sentences tool, shapes
	# output rows, writes result back. All domain logic (prompts, LLM loop,
	# code cleanup) lives in workbench_thematic_analysis/tools/.
	# ============================================================================

	from collections import Counter
	from training_data import TRAINING_EXAMPLES
	from .tools import code_sentences


	def phase2_initial_codes_node(state):
	    """Code a capped batch of training sentences and summarise the result.

	    Reads ``state["max_sentences_to_code"]`` and takes that many leading
	    entries of ``TRAINING_EXAMPLES``. The sentences are handed to
	    ``code_sentences`` together with ``state["llm_provider"]`` and
	    ``state["llm_key"]``; the returned codes are paired with each sentence
	    and its label by position.

	    Args:
	        state: Mapping-like object providing ``max_sentences_to_code``,
	            ``llm_provider`` and ``llm_key``; ``iteration`` is optional and
	            defaults to 0 in the step record.

	    Returns:
	        A dict with two keys:
	        ``"phase2_initial_codes"`` — status, counts, the per-sentence rows
	        and a code -> frequency mapping; and ``"steps"`` — a one-element
	        list describing this node's action.
	    """
	    limit = state["max_sentences_to_code"]
	    batch = TRAINING_EXAMPLES[:limit]
	    sentences = [example["sentence"] for example in batch]
	    true_labels = [example["label"] for example in batch]

	    # NOTE(review): presumably code_sentences returns one code per sentence,
	    # in input order — confirm against the tool; rows below index it by
	    # position.
	    codes = code_sentences(
	        sentences=sentences,
	        llm_provider=state["llm_provider"],
	        llm_key=state["llm_key"],
	    )

	    coded_rows = []
	    for position, (sentence, label) in enumerate(zip(sentences, true_labels)):
	        coded_rows.append(
	            {
	                "idx": position,
	                "sentence": sentence,
	                "true_label": label,
	                "llm_code": codes[position],
	            }
	        )

	    # Frequency table over every returned code.
	    code_counts = dict(Counter(codes))

	    n_rows = len(coded_rows)
	    n_codes = len(code_counts)

	    summary = {
	        "status": "real",
	        "n_sentences_coded": n_rows,
	        "n_unique_codes": n_codes,
	        "coded_rows": coded_rows,
	        "code_frequency": code_counts,
	    }
	    step_record = {
	        "step": state.get("iteration", 0),
	        "node": "phase2_initial_codes",
	        "action": "coded sentences (one LLM call per sentence)",
	        "detail": f"{n_rows} sentences, {n_codes} unique codes",
	    }

	    return {
	        "phase2_initial_codes": summary,
	        "steps": [step_record],
	    }